From 528e8cf9dfe55ff783426c55950b07321347289b Mon Sep 17 00:00:00 2001
From: root
Date: Fri, 14 Nov 2025 02:00:05 +0000
Subject: [PATCH 1/9] =?UTF-8?q?=F0=9F=A4=96=20refactor:=20eliminate=20dupl?=
 =?UTF-8?q?ication=20in=20knownModels.ts?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Fix GPT_MINI to use the correct model ID (gpt-5.1-codex-mini)
- Remove duplicated string constant exports (SONNET, GPT, etc.)
  - Callers now use KNOWN_MODELS.SONNET.id directly
- Export KNOWN_MODELS object for direct access
- Build MODEL_NAMES programmatically from KNOWN_MODELS
  - Groups by provider automatically
  - No manual duplication needed
- Add integration test verifying all known models exist in models.json
  - Tests run outside the IPC layer
  - Catches missing/renamed models early

Generated with `mux`
---
 src/constants/knownModels.ts                 |  122 +
 src/services/historyService.ts               |    3 +-
 src/services/mock/mockScenarioPlayer.ts      |    3 +-
 src/services/mock/scenarios/basicChat.ts     |    5 +-
 .../mock/scenarios/permissionModes.ts        |    9 +-
 src/services/mock/scenarios/review.ts        |   11 +-
 src/services/mock/scenarios/slashCommands.ts |    5 +-
 src/services/mock/scenarios/toolFlows.ts     |   21 +-
 src/services/streamManager.test.ts           |   13 +-
 src/services/workspaceTitleGenerator.ts      |    5 +-
 src/utils/ai/models.ts                       |    4 +-
 src/utils/main/StreamingTokenTracker.test.ts |   13 +-
 src/utils/main/tokenizer.test.ts             |    3 +-
 src/utils/main/tokenizer.ts                  |   19 +-
 src/utils/messages/compactionOptions.test.ts |   15 +-
 src/utils/slashCommands/compact.test.ts      |   15 +-
 src/utils/slashCommands/parser.test.ts       |    5 +-
 .../slashCommands/parser_multiline.test.ts   |    3 +-
 src/utils/slashCommands/registry.ts          |   13 +-
 src/utils/tokens/modelStats.test.ts          |   13 +-
 src/utils/tokens/models.json                 | 3179 ++++++++++++++---
 tests/models/knownModels.test.ts             |   44 +
 22 files changed, 2845 insertions(+), 678 deletions(-)
 create mode 100644 src/constants/knownModels.ts
 create mode 100644 tests/models/knownModels.test.ts

diff --git a/src/constants/knownModels.ts b/src/constants/knownModels.ts
new file mode 100644
index 000000000..512ab41e0
--- /dev/null
+++ b/src/constants/knownModels.ts
@@ -0,0 +1,122 @@
+/**
+ * Centralized model metadata. Update model versions here and every consumer will follow.
+ */
+
+type ModelProvider = "anthropic" | "openai";
+
+interface KnownModelDefinition {
+  /** Provider identifier used by SDK factories */
+  provider: ModelProvider;
+  /** Provider-specific model name (no provider prefix) */
+  providerModelId: string;
+  /** Aliases that should resolve to this model */
+  aliases?: string[];
+  /** Preload tokenizer encodings at startup */
+  warm?: boolean;
+  /** Use as global default model */
+  isDefault?: boolean;
+  /** Optional tokenizer override for ai-tokenizer */
+  tokenizerOverride?: string;
+}
+
+interface KnownModel extends KnownModelDefinition {
+  /** Full model id string in the format provider:model */
+  id: `${ModelProvider}:${string}`;
+}
+
+// Model definitions. Legacy models are deliberately omitted; these are the
+// models the community focuses on.
+const MODEL_DEFINITIONS = {
+  SONNET: {
+    provider: "anthropic",
+    providerModelId: "claude-sonnet-4-5",
+    aliases: ["sonnet"],
+    warm: true,
+    isDefault: true,
+    tokenizerOverride: "anthropic/claude-sonnet-4.5",
+  },
+  HAIKU: {
+    provider: "anthropic",
+    providerModelId: "claude-haiku-4-5",
+    aliases: ["haiku"],
+    tokenizerOverride: "anthropic/claude-3.5-haiku",
+  },
+  OPUS: {
+    provider: "anthropic",
+    providerModelId: "claude-opus-4-1",
+    aliases: ["opus"],
+  },
+  GPT: {
+    provider: "openai",
+    providerModelId: "gpt-5.1",
+    aliases: ["gpt-5.1"],
+    warm: true,
+  },
+  GPT_PRO: {
+    provider: "openai",
+    providerModelId: "gpt-5-pro",
+    aliases: ["gpt-5-pro"],
+  },
+  GPT_CODEX: {
+    provider: "openai",
+    providerModelId: "gpt-5.1-codex",
+    aliases: ["codex"],
+    warm: true,
+  },
+  GPT_MINI: {
+    provider: "openai",
+    providerModelId: "gpt-5.1-codex-mini",
+  },
+} as const satisfies Record<string, KnownModelDefinition>;
+
+export type KnownModelKey = keyof typeof MODEL_DEFINITIONS;
+
+export const KNOWN_MODELS = Object.fromEntries(
+  Object.entries(MODEL_DEFINITIONS).map(([key, definition]) => [
+    key,
+    {
+      ...definition,
+      id: `${definition.provider}:${definition.providerModelId}` as `${ModelProvider}:${string}`,
+    },
+  ])
+) as Record<KnownModelKey, KnownModel>;
+
+export function getKnownModel(key: KnownModelKey): KnownModel {
+  return KNOWN_MODELS[key];
+}
+
+// ------------------------------------------------------------------------------------
+// Derived collections
+// ------------------------------------------------------------------------------------
+
+const DEFAULT_MODEL_ENTRY =
+  Object.values(KNOWN_MODELS).find((model) => model.isDefault) ?? KNOWN_MODELS.SONNET;
+
+export const DEFAULT_MODEL = DEFAULT_MODEL_ENTRY.id;
+
+export const DEFAULT_WARM_MODELS = Object.values(KNOWN_MODELS)
+  .filter((model) => model.warm)
+  .map((model) => model.id);
+
+export const MODEL_ABBREVIATIONS = Object.fromEntries(
+  Object.values(KNOWN_MODELS)
+    .flatMap((model) => (model.aliases ?? []).map((alias) => [alias, model.id]))
+    .sort(([a], [b]) => a.localeCompare(b))
+) as Record<string, string>;
+
+export const TOKENIZER_MODEL_OVERRIDES = Object.fromEntries(
+  Object.values(KNOWN_MODELS)
+    .filter((model) => Boolean(model.tokenizerOverride))
+    .map((model) => [model.id, model.tokenizerOverride as string])
+) as Record<string, string>;
+
+export const MODEL_NAMES = Object.entries(KNOWN_MODELS).reduce(
+  (acc, [key, model]) => {
+    if (!acc[model.provider]) {
+      acc[model.provider] = {} as Record<string, string>;
+    }
+    acc[model.provider][key] = model.providerModelId;
+    return acc;
+  },
+  {} as Record<ModelProvider, Record<string, string>>
+);
diff --git a/src/services/historyService.ts b/src/services/historyService.ts
index 5219c96de..4a310a0f5 100644
--- a/src/services/historyService.ts
+++ b/src/services/historyService.ts
@@ -7,6 +7,7 @@ import type { Config } from "@/config";
 import { workspaceFileLocks } from "@/utils/concurrency/workspaceFileLocks";
 import { log } from "./log";
 import { getTokenizerForModel } from "@/utils/main/tokenizer";
+import { KNOWN_MODELS } from "@/constants/knownModels";
 
 /**
  * HistoryService - Manages chat history persistence and sequence numbering
@@ -340,7 +341,7 @@ export class HistoryService {
     }
 
     // Get tokenizer for counting (use a default model)
-    const tokenizer = await getTokenizerForModel("anthropic:claude-sonnet-4-5");
+    const tokenizer = await getTokenizerForModel(KNOWN_MODELS.SONNET.id);
 
     // Count tokens for each message
     // We stringify the entire message for simplicity - only relative weights matter
diff --git a/src/services/mock/mockScenarioPlayer.ts b/src/services/mock/mockScenarioPlayer.ts
index 4735cea5a..9aac0f071 100644
--- a/src/services/mock/mockScenarioPlayer.ts
+++ b/src/services/mock/mockScenarioPlayer.ts
@@ -18,8 +18,9 @@ import type { StreamStartEvent, StreamDeltaEvent, StreamEndEvent } from "@/types
 import type { ToolCallStartEvent, ToolCallEndEvent } from "@/types/stream";
 import type { ReasoningDeltaEvent } from "@/types/stream";
 import { getTokenizerForModel } from "@/utils/main/tokenizer";
+import { KNOWN_MODELS } from "@/constants/knownModels";
 
-const MOCK_TOKENIZER_MODEL = "openai:gpt-5";
+const MOCK_TOKENIZER_MODEL = KNOWN_MODELS.GPT.id;
 const TOKENIZE_TIMEOUT_MS = 150;
 
 let tokenizerFallbackLogged = false;
diff --git a/src/services/mock/scenarios/basicChat.ts b/src/services/mock/scenarios/basicChat.ts
index 3127b634e..f401269f4 100644
--- a/src/services/mock/scenarios/basicChat.ts
+++ b/src/services/mock/scenarios/basicChat.ts
@@ -1,5 +1,6 @@
 import type { ScenarioTurn } from "../scenarioTypes";
 import { STREAM_BASE_DELAY } from "../scenarioTypes";
+import { KNOWN_MODELS } from "@/constants/knownModels";
 
 export const LIST_PROGRAMMING_LANGUAGES = "List 3 programming languages";
 
@@ -12,7 +13,7 @@ const listProgrammingLanguagesTurn: ScenarioTurn = {
   assistant: {
     messageId: "msg-basic-1",
     events: [
-      { kind: "stream-start", delay: 0, messageId: "msg-basic-1", model: "openai:gpt-5" },
+      { kind: "stream-start", delay: 0, messageId: "msg-basic-1", model: KNOWN_MODELS.GPT.id },
       {
         kind: "stream-delta",
         delay: STREAM_BASE_DELAY,
@@ -37,7 +38,7 @@ const listProgrammingLanguagesTurn: ScenarioTurn = {
         kind: "stream-end",
         delay: STREAM_BASE_DELAY * 5,
         metadata: {
-          model: "openai:gpt-5",
+          model: KNOWN_MODELS.GPT.id,
           inputTokens: 64,
           outputTokens: 48,
           systemMessageTokens: 12,
diff --git a/src/services/mock/scenarios/permissionModes.ts b/src/services/mock/scenarios/permissionModes.ts
index 7dab116d0..dc35cb9f1 100644
--- a/src/services/mock/scenarios/permissionModes.ts
+++
b/src/services/mock/scenarios/permissionModes.ts @@ -1,4 +1,5 @@ import type { ScenarioTurn } from "../scenarioTypes"; +import { KNOWN_MODELS } from "@/constants/knownModels"; import { STREAM_BASE_DELAY } from "../scenarioTypes"; export const PERMISSION_MODE_PROMPTS = { @@ -19,7 +20,7 @@ const planRefactorTurn: ScenarioTurn = { kind: "stream-start", delay: 0, messageId: "msg-plan-refactor", - model: "openai:gpt-5", + model: KNOWN_MODELS.GPT.id, }, { kind: "stream-delta", @@ -45,7 +46,7 @@ const planRefactorTurn: ScenarioTurn = { kind: "stream-end", delay: STREAM_BASE_DELAY * 5, metadata: { - model: "openai:gpt-5", + model: KNOWN_MODELS.GPT.id, inputTokens: 180, outputTokens: 130, systemMessageTokens: 24, @@ -74,7 +75,7 @@ const executePlanTurn: ScenarioTurn = { kind: "stream-start", delay: 0, messageId: "msg-exec-refactor", - model: "openai:gpt-5", + model: KNOWN_MODELS.GPT.id, }, { kind: "tool-start", @@ -118,7 +119,7 @@ const executePlanTurn: ScenarioTurn = { kind: "stream-end", delay: STREAM_BASE_DELAY * 3, metadata: { - model: "openai:gpt-5", + model: KNOWN_MODELS.GPT.id, inputTokens: 220, outputTokens: 110, systemMessageTokens: 18, diff --git a/src/services/mock/scenarios/review.ts b/src/services/mock/scenarios/review.ts index 0015c4f58..a33c6dc52 100644 --- a/src/services/mock/scenarios/review.ts +++ b/src/services/mock/scenarios/review.ts @@ -1,4 +1,5 @@ import type { ScenarioTurn } from "../scenarioTypes"; +import { KNOWN_MODELS } from "@/constants/knownModels"; import { STREAM_BASE_DELAY } from "../scenarioTypes"; export const REVIEW_PROMPTS = { @@ -16,7 +17,7 @@ const summarizeBranchesTurn: ScenarioTurn = { assistant: { messageId: "msg-plan-1", events: [ - { kind: "stream-start", delay: 0, messageId: "msg-plan-1", model: "openai:gpt-5" }, + { kind: "stream-start", delay: 0, messageId: "msg-plan-1", model: KNOWN_MODELS.GPT.id }, { kind: "reasoning-delta", delay: STREAM_BASE_DELAY, @@ -61,7 +62,7 @@ const summarizeBranchesTurn: ScenarioTurn = { kind: "stream-end", delay: STREAM_BASE_DELAY * 6, metadata: { - model: "openai:gpt-5", + model: KNOWN_MODELS.GPT.id, inputTokens: 128, outputTokens: 85, systemMessageTokens: 32, @@ -86,7 +87,7 @@ const openOnboardingDocTurn: ScenarioTurn = { assistant: { messageId: "msg-exec-1", events: [ - { kind: "stream-start", delay: 0, messageId: "msg-exec-1", model: "openai:gpt-5" }, + { kind: "stream-start", delay: 0, messageId: "msg-exec-1", model: KNOWN_MODELS.GPT.id }, { kind: "tool-start", delay: STREAM_BASE_DELAY, @@ -114,7 +115,7 @@ const showOnboardingDocTurn: ScenarioTurn = { assistant: { messageId: "msg-exec-2", events: [ - { kind: "stream-start", delay: 0, messageId: "msg-exec-2", model: "openai:gpt-5" }, + { kind: "stream-start", delay: 0, messageId: "msg-exec-2", model: KNOWN_MODELS.GPT.id }, { kind: "tool-start", delay: STREAM_BASE_DELAY, @@ -153,7 +154,7 @@ const showOnboardingDocTurn: ScenarioTurn = { kind: "stream-end", delay: STREAM_BASE_DELAY * 3, metadata: { - model: "openai:gpt-5", + model: KNOWN_MODELS.GPT.id, inputTokens: 96, outputTokens: 142, systemMessageTokens: 32, diff --git a/src/services/mock/scenarios/slashCommands.ts b/src/services/mock/scenarios/slashCommands.ts index 087ac2ebb..627be57a4 100644 --- a/src/services/mock/scenarios/slashCommands.ts +++ b/src/services/mock/scenarios/slashCommands.ts @@ -1,4 +1,5 @@ import type { ScenarioTurn } from "../scenarioTypes"; +import { KNOWN_MODELS } from "@/constants/knownModels"; import { STREAM_BASE_DELAY } from "../scenarioTypes"; export const SLASH_COMMAND_PROMPTS = { @@ -24,7 
+25,7 @@ const compactConversationTurn: ScenarioTurn = { kind: "stream-start", delay: 0, messageId: "msg-slash-compact-1", - model: "openai:gpt-5", + model: KNOWN_MODELS.GPT.id, }, { kind: "stream-delta", @@ -35,7 +36,7 @@ const compactConversationTurn: ScenarioTurn = { kind: "stream-end", delay: STREAM_BASE_DELAY * 2, metadata: { - model: "openai:gpt-5", + model: KNOWN_MODELS.GPT.id, inputTokens: 220, outputTokens: 96, systemMessageTokens: 18, diff --git a/src/services/mock/scenarios/toolFlows.ts b/src/services/mock/scenarios/toolFlows.ts index 01ca24ae7..3da467993 100644 --- a/src/services/mock/scenarios/toolFlows.ts +++ b/src/services/mock/scenarios/toolFlows.ts @@ -1,4 +1,5 @@ import type { ScenarioTurn } from "../scenarioTypes"; +import { KNOWN_MODELS } from "@/constants/knownModels"; import { STREAM_BASE_DELAY } from "../scenarioTypes"; export const TOOL_FLOW_PROMPTS = { @@ -19,7 +20,7 @@ const fileReadTurn: ScenarioTurn = { assistant: { messageId: "msg-tool-file-read", events: [ - { kind: "stream-start", delay: 0, messageId: "msg-tool-file-read", model: "openai:gpt-5" }, + { kind: "stream-start", delay: 0, messageId: "msg-tool-file-read", model: KNOWN_MODELS.GPT.id }, { kind: "tool-start", delay: STREAM_BASE_DELAY, @@ -55,7 +56,7 @@ const fileReadTurn: ScenarioTurn = { kind: "stream-end", delay: STREAM_BASE_DELAY * 3, metadata: { - model: "openai:gpt-5", + model: KNOWN_MODELS.GPT.id, inputTokens: 92, outputTokens: 64, systemMessageTokens: 18, @@ -78,7 +79,7 @@ const listDirectoryTurn: ScenarioTurn = { assistant: { messageId: "msg-tool-bash-ls", events: [ - { kind: "stream-start", delay: 0, messageId: "msg-tool-bash-ls", model: "openai:gpt-5" }, + { kind: "stream-start", delay: 0, messageId: "msg-tool-bash-ls", model: KNOWN_MODELS.GPT.id }, { kind: "tool-start", delay: STREAM_BASE_DELAY, @@ -122,7 +123,7 @@ const listDirectoryTurn: ScenarioTurn = { kind: "stream-end", delay: STREAM_BASE_DELAY * 3 + 500, metadata: { - model: "openai:gpt-5", + model: KNOWN_MODELS.GPT.id, inputTokens: 74, outputTokens: 58, systemMessageTokens: 16, @@ -151,7 +152,7 @@ const createTestFileTurn: ScenarioTurn = { kind: "stream-start", delay: 0, messageId: "msg-tool-create-test-file", - model: "openai:gpt-5", + model: KNOWN_MODELS.GPT.id, }, { kind: "tool-start", @@ -181,7 +182,7 @@ const createTestFileTurn: ScenarioTurn = { kind: "stream-end", delay: STREAM_BASE_DELAY * 3, metadata: { - model: "openai:gpt-5", + model: KNOWN_MODELS.GPT.id, inputTokens: 80, outputTokens: 40, systemMessageTokens: 12, @@ -205,7 +206,7 @@ const readTestFileTurn: ScenarioTurn = { kind: "stream-start", delay: 0, messageId: "msg-tool-read-test-file", - model: "openai:gpt-5", + model: KNOWN_MODELS.GPT.id, }, { kind: "tool-start", @@ -242,7 +243,7 @@ const readTestFileTurn: ScenarioTurn = { kind: "stream-end", delay: STREAM_BASE_DELAY * 3, metadata: { - model: "openai:gpt-5", + model: KNOWN_MODELS.GPT.id, inputTokens: 76, outputTokens: 52, systemMessageTokens: 12, @@ -269,7 +270,7 @@ const recallTestFileTurn: ScenarioTurn = { kind: "stream-start", delay: 0, messageId: "msg-tool-recall-test-file", - model: "openai:gpt-5", + model: KNOWN_MODELS.GPT.id, }, { kind: "stream-delta", @@ -280,7 +281,7 @@ const recallTestFileTurn: ScenarioTurn = { kind: "stream-end", delay: STREAM_BASE_DELAY * 2, metadata: { - model: "openai:gpt-5", + model: KNOWN_MODELS.GPT.id, inputTokens: 60, outputTokens: 34, systemMessageTokens: 10, diff --git a/src/services/streamManager.test.ts b/src/services/streamManager.test.ts index fd4b49e54..3232d7f22 100644 --- 
a/src/services/streamManager.test.ts +++ b/src/services/streamManager.test.ts @@ -1,4 +1,5 @@ import { describe, test, expect, beforeEach, mock } from "bun:test"; +import { KNOWN_MODELS } from "@/constants/knownModels"; import { StreamManager } from "./streamManager"; import type { HistoryService } from "./historyService"; import type { PartialService } from "./partialService"; @@ -84,7 +85,7 @@ describe("StreamManager - Concurrent Stream Prevention", () => { workspaceId, [{ role: "user", content: "Say hello and nothing else" }], model, - "anthropic:claude-sonnet-4-5", + KNOWN_MODELS.SONNET.id, 1, "You are a helpful assistant", runtime, @@ -102,7 +103,7 @@ describe("StreamManager - Concurrent Stream Prevention", () => { workspaceId, [{ role: "user", content: "Say goodbye and nothing else" }], model, - "anthropic:claude-sonnet-4-5", + KNOWN_MODELS.SONNET.id, 2, "You are a helpful assistant", runtime, @@ -274,7 +275,7 @@ describe("StreamManager - Concurrent Stream Prevention", () => { workspaceId, [{ role: "user", content: "test 1" }], model, - "anthropic:claude-sonnet-4-5", + KNOWN_MODELS.SONNET.id, 1, "system", runtime, @@ -285,7 +286,7 @@ describe("StreamManager - Concurrent Stream Prevention", () => { workspaceId, [{ role: "user", content: "test 2" }], model, - "anthropic:claude-sonnet-4-5", + KNOWN_MODELS.SONNET.id, 2, "system", runtime, @@ -296,7 +297,7 @@ describe("StreamManager - Concurrent Stream Prevention", () => { workspaceId, [{ role: "user", content: "test 3" }], model, - "anthropic:claude-sonnet-4-5", + KNOWN_MODELS.SONNET.id, 3, "system", runtime, @@ -379,7 +380,7 @@ describe("StreamManager - Unavailable Tool Handling", () => { messageId: "test-message-1", token: "test-token", startTime: Date.now(), - model: "anthropic:claude-sonnet-4-5", + model: KNOWN_MODELS.SONNET.id, historySequence: 1, parts: [], lastPartialWriteTime: 0, diff --git a/src/services/workspaceTitleGenerator.ts b/src/services/workspaceTitleGenerator.ts index 353b4ab50..e5c14c3a2 100644 --- a/src/services/workspaceTitleGenerator.ts +++ b/src/services/workspaceTitleGenerator.ts @@ -4,6 +4,7 @@ import type { Config } from "@/config"; import { log } from "./log"; import { createAnthropic } from "@ai-sdk/anthropic"; import { createOpenAI } from "@ai-sdk/openai"; +import { MODEL_NAMES } from "@/constants/knownModels"; const workspaceNameSchema = z.object({ name: z @@ -72,7 +73,7 @@ function getModelForTitleGeneration(modelString: string, config: Config): Langua const provider = createAnthropic({ apiKey: String(providersConfig.anthropic.apiKey), }); - return provider("claude-haiku-4-5"); + return provider(MODEL_NAMES.anthropic.HAIKU); } // Try OpenAI GPT-5-mini second @@ -80,7 +81,7 @@ function getModelForTitleGeneration(modelString: string, config: Config): Langua const provider = createOpenAI({ apiKey: String(providersConfig.openai.apiKey), }); - return provider("gpt-5-mini"); + return provider(MODEL_NAMES.openai.GPT_MINI); } // Parse user's model as fallback diff --git a/src/utils/ai/models.ts b/src/utils/ai/models.ts index 907414ba3..0f240ad64 100644 --- a/src/utils/ai/models.ts +++ b/src/utils/ai/models.ts @@ -2,7 +2,9 @@ * Model configuration and constants */ -export const defaultModel = "anthropic:claude-sonnet-4-5"; +import { DEFAULT_MODEL } from "@/constants/knownModels"; + +export const defaultModel = DEFAULT_MODEL; /** * Extract the model name from a model string (e.g., "anthropic:claude-sonnet-4-5" -> "claude-sonnet-4-5") diff --git a/src/utils/main/StreamingTokenTracker.test.ts 
b/src/utils/main/StreamingTokenTracker.test.ts index 584e2e2e1..feef3167c 100644 --- a/src/utils/main/StreamingTokenTracker.test.ts +++ b/src/utils/main/StreamingTokenTracker.test.ts @@ -1,4 +1,5 @@ import { beforeEach, describe, expect, jest, test } from "@jest/globals"; +import { KNOWN_MODELS } from "@/constants/knownModels"; import { StreamingTokenTracker } from "./StreamingTokenTracker"; jest.setTimeout(20000); @@ -12,32 +13,32 @@ describe("StreamingTokenTracker", () => { describe("countTokens", () => { test("returns 0 for empty string", async () => { - await tracker.setModel("anthropic:claude-sonnet-4-5"); + await tracker.setModel(KNOWN_MODELS.SONNET.id); expect(await tracker.countTokens("")).toBe(0); }); test("counts tokens in simple text", async () => { - await tracker.setModel("anthropic:claude-sonnet-4-5"); + await tracker.setModel(KNOWN_MODELS.SONNET.id); const count = await tracker.countTokens("Hello world"); expect(count).toBeGreaterThan(0); expect(count).toBeLessThan(10); // Reasonable upper bound }); test("counts tokens in longer text", async () => { - await tracker.setModel("anthropic:claude-sonnet-4-5"); + await tracker.setModel(KNOWN_MODELS.SONNET.id); const text = "This is a longer piece of text with more tokens"; const count = await tracker.countTokens(text); expect(count).toBeGreaterThan(5); }); test("handles special characters", async () => { - await tracker.setModel("anthropic:claude-sonnet-4-5"); + await tracker.setModel(KNOWN_MODELS.SONNET.id); const count = await tracker.countTokens("🚀 emoji test"); expect(count).toBeGreaterThan(0); }); test("is consistent for repeated calls", async () => { - await tracker.setModel("anthropic:claude-sonnet-4-5"); + await tracker.setModel(KNOWN_MODELS.SONNET.id); const text = "Test consistency"; const count1 = await tracker.countTokens(text); const count2 = await tracker.countTokens(text); @@ -47,7 +48,7 @@ describe("StreamingTokenTracker", () => { describe("setModel", () => { test("switches tokenizer for different models", async () => { - await tracker.setModel("anthropic:claude-sonnet-4-5"); + await tracker.setModel(KNOWN_MODELS.SONNET.id); const initial = await tracker.countTokens("test"); await tracker.setModel("openai:gpt-4"); diff --git a/src/utils/main/tokenizer.test.ts b/src/utils/main/tokenizer.test.ts index 0b8512fdd..8b19066e3 100644 --- a/src/utils/main/tokenizer.test.ts +++ b/src/utils/main/tokenizer.test.ts @@ -7,10 +7,11 @@ import { getTokenizerForModel, loadTokenizerModules, } from "./tokenizer"; +import { KNOWN_MODELS } from "@/constants/knownModels"; jest.setTimeout(20000); -const model = "openai:gpt-5"; +const model = KNOWN_MODELS.GPT.id; beforeAll(async () => { // warm up the worker_thread and tokenizer before running tests await expect(loadTokenizerModules([model])).resolves.toHaveLength(1); diff --git a/src/utils/main/tokenizer.ts b/src/utils/main/tokenizer.ts index ac1483a9a..7a5632c54 100644 --- a/src/utils/main/tokenizer.ts +++ b/src/utils/main/tokenizer.ts @@ -5,6 +5,10 @@ import { getAvailableTools, getToolSchemas } from "@/utils/tools/toolDefinitions import type { CountTokensInput } from "./tokenizer.worker"; import { models, type ModelName } from "ai-tokenizer"; import { run } from "./workerPool"; +import { + TOKENIZER_MODEL_OVERRIDES, + DEFAULT_WARM_MODELS, +} from "@/constants/knownModels"; /** * Public tokenizer interface exposed to callers. 
@@ -15,19 +19,6 @@ export interface Tokenizer { countTokens: (text: string) => Promise; } -const MODEL_KEY_OVERRIDES: Record = { - "anthropic:claude-sonnet-4-5": "anthropic/claude-sonnet-4.5", - // FIXME(ThomasK33): Temporary workaround since ai-tokenizer does not yet - // claude-haiku-4.5 - "anthropic:claude-haiku-4-5": "anthropic/claude-3.5-haiku", -}; - -const DEFAULT_WARM_MODELS = [ - "openai:gpt-5", - "openai:gpt-5-codex", - "anthropic:claude-sonnet-4-5", -] as const; - const encodingPromises = new Map>(); const inFlightCounts = new Map>(); const tokenCountCache = new LRUCache({ @@ -44,7 +35,7 @@ function normalizeModelKey(modelName: string): ModelName | null { "Model name must be a non-empty string" ); - const override = MODEL_KEY_OVERRIDES[modelName]; + const override = TOKENIZER_MODEL_OVERRIDES[modelName]; const normalized = override ?? (modelName.includes(":") ? modelName.replace(":", "/") : modelName); diff --git a/src/utils/messages/compactionOptions.test.ts b/src/utils/messages/compactionOptions.test.ts index b745052ea..d223ee4b7 100644 --- a/src/utils/messages/compactionOptions.test.ts +++ b/src/utils/messages/compactionOptions.test.ts @@ -5,10 +5,11 @@ import { applyCompactionOverrides } from "./compactionOptions"; import type { SendMessageOptions } from "@/types/ipc"; import type { CompactionRequestData } from "@/types/message"; +import { KNOWN_MODELS } from "@/constants/knownModels"; describe("applyCompactionOverrides", () => { const baseOptions: SendMessageOptions = { - model: "anthropic:claude-sonnet-4-5", + model: KNOWN_MODELS.SONNET.id, thinkingLevel: "medium", toolPolicy: [], mode: "exec", @@ -18,23 +19,23 @@ describe("applyCompactionOverrides", () => { const compactData: CompactionRequestData = {}; const result = applyCompactionOverrides(baseOptions, compactData); - expect(result.model).toBe("anthropic:claude-sonnet-4-5"); + expect(result.model).toBe(KNOWN_MODELS.SONNET.id); expect(result.mode).toBe("compact"); }); it("applies custom model override", () => { const compactData: CompactionRequestData = { - model: "anthropic:claude-haiku-4-5", + model: KNOWN_MODELS.HAIKU.id, }; const result = applyCompactionOverrides(baseOptions, compactData); - expect(result.model).toBe("anthropic:claude-haiku-4-5"); + expect(result.model).toBe(KNOWN_MODELS.HAIKU.id); }); it("preserves workspace thinking level for all models", () => { // Test Anthropic model const anthropicData: CompactionRequestData = { - model: "anthropic:claude-haiku-4-5", + model: KNOWN_MODELS.HAIKU.id, }; const anthropicResult = applyCompactionOverrides(baseOptions, anthropicData); expect(anthropicResult.thinkingLevel).toBe("medium"); @@ -78,12 +79,12 @@ describe("applyCompactionOverrides", () => { it("applies all overrides together", () => { const compactData: CompactionRequestData = { - model: "openai:gpt-5", + model: KNOWN_MODELS.GPT.id, maxOutputTokens: 5000, }; const result = applyCompactionOverrides(baseOptions, compactData); - expect(result.model).toBe("openai:gpt-5"); + expect(result.model).toBe(KNOWN_MODELS.GPT.id); expect(result.maxOutputTokens).toBe(5000); expect(result.mode).toBe("compact"); expect(result.thinkingLevel).toBe("medium"); // Non-Anthropic preserves original diff --git a/src/utils/slashCommands/compact.test.ts b/src/utils/slashCommands/compact.test.ts index e83e236ef..5013defc3 100644 --- a/src/utils/slashCommands/compact.test.ts +++ b/src/utils/slashCommands/compact.test.ts @@ -1,6 +1,7 @@ /** * Tests for compact command parser using minimist */ +import { KNOWN_MODELS } from 
"@/constants/knownModels"; import { parseCommand } from "./parser"; describe("compact command parser", () => { @@ -127,7 +128,7 @@ describe("compact command parser", () => { type: "compact", maxOutputTokens: undefined, continueMessage: undefined, - model: "anthropic:claude-sonnet-4-5", + model: KNOWN_MODELS.SONNET.id, }); }); @@ -137,7 +138,7 @@ describe("compact command parser", () => { type: "compact", maxOutputTokens: undefined, continueMessage: undefined, - model: "anthropic:claude-opus-4-1", + model: KNOWN_MODELS.OPUS.id, }); }); @@ -147,7 +148,7 @@ describe("compact command parser", () => { type: "compact", maxOutputTokens: 5000, continueMessage: "Keep going", - model: "anthropic:claude-haiku-4-5", + model: KNOWN_MODELS.HAIKU.id, }); }); @@ -157,7 +158,7 @@ describe("compact command parser", () => { type: "compact", maxOutputTokens: 3000, continueMessage: "Continue", - model: "anthropic:claude-opus-4-1", + model: KNOWN_MODELS.OPUS.id, }); }); @@ -177,7 +178,7 @@ describe("compact command parser", () => { type: "compact", maxOutputTokens: undefined, continueMessage: undefined, - model: "openai:gpt-5-codex", + model: KNOWN_MODELS.GPT_CODEX.id, }); }); @@ -328,7 +329,7 @@ describe("multiline continue messages", () => { type: "compact", maxOutputTokens: undefined, continueMessage: "Continue with the implementation", - model: "anthropic:claude-haiku-4-5", + model: KNOWN_MODELS.HAIKU.id, }); }); @@ -338,7 +339,7 @@ describe("multiline continue messages", () => { type: "compact", maxOutputTokens: 5000, continueMessage: "Finish the refactoring", - model: "anthropic:claude-sonnet-4-5", + model: KNOWN_MODELS.SONNET.id, }); }); }); diff --git a/src/utils/slashCommands/parser.test.ts b/src/utils/slashCommands/parser.test.ts index 33da57a8b..b22cc6483 100644 --- a/src/utils/slashCommands/parser.test.ts +++ b/src/utils/slashCommands/parser.test.ts @@ -1,4 +1,5 @@ import { describe, it, expect } from "bun:test"; +import { KNOWN_MODELS } from "@/constants/knownModels"; import { parseCommand } from "./parser"; // Test helpers @@ -122,11 +123,11 @@ describe("commandParser", () => { }); it("should parse /model with abbreviation", () => { - expectModelSet("/model opus", "anthropic:claude-opus-4-1"); + expectModelSet("/model opus", KNOWN_MODELS.OPUS.id); }); it("should parse /model with full provider:model format", () => { - expectModelSet("/model anthropic:claude-sonnet-4-5", "anthropic:claude-sonnet-4-5"); + expectModelSet("/model anthropic:claude-sonnet-4-5", KNOWN_MODELS.SONNET.id); }); it("should parse /model help when no args", () => { diff --git a/src/utils/slashCommands/parser_multiline.test.ts b/src/utils/slashCommands/parser_multiline.test.ts index 3c36c2bfe..b753f050e 100644 --- a/src/utils/slashCommands/parser_multiline.test.ts +++ b/src/utils/slashCommands/parser_multiline.test.ts @@ -1,6 +1,7 @@ /** * Tests to ensure multiline support doesn't break other commands */ +import { KNOWN_MODELS } from "@/constants/knownModels"; import { parseCommand } from "./parser"; describe("parser multiline compatibility", () => { @@ -28,7 +29,7 @@ describe("parser multiline compatibility", () => { const result = parseCommand("/model\nopus"); expect(result).toEqual({ type: "model-set", - modelString: "anthropic:claude-opus-4-1", + modelString: KNOWN_MODELS.OPUS.id, }); }); diff --git a/src/utils/slashCommands/registry.ts b/src/utils/slashCommands/registry.ts index 6b99013cd..ea7a0f89d 100644 --- a/src/utils/slashCommands/registry.ts +++ b/src/utils/slashCommands/registry.ts @@ -9,6 +9,7 @@ import type { 
SuggestionDefinition, } from "./types"; import minimist from "minimist"; +import { MODEL_ABBREVIATIONS } from "@/constants/knownModels"; /** * Parse multiline command input into first-line tokens and remaining message @@ -38,16 +39,8 @@ function parseMultilineCommand(rawInput: string): { }; } -// Model abbreviations for common models -// Order matters: first model becomes the default for new chats -export const MODEL_ABBREVIATIONS: Record = { - sonnet: "anthropic:claude-sonnet-4-5", - haiku: "anthropic:claude-haiku-4-5", - opus: "anthropic:claude-opus-4-1", - "gpt-5": "openai:gpt-5", - "gpt-5-pro": "openai:gpt-5-pro", - codex: "openai:gpt-5-codex", -}; +// Re-export MODEL_ABBREVIATIONS from constants for backwards compatibility +export { MODEL_ABBREVIATIONS }; // Provider configuration data const DEFAULT_PROVIDER_NAMES: SuggestionDefinition[] = [ diff --git a/src/utils/tokens/modelStats.test.ts b/src/utils/tokens/modelStats.test.ts index c9a38bfd9..3c8999889 100644 --- a/src/utils/tokens/modelStats.test.ts +++ b/src/utils/tokens/modelStats.test.ts @@ -1,17 +1,18 @@ import { describe, expect, test, it } from "bun:test"; import { getModelStats } from "./modelStats"; +import { KNOWN_MODELS } from "@/constants/knownModels"; describe("getModelStats", () => { describe("direct model lookups", () => { test("should find anthropic models by direct name", () => { - const stats = getModelStats("anthropic:claude-opus-4-1"); + const stats = getModelStats(KNOWN_MODELS.OPUS.id); expect(stats).not.toBeNull(); expect(stats?.max_input_tokens).toBeGreaterThan(0); expect(stats?.input_cost_per_token).toBeGreaterThan(0); }); test("should find openai models by direct name", () => { - const stats = getModelStats("openai:gpt-5"); + const stats = getModelStats(KNOWN_MODELS.GPT.id); expect(stats).not.toBeNull(); expect(stats?.max_input_tokens).toBeGreaterThan(0); }); @@ -89,7 +90,7 @@ describe("getModelStats", () => { describe("model without provider prefix", () => { test("should handle model string without provider", () => { - const stats = getModelStats("gpt-5"); + const stats = getModelStats("gpt-5.1"); expect(stats).not.toBeNull(); expect(stats?.max_input_tokens).toBeGreaterThan(0); }); @@ -97,7 +98,7 @@ describe("getModelStats", () => { describe("existing test cases", () => { it("should return model stats for claude-sonnet-4-5", () => { - const stats = getModelStats("anthropic:claude-sonnet-4-5"); + const stats = getModelStats(KNOWN_MODELS.SONNET.id); expect(stats).not.toBeNull(); expect(stats?.input_cost_per_token).toBe(0.000003); @@ -113,7 +114,7 @@ describe("getModelStats", () => { }); it("should return cache pricing when available", () => { - const stats = getModelStats("anthropic:claude-sonnet-4-5"); + const stats = getModelStats(KNOWN_MODELS.SONNET.id); expect(stats?.cache_creation_input_token_cost).toBe(0.00000375); expect(stats?.cache_read_input_token_cost).toBe(3e-7); @@ -128,7 +129,7 @@ describe("getModelStats", () => { describe("model data validation", () => { test("should include cache costs when available", () => { - const stats = getModelStats("anthropic:claude-opus-4-1"); + const stats = getModelStats(KNOWN_MODELS.OPUS.id); // Anthropic models have cache costs if (stats) { expect(stats.cache_creation_input_token_cost).toBeDefined(); diff --git a/src/utils/tokens/models.json b/src/utils/tokens/models.json index 6b21fc735..cacb24c1f 100644 --- a/src/utils/tokens/models.json +++ b/src/utils/tokens/models.json @@ -20,7 +20,13 @@ "search_context_size_low": 0, "search_context_size_medium": 0 }, - 
"supported_regions": ["global", "us-west-2", "eu-west-1", "ap-southeast-1", "ap-northeast-1"], + "supported_regions": [ + "global", + "us-west-2", + "eu-west-1", + "ap-southeast-1", + "ap-northeast-1" + ], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -139,7 +145,9 @@ "mode": "image_generation", "output_cost_per_image": 0.021, "source": "https://docs.aimlapi.com/", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "aiml/dall-e-3": { "litellm_provider": "aiml", @@ -149,7 +157,9 @@ "mode": "image_generation", "output_cost_per_image": 0.042, "source": "https://docs.aimlapi.com/", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "aiml/flux-pro": { "litellm_provider": "aiml", @@ -159,19 +169,25 @@ "mode": "image_generation", "output_cost_per_image": 0.053, "source": "https://docs.aimlapi.com/", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "aiml/flux-pro/v1.1": { "litellm_provider": "aiml", "mode": "image_generation", "output_cost_per_image": 0.042, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "aiml/flux-pro/v1.1-ultra": { "litellm_provider": "aiml", "mode": "image_generation", "output_cost_per_image": 0.063, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "aiml/flux-realism": { "litellm_provider": "aiml", @@ -181,7 +197,9 @@ "mode": "image_generation", "output_cost_per_image": 0.037, "source": "https://docs.aimlapi.com/", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "aiml/flux/dev": { "litellm_provider": "aiml", @@ -191,7 +209,9 @@ "mode": "image_generation", "output_cost_per_image": 0.026, "source": "https://docs.aimlapi.com/", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "aiml/flux/kontext-max/text-to-image": { "litellm_provider": "aiml", @@ -201,7 +221,9 @@ "mode": "image_generation", "output_cost_per_image": 0.084, "source": "https://docs.aimlapi.com/", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "aiml/flux/kontext-pro/text-to-image": { "litellm_provider": "aiml", @@ -211,7 +233,9 @@ "mode": "image_generation", "output_cost_per_image": 0.042, "source": "https://docs.aimlapi.com/", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "aiml/flux/schnell": { "litellm_provider": "aiml", @@ -221,7 +245,9 @@ "mode": "image_generation", "output_cost_per_image": 0.003, "source": "https://docs.aimlapi.com/", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "amazon.nova-lite-v1:0": { "input_cost_per_token": 6e-8, @@ -1022,9 +1048,16 @@ "max_tokens": 100000, "mode": "responses", "output_cost_per_token": 0.000006, - "supported_endpoints": ["/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ 
-1053,9 +1086,16 @@ "max_tokens": 1024, "mode": "chat", "output_cost_per_token": 0.000012, - "supported_endpoints": ["/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": false, @@ -1169,8 +1209,14 @@ "mode": "chat", "output_cost_per_audio_token": 0.00008, "output_cost_per_token": 0.000022, - "supported_modalities": ["text", "audio"], - "supported_output_modalities": ["text", "audio"], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -1565,9 +1611,18 @@ "mode": "chat", "output_cost_per_token": 0.000008, "output_cost_per_token_batches": 0.000004, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1590,9 +1645,18 @@ "mode": "chat", "output_cost_per_token": 0.000008, "output_cost_per_token_batches": 0.000004, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1614,9 +1678,18 @@ "mode": "chat", "output_cost_per_token": 0.0000016, "output_cost_per_token_batches": 8e-7, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1639,9 +1712,18 @@ "mode": "chat", "output_cost_per_token": 0.0000016, "output_cost_per_token_batches": 8e-7, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1663,9 +1745,18 @@ "mode": "chat", "output_cost_per_token": 4e-7, "output_cost_per_token_batches": 2e-7, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + 
"supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1687,9 +1778,18 @@ "mode": "chat", "output_cost_per_token": 4e-7, "output_cost_per_token_batches": 2e-7, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1792,9 +1892,17 @@ "mode": "chat", "output_cost_per_audio_token": 0.00008, "output_cost_per_token": 0.00001, - "supported_endpoints": ["/v1/chat/completions"], - "supported_modalities": ["text", "audio"], - "supported_output_modalities": ["text", "audio"], + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1847,9 +1955,17 @@ "mode": "chat", "output_cost_per_audio_token": 0.00008, "output_cost_per_token": 0.00001, - "supported_endpoints": ["/v1/chat/completions"], - "supported_modalities": ["text", "audio"], - "supported_output_modalities": ["text", "audio"], + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1887,7 +2003,9 @@ "max_output_tokens": 2000, "mode": "audio_transcription", "output_cost_per_token": 0.000005, - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "azure/gpt-4o-mini-tts": { "input_cost_per_token": 0.0000025, @@ -1896,9 +2014,16 @@ "output_cost_per_audio_token": 0.000012, "output_cost_per_second": 0.00025, "output_cost_per_token": 0.00001, - "supported_endpoints": ["/v1/audio/speech"], - "supported_modalities": ["text", "audio"], - "supported_output_modalities": ["audio"] + "supported_endpoints": [ + "/v1/audio/speech" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "audio" + ] }, "azure/gpt-4o-realtime-preview-2024-10-01": { "cache_creation_input_audio_token_cost": 0.00002, @@ -1930,8 +2055,14 @@ "mode": "chat", "output_cost_per_audio_token": 0.00008, "output_cost_per_token": 0.00002, - "supported_modalities": ["text", "audio"], - "supported_output_modalities": ["text", "audio"], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -1947,7 +2078,9 @@ "max_output_tokens": 2000, "mode": "audio_transcription", "output_cost_per_token": 0.00001, - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "azure/gpt-5": { "cache_read_input_token_cost": 1.25e-7, @@ -1958,9 +2091,18 @@ "max_tokens": 128000, 
"mode": "chat", "output_cost_per_token": 0.00001, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1981,9 +2123,18 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.00001, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2005,9 +2156,18 @@ "mode": "chat", "output_cost_per_token": 0.00001, "source": "https://azure.microsoft.com/en-us/blog/gpt-5-in-azure-ai-foundry-the-future-of-ai-apps-and-agents-starts-here/", - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2028,9 +2188,18 @@ "max_tokens": 16384, "mode": "chat", "output_cost_per_token": 0.00001, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2051,9 +2220,16 @@ "max_tokens": 128000, "mode": "responses", "output_cost_per_token": 0.00001, - "supported_endpoints": ["/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2074,9 +2250,18 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.000002, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2097,9 +2282,18 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.000002, - "supported_endpoints": 
["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2120,9 +2314,18 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 4e-7, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2143,9 +2346,18 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 4e-7, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2166,9 +2378,16 @@ "mode": "responses", "output_cost_per_token": 0.00012, "source": "https://learn.microsoft.com/en-us/azure/ai-foundry/foundry-models/concepts/models-sold-directly-by-azure?pivots=azure-openai&tabs=global-standard-aoai%2Cstandard-chat-completions%2Cglobal-standard#gpt-5", - "supported_endpoints": ["/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -2184,7 +2403,9 @@ "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/hd/1024-x-1024/dall-e-3": { "input_cost_per_pixel": 7.629e-8, @@ -2209,133 +2430,171 @@ "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/high/1024-x-1536/gpt-image-1": { "input_cost_per_pixel": 1.58945719e-7, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/high/1536-x-1024/gpt-image-1": { "input_cost_per_pixel": 1.58945719e-7, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/low/1024-x-1024/gpt-image-1": { "input_cost_per_pixel": 1.0490417e-8, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + 
"supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/low/1024-x-1536/gpt-image-1": { "input_cost_per_pixel": 1.0172526e-8, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/low/1536-x-1024/gpt-image-1": { "input_cost_per_pixel": 1.0172526e-8, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/medium/1024-x-1024/gpt-image-1": { "input_cost_per_pixel": 4.0054321e-8, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/medium/1024-x-1536/gpt-image-1": { "input_cost_per_pixel": 4.0054321e-8, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/medium/1536-x-1024/gpt-image-1": { "input_cost_per_pixel": 4.0054321e-8, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/gpt-image-1-mini": { "input_cost_per_pixel": 8.0566406e-9, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/low/1024-x-1024/gpt-image-1-mini": { "input_cost_per_pixel": 2.0751953125e-9, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/low/1024-x-1536/gpt-image-1-mini": { "input_cost_per_pixel": 2.0751953125e-9, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/low/1536-x-1024/gpt-image-1-mini": { "input_cost_per_pixel": 2.0345052083e-9, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/medium/1024-x-1024/gpt-image-1-mini": { "input_cost_per_pixel": 8.056640625e-9, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/medium/1024-x-1536/gpt-image-1-mini": { "input_cost_per_pixel": 8.056640625e-9, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/medium/1536-x-1024/gpt-image-1-mini": { "input_cost_per_pixel": 7.9752604167e-9, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/high/1024-x-1024/gpt-image-1-mini": { "input_cost_per_pixel": 3.173828125e-8, "litellm_provider": "azure", "mode": "image_generation", 
"output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/high/1024-x-1536/gpt-image-1-mini": { "input_cost_per_pixel": 3.173828125e-8, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/high/1536-x-1024/gpt-image-1-mini": { "input_cost_per_pixel": 3.1575520833e-8, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/mistral-large-2402": { "input_cost_per_token": 0.000008, @@ -2457,9 +2716,18 @@ "max_tokens": 100000, "mode": "chat", "output_cost_per_token": 0.000008, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_prompt_caching": true, @@ -2478,9 +2746,18 @@ "max_tokens": 100000, "mode": "chat", "output_cost_per_token": 0.00004, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_prompt_caching": true, @@ -2498,9 +2775,18 @@ "max_tokens": 100000, "mode": "responses", "output_cost_per_token": 0.00004, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -2551,9 +2837,18 @@ "mode": "responses", "output_cost_per_token": 0.00008, "output_cost_per_token_batches": 0.00004, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_prompt_caching": false, @@ -2572,9 +2867,18 @@ "mode": "responses", "output_cost_per_token": 0.00008, "output_cost_per_token_batches": 0.00004, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" 
+ ], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_prompt_caching": false, @@ -2592,9 +2896,18 @@ "max_tokens": 100000, "mode": "chat", "output_cost_per_token": 0.0000044, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_prompt_caching": true, @@ -2790,8 +3103,14 @@ "mode": "chat", "output_cost_per_audio_token": 0.00008, "output_cost_per_token": 0.000022, - "supported_modalities": ["text", "audio"], - "supported_output_modalities": ["text", "audio"], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -2893,14 +3212,18 @@ "mode": "image_generation", "output_cost_per_image": 0.04, "source": "https://techcommunity.microsoft.com/blog/azure-ai-foundry-blog/black-forest-labs-flux-1-kontext-pro-and-flux1-1-pro-now-available-in-azure-ai-f/4434659", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure_ai/FLUX.1-Kontext-pro": { "litellm_provider": "azure_ai", "mode": "image_generation", "output_cost_per_image": 0.04, "source": "https://azuremarketplace.microsoft.com/pt-br/marketplace/apps/cohere.cohere-embed-4-offer?tab=PlansAndPrice", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure_ai/Llama-3.2-11B-Vision-Instruct": { "input_cost_per_token": 3.7e-7, @@ -3183,28 +3506,36 @@ "litellm_provider": "azure_ai", "ocr_cost_per_page": 0.003, "mode": "ocr", - "supported_endpoints": ["/v1/ocr"], + "supported_endpoints": [ + "/v1/ocr" + ], "source": "https://devblogs.microsoft.com/foundry/whats-new-in-azure-ai-foundry-august-2025/#mistral-document-ai-(ocr)-%E2%80%94-serverless-in-foundry" }, "azure_ai/doc-intelligence/prebuilt-read": { "litellm_provider": "azure_ai", "ocr_cost_per_page": 0.0015, "mode": "ocr", - "supported_endpoints": ["/v1/ocr"], + "supported_endpoints": [ + "/v1/ocr" + ], "source": "https://azure.microsoft.com/en-us/pricing/details/ai-document-intelligence/" }, "azure_ai/doc-intelligence/prebuilt-layout": { "litellm_provider": "azure_ai", "ocr_cost_per_page": 0.01, "mode": "ocr", - "supported_endpoints": ["/v1/ocr"], + "supported_endpoints": [ + "/v1/ocr" + ], "source": "https://azure.microsoft.com/en-us/pricing/details/ai-document-intelligence/" }, "azure_ai/doc-intelligence/prebuilt-document": { "litellm_provider": "azure_ai", "ocr_cost_per_page": 0.01, "mode": "ocr", - "supported_endpoints": ["/v1/ocr"], + "supported_endpoints": [ + "/v1/ocr" + ], "source": "https://azure.microsoft.com/en-us/pricing/details/ai-document-intelligence/" }, "azure_ai/MAI-DS-R1": { @@ -3296,8 +3627,13 @@ "output_cost_per_token": 0, "output_vector_size": 3072, "source": "https://azuremarketplace.microsoft.com/pt-br/marketplace/apps/cohere.cohere-embed-4-offer?tab=PlansAndPrice", - "supported_endpoints": ["/v1/embeddings"], - "supported_modalities": ["text", "image"], + "supported_endpoints": [ + "/v1/embeddings" + ], + "supported_modalities": [ + "text", + "image" + ], 
"supports_embedding_image_input": true }, "azure_ai/global/grok-3": { @@ -5329,9 +5665,16 @@ "max_tokens": 100000, "mode": "responses", "output_cost_per_token": 0.000006, - "supported_endpoints": ["/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -5536,9 +5879,16 @@ "max_tokens": 1024, "mode": "chat", "output_cost_per_token": 0.000012, - "supported_endpoints": ["/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": false, @@ -5558,7 +5908,9 @@ "mode": "chat", "output_cost_per_token": 0.0000017, "source": "https://api-docs.deepseek.com/quick_start/pricing", - "supported_endpoints": ["/v1/chat/completions"], + "supported_endpoints": [ + "/v1/chat/completions" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -5577,7 +5929,9 @@ "mode": "chat", "output_cost_per_token": 0.0000017, "source": "https://api-docs.deepseek.com/quick_start/pricing", - "supported_endpoints": ["/v1/chat/completions"], + "supported_endpoints": [ + "/v1/chat/completions" + ], "supports_function_calling": false, "supports_native_streaming": true, "supports_parallel_function_calling": false, @@ -5614,12 +5968,18 @@ { "input_cost_per_token": 5e-8, "output_cost_per_token": 4e-7, - "range": [0, 256000] + "range": [ + 0, + 256000 + ] }, { "input_cost_per_token": 2.5e-7, "output_cost_per_token": 0.000002, - "range": [256000, 1000000] + "range": [ + 256000, + 1000000 + ] } ] }, @@ -5637,12 +5997,18 @@ { "input_cost_per_token": 5e-8, "output_cost_per_token": 4e-7, - "range": [0, 256000] + "range": [ + 0, + 256000 + ] }, { "input_cost_per_token": 2.5e-7, "output_cost_per_token": 0.000002, - "range": [256000, 1000000] + "range": [ + 256000, + 1000000 + ] } ] }, @@ -5728,13 +6094,19 @@ "input_cost_per_token": 4e-7, "output_cost_per_reasoning_token": 0.000004, "output_cost_per_token": 0.0000012, - "range": [0, 256000] + "range": [ + 0, + 256000 + ] }, { "input_cost_per_token": 0.0000012, "output_cost_per_reasoning_token": 0.000012, "output_cost_per_token": 0.0000036, - "range": [256000, 1000000] + "range": [ + 256000, + 1000000 + ] } ] }, @@ -5753,13 +6125,19 @@ "input_cost_per_token": 4e-7, "output_cost_per_reasoning_token": 0.000004, "output_cost_per_token": 0.0000012, - "range": [0, 256000] + "range": [ + 0, + 256000 + ] }, { "input_cost_per_token": 0.0000012, "output_cost_per_reasoning_token": 0.000012, "output_cost_per_token": 0.0000036, - "range": [256000, 1000000] + "range": [ + 256000, + 1000000 + ] } ] }, @@ -5778,13 +6156,19 @@ "input_cost_per_token": 4e-7, "output_cost_per_reasoning_token": 0.000004, "output_cost_per_token": 0.0000012, - "range": [0, 256000] + "range": [ + 0, + 256000 + ] }, { "input_cost_per_token": 0.0000012, "output_cost_per_reasoning_token": 0.000012, "output_cost_per_token": 0.0000036, - "range": [256000, 1000000] + "range": [ + 256000, + 1000000 + ] } ] }, @@ -5869,25 +6253,37 @@ "cache_read_input_token_cost": 8e-8, 
"input_cost_per_token": 3e-7, "output_cost_per_token": 0.0000015, - "range": [0, 32000] + "range": [ + 0, + 32000 + ] }, { "cache_read_input_token_cost": 1.2e-7, "input_cost_per_token": 5e-7, "output_cost_per_token": 0.0000025, - "range": [32000, 128000] + "range": [ + 32000, + 128000 + ] }, { "cache_read_input_token_cost": 2e-7, "input_cost_per_token": 8e-7, "output_cost_per_token": 0.000004, - "range": [128000, 256000] + "range": [ + 128000, + 256000 + ] }, { "cache_read_input_token_cost": 4e-7, "input_cost_per_token": 0.0000016, "output_cost_per_token": 0.0000096, - "range": [256000, 1000000] + "range": [ + 256000, + 1000000 + ] } ] }, @@ -5905,22 +6301,34 @@ { "input_cost_per_token": 3e-7, "output_cost_per_token": 0.0000015, - "range": [0, 32000] + "range": [ + 0, + 32000 + ] }, { "input_cost_per_token": 5e-7, "output_cost_per_token": 0.0000025, - "range": [32000, 128000] + "range": [ + 32000, + 128000 + ] }, { "input_cost_per_token": 8e-7, "output_cost_per_token": 0.000004, - "range": [128000, 256000] + "range": [ + 128000, + 256000 + ] }, { "input_cost_per_token": 0.0000016, "output_cost_per_token": 0.0000096, - "range": [256000, 1000000] + "range": [ + 256000, + 1000000 + ] } ] }, @@ -5939,25 +6347,37 @@ "cache_read_input_token_cost": 1e-7, "input_cost_per_token": 0.000001, "output_cost_per_token": 0.000005, - "range": [0, 32000] + "range": [ + 0, + 32000 + ] }, { "cache_read_input_token_cost": 1.8e-7, "input_cost_per_token": 0.0000018, "output_cost_per_token": 0.000009, - "range": [32000, 128000] + "range": [ + 32000, + 128000 + ] }, { "cache_read_input_token_cost": 3e-7, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000015, - "range": [128000, 256000] + "range": [ + 128000, + 256000 + ] }, { "cache_read_input_token_cost": 6e-7, "input_cost_per_token": 0.000006, "output_cost_per_token": 0.00006, - "range": [256000, 1000000] + "range": [ + 256000, + 1000000 + ] } ] }, @@ -5975,22 +6395,34 @@ { "input_cost_per_token": 0.000001, "output_cost_per_token": 0.000005, - "range": [0, 32000] + "range": [ + 0, + 32000 + ] }, { "input_cost_per_token": 0.0000018, "output_cost_per_token": 0.000009, - "range": [32000, 128000] + "range": [ + 32000, + 128000 + ] }, { "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000015, - "range": [128000, 256000] + "range": [ + 128000, + 256000 + ] }, { "input_cost_per_token": 0.000006, "output_cost_per_token": 0.00006, - "range": [256000, 1000000] + "range": [ + 256000, + 1000000 + ] } ] }, @@ -6008,17 +6440,26 @@ { "input_cost_per_token": 0.0000012, "output_cost_per_token": 0.000006, - "range": [0, 32000] + "range": [ + 0, + 32000 + ] }, { "input_cost_per_token": 0.0000024, "output_cost_per_token": 0.000012, - "range": [32000, 128000] + "range": [ + 32000, + 128000 + ] }, { "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000015, - "range": [128000, 252000] + "range": [ + 128000, + 252000 + ] } ] }, @@ -6236,7 +6677,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/base-conversationalai": { "input_cost_per_second": 0.00020833, @@ -6248,7 +6691,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/base-finance": { "input_cost_per_second": 0.00020833, @@ -6260,7 
+6705,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/base-general": { "input_cost_per_second": 0.00020833, @@ -6272,7 +6719,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/base-meeting": { "input_cost_per_second": 0.00020833, @@ -6284,7 +6733,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/base-phonecall": { "input_cost_per_second": 0.00020833, @@ -6296,7 +6747,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/base-video": { "input_cost_per_second": 0.00020833, @@ -6308,7 +6761,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/base-voicemail": { "input_cost_per_second": 0.00020833, @@ -6320,7 +6775,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/enhanced": { "input_cost_per_second": 0.00024167, @@ -6332,7 +6789,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/enhanced-finance": { "input_cost_per_second": 0.00024167, @@ -6344,7 +6803,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/enhanced-general": { "input_cost_per_second": 0.00024167, @@ -6356,7 +6817,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/enhanced-meeting": { "input_cost_per_second": 0.00024167, @@ -6368,7 +6831,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/enhanced-phonecall": { "input_cost_per_second": 0.00024167, @@ -6380,7 +6845,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova": { "input_cost_per_second": 0.00007167, @@ -6392,7 +6859,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": 
[ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-2": { "input_cost_per_second": 0.00007167, @@ -6404,7 +6873,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-2-atc": { "input_cost_per_second": 0.00007167, @@ -6416,7 +6887,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-2-automotive": { "input_cost_per_second": 0.00007167, @@ -6428,7 +6901,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-2-conversationalai": { "input_cost_per_second": 0.00007167, @@ -6440,7 +6915,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-2-drivethru": { "input_cost_per_second": 0.00007167, @@ -6452,7 +6929,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-2-finance": { "input_cost_per_second": 0.00007167, @@ -6464,7 +6943,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-2-general": { "input_cost_per_second": 0.00007167, @@ -6476,7 +6957,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-2-meeting": { "input_cost_per_second": 0.00007167, @@ -6488,7 +6971,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-2-phonecall": { "input_cost_per_second": 0.00007167, @@ -6500,7 +6985,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-2-video": { "input_cost_per_second": 0.00007167, @@ -6512,7 +6999,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-2-voicemail": { "input_cost_per_second": 0.00007167, @@ -6524,7 +7013,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-3": { "input_cost_per_second": 0.00007167, @@ -6536,7 +7027,9 @@ "mode": "audio_transcription", 
"output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-3-general": { "input_cost_per_second": 0.00007167, @@ -6548,7 +7041,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-3-medical": { "input_cost_per_second": 0.00008667, @@ -6560,7 +7055,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-general": { "input_cost_per_second": 0.00007167, @@ -6572,7 +7069,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-phonecall": { "input_cost_per_second": 0.00007167, @@ -6584,7 +7083,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/whisper": { "input_cost_per_second": 0.0001, @@ -6595,7 +7096,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/whisper-base": { "input_cost_per_second": 0.0001, @@ -6606,7 +7109,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/whisper-large": { "input_cost_per_second": 0.0001, @@ -6617,7 +7122,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/whisper-medium": { "input_cost_per_second": 0.0001, @@ -6628,7 +7135,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/whisper-small": { "input_cost_per_second": 0.0001, @@ -6639,7 +7148,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/whisper-tiny": { "input_cost_per_second": 0.0001, @@ -6650,7 +7161,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepinfra/Gryphe/MythoMax-L2-13b": { "max_tokens": 4096, @@ -7492,11 +8005,17 @@ "tiered_pricing": [ { "input_cost_per_query": 0.005, - "max_results_range": [0, 25] + "max_results_range": [ + 0, + 25 + ] }, { "input_cost_per_query": 0.025, - "max_results_range": [26, 100] + "max_results_range": [ + 26, + 100 + ] } ] }, @@ -7506,43 
+8025,73 @@ "tiered_pricing": [ { "input_cost_per_query": 0.00166, - "max_results_range": [1, 10] + "max_results_range": [ + 1, + 10 + ] }, { "input_cost_per_query": 0.00332, - "max_results_range": [11, 20] + "max_results_range": [ + 11, + 20 + ] }, { "input_cost_per_query": 0.00498, - "max_results_range": [21, 30] + "max_results_range": [ + 21, + 30 + ] }, { "input_cost_per_query": 0.00664, - "max_results_range": [31, 40] + "max_results_range": [ + 31, + 40 + ] }, { "input_cost_per_query": 0.0083, - "max_results_range": [41, 50] + "max_results_range": [ + 41, + 50 + ] }, { "input_cost_per_query": 0.00996, - "max_results_range": [51, 60] + "max_results_range": [ + 51, + 60 + ] }, { "input_cost_per_query": 0.01162, - "max_results_range": [61, 70] + "max_results_range": [ + 61, + 70 + ] }, { "input_cost_per_query": 0.01328, - "max_results_range": [71, 80] + "max_results_range": [ + 71, + 80 + ] }, { "input_cost_per_query": 0.01494, - "max_results_range": [81, 90] + "max_results_range": [ + 81, + 90 + ] }, { "input_cost_per_query": 0.0166, - "max_results_range": [91, 100] + "max_results_range": [ + 91, + 100 + ] } ], "metadata": { @@ -7573,7 +8122,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://elevenlabs.io/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "elevenlabs/scribe_v1_experimental": { "input_cost_per_second": 0.0000611, @@ -7586,7 +8137,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://elevenlabs.io/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "embed-english-light-v2.0": { "input_cost_per_token": 1e-7, @@ -7958,31 +8511,41 @@ "litellm_provider": "fal_ai", "mode": "image_generation", "output_cost_per_image": 0.0398, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "fal_ai/fal-ai/flux-pro/v1.1-ultra": { "litellm_provider": "fal_ai", "mode": "image_generation", "output_cost_per_image": 0.0398, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "fal_ai/fal-ai/imagen4/preview": { "litellm_provider": "fal_ai", "mode": "image_generation", "output_cost_per_image": 0.0398, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "fal_ai/fal-ai/recraft/v3/text-to-image": { "litellm_provider": "fal_ai", "mode": "image_generation", "output_cost_per_image": 0.0398, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "fal_ai/fal-ai/stable-diffusion-v35-medium": { "litellm_provider": "fal_ai", "mode": "image_generation", "output_cost_per_image": 0.0398, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "featherless_ai/featherless-ai/Qwerky-72B": { "litellm_provider": "featherless_ai", @@ -9045,8 +9608,16 @@ "mode": "chat", "output_cost_per_token": 4e-7, "source": "https://ai.google.dev/pricing#2_0flash", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "image"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -9077,8 +9648,16 @@ 
"mode": "chat", "output_cost_per_token": 6e-7, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "image"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_audio_output": true, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9117,8 +9696,16 @@ "output_cost_per_token": 6e-7, "output_cost_per_token_above_128k_tokens": 0, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "image"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_audio_output": true, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9145,8 +9732,15 @@ "mode": "chat", "output_cost_per_token": 3e-7, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": true, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9174,8 +9768,15 @@ "mode": "chat", "output_cost_per_token": 3e-7, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": true, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9207,9 +9808,20 @@ "output_cost_per_token": 0.000002, "rpm": 10, "source": "https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/gemini#gemini-2-0-flash-live-preview-04-09", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "audio"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "audio" + ], "supports_audio_output": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -9240,8 +9852,16 @@ "mode": "chat", "output_cost_per_token": 4e-7, "source": "https://ai.google.dev/pricing#2_0flash", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "image"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -9281,8 +9901,16 @@ "output_cost_per_token": 0, "output_cost_per_token_above_128k_tokens": 0, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "image"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" 
+ ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_audio_output": true, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9321,12 +9949,21 @@ "output_cost_per_token": 0, "output_cost_per_token_above_128k_tokens": 0, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "image"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_audio_output": false, "supports_function_calling": false, "supports_parallel_function_calling": true, "supports_prompt_caching": true, + "supports_reasoning": true, "supports_response_schema": false, "supports_system_messages": true, "supports_tool_choice": true, @@ -9351,9 +9988,19 @@ "output_cost_per_token": 0.00001, "output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_input": true, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9384,9 +10031,20 @@ "output_cost_per_reasoning_token": 0.0000025, "output_cost_per_token": 0.0000025, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9420,9 +10078,21 @@ "output_cost_per_token": 0.0000025, "rpm": 100000, "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-flash-image", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "image"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9456,9 +10126,21 @@ "output_cost_per_token": 0.00003, "rpm": 100000, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "image"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], 
"supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9490,9 +10172,20 @@ "output_cost_per_reasoning_token": 4e-7, "output_cost_per_token": 4e-7, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9524,9 +10217,20 @@ "output_cost_per_reasoning_token": 4e-7, "output_cost_per_token": 4e-7, "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9558,9 +10262,20 @@ "output_cost_per_reasoning_token": 0.0000025, "output_cost_per_token": 0.0000025, "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9592,9 +10307,20 @@ "output_cost_per_audio_token": 0.000012, "output_cost_per_token": 0.000002, "source": "https://ai.google.dev/gemini-api/docs/pricing", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "audio"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "audio" + ], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -9627,9 +10353,20 @@ "output_cost_per_token": 0.000002, "rpm": 100000, "source": "https://ai.google.dev/gemini-api/docs/pricing", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "audio"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "audio" + ], "supports_audio_input": 
true, "supports_audio_output": true, "supports_function_calling": true, @@ -9662,9 +10399,20 @@ "output_cost_per_reasoning_token": 4e-7, "output_cost_per_token": 4e-7, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9696,9 +10444,20 @@ "output_cost_per_reasoning_token": 0.0000035, "output_cost_per_token": 6e-7, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9729,9 +10488,20 @@ "output_cost_per_reasoning_token": 0.0000025, "output_cost_per_token": 0.0000025, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9764,9 +10534,19 @@ "output_cost_per_token": 0.00001, "output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_input": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -9797,9 +10577,19 @@ "output_cost_per_token": 0.00001, "output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_input": true, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9831,9 +10621,20 @@ "output_cost_per_token": 0.00001, 
"output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9865,10 +10666,23 @@ "output_cost_per_token": 0.00001, "output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], - "supported_regions": ["global"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supported_regions": [ + "global" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9900,9 +10714,20 @@ "output_cost_per_token": 0.00001, "output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9934,8 +10759,12 @@ "output_cost_per_token": 0.00001, "output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", - "supported_modalities": ["text"], - "supported_output_modalities": ["audio"], + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "audio" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10380,8 +11209,16 @@ "output_cost_per_token": 4e-7, "rpm": 10000, "source": "https://ai.google.dev/pricing#2_0flash", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "image"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -10412,8 +11249,16 @@ "output_cost_per_token": 4e-7, "rpm": 10000, "source": "https://ai.google.dev/pricing#2_0flash", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "image"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_audio_output": false, "supports_function_calling": true, 
"supports_prompt_caching": true, @@ -10453,8 +11298,16 @@ "output_cost_per_token_above_128k_tokens": 0, "rpm": 10, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "image"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_audio_output": true, "supports_function_calling": true, "supports_prompt_caching": true, @@ -10482,8 +11335,15 @@ "output_cost_per_token": 3e-7, "rpm": 4000, "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.0-flash-lite", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": true, "supports_function_calling": true, "supports_prompt_caching": true, @@ -10512,8 +11372,15 @@ "output_cost_per_token": 3e-7, "rpm": 60000, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash-lite", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_prompt_caching": true, @@ -10545,9 +11412,20 @@ "output_cost_per_token": 0.0000015, "rpm": 10, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2-0-flash-live-001", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "audio"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "audio" + ], "supports_audio_output": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -10579,8 +11457,16 @@ "output_cost_per_token": 4e-7, "rpm": 10000, "source": "https://ai.google.dev/pricing#2_0flash", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "image"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -10621,8 +11507,16 @@ "output_cost_per_token_above_128k_tokens": 0, "rpm": 10, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "image"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_audio_output": true, "supports_function_calling": true, "supports_prompt_caching": true, @@ -10662,11 +11556,20 @@ "output_cost_per_token_above_128k_tokens": 0, "rpm": 10, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "image"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + 
], + "supported_output_modalities": [ + "text", + "image" + ], "supports_audio_output": true, "supports_function_calling": true, "supports_prompt_caching": true, + "supports_reasoning": true, "supports_response_schema": true, "supports_system_messages": true, "supports_tool_choice": true, @@ -10734,9 +11637,20 @@ "output_cost_per_token": 0.0000025, "rpm": 100000, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10758,6 +11672,7 @@ "litellm_provider": "vertex_ai-language-models", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, + "supports_reasoning": false, "max_images_per_prompt": 3000, "max_input_tokens": 32768, "max_output_tokens": 32768, @@ -10771,9 +11686,21 @@ "output_cost_per_token": 0.0000025, "rpm": 100000, "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-flash-image", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "image"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10807,9 +11734,21 @@ "output_cost_per_token": 0.00003, "rpm": 100000, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "image"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10842,9 +11781,20 @@ "output_cost_per_token": 4e-7, "rpm": 15, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-lite", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10878,9 +11828,20 @@ "output_cost_per_token": 4e-7, "rpm": 15, "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", 
"/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10914,9 +11875,20 @@ "output_cost_per_token": 0.0000025, "rpm": 15, "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10950,10 +11922,21 @@ "output_cost_per_token": 0.0000025, "rpm": 15, "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], - "supports_audio_output": false, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -10986,9 +11969,20 @@ "output_cost_per_token": 4e-7, "rpm": 15, "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -11022,9 +12016,20 @@ "output_cost_per_token": 4e-7, "rpm": 15, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-lite", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -11058,9 +12063,19 @@ "output_cost_per_token": 6e-7, "rpm": 10, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - 
"supported_endpoints": ["/v1/chat/completions", "/v1/completions"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_pdf_input": true, @@ -11092,9 +12107,19 @@ "output_cost_per_token": 0.0000025, "rpm": 10, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_pdf_input": true, @@ -11127,9 +12152,16 @@ "output_cost_per_token": 6e-7, "rpm": 10, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], - "supported_modalities": ["text"], - "supported_output_modalities": ["audio"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions" + ], + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "audio" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_prompt_caching": true, @@ -11160,9 +12192,19 @@ "output_cost_per_token_above_200k_tokens": 0.000015, "rpm": 2000, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_input": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -11195,9 +12237,19 @@ "output_cost_per_token_above_200k_tokens": 0, "rpm": 5, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_input": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -11230,8 +12282,15 @@ "output_cost_per_token_above_200k_tokens": 0.000015, "rpm": 10000, "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_pdf_input": true, @@ -11263,8 +12322,15 @@ "output_cost_per_token_above_200k_tokens": 0.000015, "rpm": 10000, "source": 
"https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_pdf_input": true, @@ -11297,8 +12363,15 @@ "output_cost_per_token_above_200k_tokens": 0.000015, "rpm": 10000, "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_pdf_input": true, @@ -11331,8 +12404,12 @@ "output_cost_per_token_above_200k_tokens": 0.000015, "rpm": 10000, "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", - "supported_modalities": ["text"], - "supported_output_modalities": ["audio"], + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "audio" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_prompt_caching": true, @@ -11559,8 +12636,12 @@ "mode": "video_generation", "output_cost_per_second": 0.35, "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": ["text"], - "supported_output_modalities": ["video"] + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] }, "gemini/veo-3.0-fast-generate-preview": { "litellm_provider": "gemini", @@ -11569,8 +12650,12 @@ "mode": "video_generation", "output_cost_per_second": 0.4, "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": ["text"], - "supported_output_modalities": ["video"] + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] }, "gemini/veo-3.0-generate-preview": { "litellm_provider": "gemini", @@ -11579,8 +12664,12 @@ "mode": "video_generation", "output_cost_per_second": 0.75, "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": ["text"], - "supported_output_modalities": ["video"] + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] }, "gemini/veo-3.1-fast-generate-preview": { "litellm_provider": "gemini", @@ -11589,8 +12678,12 @@ "mode": "video_generation", "output_cost_per_second": 0.15, "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": ["text"], - "supported_output_modalities": ["video"] + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] }, "gemini/veo-3.1-generate-preview": { "litellm_provider": "gemini", @@ -11599,8 +12692,12 @@ "mode": "video_generation", "output_cost_per_second": 0.4, "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": ["text"], - "supported_output_modalities": ["video"] + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] }, "google_pse/search": { "input_cost_per_query": 0.005, @@ -11993,9 +13090,18 @@ "output_cost_per_token": 0.000008, "output_cost_per_token_batches": 0.000004, "output_cost_per_token_priority": 0.000014, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": 
["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -12018,9 +13124,18 @@ "mode": "chat", "output_cost_per_token": 0.000008, "output_cost_per_token_batches": 0.000004, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -12046,9 +13161,18 @@ "output_cost_per_token": 0.0000016, "output_cost_per_token_batches": 8e-7, "output_cost_per_token_priority": 0.0000028, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -12071,9 +13195,18 @@ "mode": "chat", "output_cost_per_token": 0.0000016, "output_cost_per_token_batches": 8e-7, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -12099,9 +13232,18 @@ "output_cost_per_token": 4e-7, "output_cost_per_token_batches": 2e-7, "output_cost_per_token_priority": 8e-7, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -12124,9 +13266,18 @@ "mode": "chat", "output_cost_per_token": 4e-7, "output_cost_per_token_batches": 2e-7, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -12509,7 +13660,9 @@ "max_output_tokens": 2000, "mode": "audio_transcription", "output_cost_per_token": 0.000005, - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] 
}, "gpt-4o-mini-tts": { "input_cost_per_token": 0.0000025, @@ -12518,9 +13671,16 @@ "output_cost_per_audio_token": 0.000012, "output_cost_per_second": 0.00025, "output_cost_per_token": 0.00001, - "supported_endpoints": ["/v1/audio/speech"], - "supported_modalities": ["text", "audio"], - "supported_output_modalities": ["audio"] + "supported_endpoints": [ + "/v1/audio/speech" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "audio" + ] }, "gpt-4o-realtime-preview": { "cache_read_input_token_cost": 0.0000025, @@ -12649,7 +13809,9 @@ "max_output_tokens": 2000, "mode": "audio_transcription", "output_cost_per_token": 0.00001, - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "gpt-5": { "cache_read_input_token_cost": 1.25e-7, @@ -12666,9 +13828,90 @@ "output_cost_per_token": 0.00001, "output_cost_per_token_flex": 0.000005, "output_cost_per_token_priority": 0.00002, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true + }, + "gpt-5.1": { + "cache_read_input_token_cost": 1.25e-7, + "cache_read_input_token_cost_priority": 2.5e-7, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_priority": 0.0000025, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.00001, + "output_cost_per_token_priority": 0.00002, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "image" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true + }, + "gpt-5.1-2025-11-13": { + "cache_read_input_token_cost": 1.25e-7, + "cache_read_input_token_cost_priority": 2.5e-7, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_priority": 0.0000025, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.00001, + "output_cost_per_token_priority": 0.00002, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -12681,6 +13924,41 @@ "supports_service_tier": true, "supports_vision": true }, + "gpt-5.1-chat-latest": { + 
"cache_read_input_token_cost": 1.25e-7, + "cache_read_input_token_cost_priority": 2.5e-7, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_priority": 0.0000025, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "output_cost_per_token": 0.00001, + "output_cost_per_token_priority": 0.00002, + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "image" + ], + "supports_function_calling": false, + "supports_native_streaming": true, + "supports_parallel_function_calling": false, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": false, + "supports_vision": true + }, "gpt-5-pro": { "input_cost_per_token": 0.000015, "input_cost_per_token_batches": 0.0000075, @@ -12691,9 +13969,17 @@ "mode": "responses", "output_cost_per_token": 0.00012, "output_cost_per_token_batches": 0.00006, - "supported_endpoints": ["/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": false, "supports_parallel_function_calling": true, @@ -12716,9 +14002,17 @@ "mode": "responses", "output_cost_per_token": 0.00012, "output_cost_per_token_batches": 0.00006, - "supported_endpoints": ["/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": false, "supports_parallel_function_calling": true, @@ -12746,9 +14040,18 @@ "output_cost_per_token": 0.00001, "output_cost_per_token_flex": 0.000005, "output_cost_per_token_priority": 0.00002, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -12770,9 +14073,18 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.00001, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": false, "supports_native_streaming": true, "supports_parallel_function_calling": false, @@ -12793,9 +14105,18 @@ "max_tokens": 16384, "mode": "chat", "output_cost_per_token": 0.00001, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - 
"supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": false, "supports_native_streaming": true, "supports_parallel_function_calling": false, @@ -12816,9 +14137,82 @@ "max_tokens": 128000, "mode": "responses", "output_cost_per_token": 0.00001, - "supported_endpoints": ["/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": false, + "supports_tool_choice": true, + "supports_vision": true + }, + "gpt-5.1-codex": { + "cache_read_input_token_cost": 1.25e-7, + "cache_read_input_token_cost_priority": 2.5e-7, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_priority": 0.0000025, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 0.00001, + "output_cost_per_token_priority": 0.00002, + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": false, + "supports_tool_choice": true, + "supports_vision": true + }, + "gpt-5.1-codex-mini": { + "cache_read_input_token_cost": 2.5e-8, + "cache_read_input_token_cost_priority": 4.5e-8, + "input_cost_per_token": 2.5e-7, + "input_cost_per_token_priority": 4.5e-7, + "litellm_provider": "openai", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "responses", + "output_cost_per_token": 0.000002, + "output_cost_per_token_priority": 0.0000036, + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -12845,9 +14239,18 @@ "output_cost_per_token": 0.000002, "output_cost_per_token_flex": 0.000001, "output_cost_per_token_priority": 0.0000036, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -12875,9 +14278,18 @@ "output_cost_per_token": 0.000002, "output_cost_per_token_flex": 0.000001, "output_cost_per_token_priority": 0.0000036, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - 
"supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -12903,9 +14315,18 @@ "mode": "chat", "output_cost_per_token": 4e-7, "output_cost_per_token_flex": 2e-7, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -12929,9 +14350,18 @@ "mode": "chat", "output_cost_per_token": 4e-7, "output_cost_per_token_flex": 2e-7, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -12948,7 +14378,9 @@ "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "gpt-image-1-mini": { "cache_read_input_image_token_cost": 2.5e-7, @@ -12958,7 +14390,10 @@ "litellm_provider": "openai", "mode": "chat", "output_cost_per_image_token": 0.000008, - "supported_endpoints": ["/v1/images/generations", "/v1/images/edits"] + "supported_endpoints": [ + "/v1/images/generations", + "/v1/images/edits" + ] }, "gpt-realtime": { "cache_creation_input_audio_token_cost": 4e-7, @@ -12973,9 +14408,18 @@ "mode": "chat", "output_cost_per_audio_token": 0.000064, "output_cost_per_token": 0.000016, - "supported_endpoints": ["/v1/realtime"], - "supported_modalities": ["text", "image", "audio"], - "supported_output_modalities": ["text", "audio"], + "supported_endpoints": [ + "/v1/realtime" + ], + "supported_modalities": [ + "text", + "image", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -12995,9 +14439,18 @@ "mode": "chat", "output_cost_per_audio_token": 0.00002, "output_cost_per_token": 0.0000024, - "supported_endpoints": ["/v1/realtime"], - "supported_modalities": ["text", "image", "audio"], - "supported_output_modalities": ["text", "audio"], + "supported_endpoints": [ + "/v1/realtime" + ], + "supported_modalities": [ + "text", + "image", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -13018,9 +14471,18 @@ "mode": "chat", "output_cost_per_audio_token": 0.000064, "output_cost_per_token": 0.000016, - "supported_endpoints": ["/v1/realtime"], - "supported_modalities": ["text", "image", "audio"], - "supported_output_modalities": ["text", "audio"], + "supported_endpoints": [ + 
"/v1/realtime" + ], + "supported_modalities": [ + "text", + "image", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -13032,8 +14494,12 @@ "litellm_provider": "gradient_ai", "max_tokens": 2048, "mode": "chat", - "supported_endpoints": ["/v1/chat/completions"], - "supported_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], "supports_tool_choice": false }, "gradient_ai/anthropic-claude-3-opus": { @@ -13042,8 +14508,12 @@ "max_tokens": 1024, "mode": "chat", "output_cost_per_token": 0.000075, - "supported_endpoints": ["/v1/chat/completions"], - "supported_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], "supports_tool_choice": false }, "gradient_ai/anthropic-claude-3.5-haiku": { @@ -13052,8 +14522,12 @@ "max_tokens": 1024, "mode": "chat", "output_cost_per_token": 0.000004, - "supported_endpoints": ["/v1/chat/completions"], - "supported_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], "supports_tool_choice": false }, "gradient_ai/anthropic-claude-3.5-sonnet": { @@ -13062,8 +14536,12 @@ "max_tokens": 1024, "mode": "chat", "output_cost_per_token": 0.000015, - "supported_endpoints": ["/v1/chat/completions"], - "supported_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], "supports_tool_choice": false }, "gradient_ai/anthropic-claude-3.7-sonnet": { @@ -13072,8 +14550,12 @@ "max_tokens": 1024, "mode": "chat", "output_cost_per_token": 0.000015, - "supported_endpoints": ["/v1/chat/completions"], - "supported_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], "supports_tool_choice": false }, "gradient_ai/deepseek-r1-distill-llama-70b": { @@ -13082,8 +14564,12 @@ "max_tokens": 8000, "mode": "chat", "output_cost_per_token": 9.9e-7, - "supported_endpoints": ["/v1/chat/completions"], - "supported_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], "supports_tool_choice": false }, "gradient_ai/llama3-8b-instruct": { @@ -13092,8 +14578,12 @@ "max_tokens": 512, "mode": "chat", "output_cost_per_token": 2e-7, - "supported_endpoints": ["/v1/chat/completions"], - "supported_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], "supports_tool_choice": false }, "gradient_ai/llama3.3-70b-instruct": { @@ -13102,8 +14592,12 @@ "max_tokens": 2048, "mode": "chat", "output_cost_per_token": 6.5e-7, - "supported_endpoints": ["/v1/chat/completions"], - "supported_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], "supports_tool_choice": false }, "gradient_ai/mistral-nemo-instruct-2407": { @@ -13112,24 +14606,36 @@ "max_tokens": 512, "mode": "chat", "output_cost_per_token": 3e-7, - "supported_endpoints": ["/v1/chat/completions"], - "supported_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], "supports_tool_choice": false }, "gradient_ai/openai-gpt-4o": { "litellm_provider": "gradient_ai", "max_tokens": 16384, "mode": "chat", - "supported_endpoints": 
["/v1/chat/completions"], - "supported_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], "supports_tool_choice": false }, "gradient_ai/openai-gpt-4o-mini": { "litellm_provider": "gradient_ai", "max_tokens": 16384, "mode": "chat", - "supported_endpoints": ["/v1/chat/completions"], - "supported_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], "supports_tool_choice": false }, "gradient_ai/openai-o3": { @@ -13138,8 +14644,12 @@ "max_tokens": 100000, "mode": "chat", "output_cost_per_token": 0.000008, - "supported_endpoints": ["/v1/chat/completions"], - "supported_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], "supports_tool_choice": false }, "gradient_ai/openai-o3-mini": { @@ -13148,8 +14658,12 @@ "max_tokens": 100000, "mode": "chat", "output_cost_per_token": 0.0000044, - "supported_endpoints": ["/v1/chat/completions"], - "supported_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], "supports_tool_choice": false }, "lemonade/Qwen3-Coder-30B-A3B-Instruct-GGUF": { @@ -13632,21 +15146,27 @@ "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "high/1024-x-1536/gpt-image-1": { "input_cost_per_pixel": 1.58945719e-7, "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "high/1536-x-1024/gpt-image-1": { "input_cost_per_pixel": 1.58945719e-7, "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "hyperbolic/NousResearch/Hermes-3-Llama-3.1-70B": { "input_cost_per_token": 1.2e-7, @@ -14302,21 +15822,27 @@ "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "low/1024-x-1536/gpt-image-1": { "input_cost_per_pixel": 1.0172526e-8, "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "low/1536-x-1024/gpt-image-1": { "input_cost_per_pixel": 1.0172526e-8, "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "luminous-base": { "input_cost_per_token": 0.00003, @@ -14379,57 +15905,75 @@ "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "medium/1024-x-1536/gpt-image-1": { "input_cost_per_pixel": 4.0054321e-8, "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "medium/1536-x-1024/gpt-image-1": { "input_cost_per_pixel": 4.0054321e-8, "litellm_provider": "openai", "mode": 
"image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "low/1024-x-1024/gpt-image-1-mini": { "input_cost_per_image": 0.005, "litellm_provider": "openai", "mode": "image_generation", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "low/1024-x-1536/gpt-image-1-mini": { "input_cost_per_image": 0.006, "litellm_provider": "openai", "mode": "image_generation", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "low/1536-x-1024/gpt-image-1-mini": { "input_cost_per_image": 0.006, "litellm_provider": "openai", "mode": "image_generation", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "medium/1024-x-1024/gpt-image-1-mini": { "input_cost_per_image": 0.011, "litellm_provider": "openai", "mode": "image_generation", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "medium/1024-x-1536/gpt-image-1-mini": { "input_cost_per_image": 0.015, "litellm_provider": "openai", "mode": "image_generation", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "medium/1536-x-1024/gpt-image-1-mini": { "input_cost_per_image": 0.015, "litellm_provider": "openai", "mode": "image_generation", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "medlm-large": { "input_cost_per_character": 0.000005, @@ -14589,8 +16133,14 @@ "mode": "chat", "output_cost_per_token": 9.7e-7, "output_cost_per_token_batches": 4.85e-7, - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text", "code"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "code" + ], "supports_function_calling": true, "supports_tool_choice": false }, @@ -14604,8 +16154,14 @@ "mode": "chat", "output_cost_per_token": 6.6e-7, "output_cost_per_token_batches": 3.3e-7, - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text", "code"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "code" + ], "supports_function_calling": true, "supports_tool_choice": false }, @@ -14616,8 +16172,12 @@ "max_tokens": 128000, "mode": "chat", "source": "https://llama.developer.meta.com/docs/models", - "supported_modalities": ["text"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_tool_choice": true }, @@ -14628,8 +16188,12 @@ "max_tokens": 128000, "mode": "chat", "source": "https://llama.developer.meta.com/docs/models", - "supported_modalities": ["text"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_tool_choice": true }, @@ -14640,8 +16204,13 @@ "max_tokens": 128000, "mode": "chat", "source": "https://llama.developer.meta.com/docs/models", - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_tool_choice": true }, @@ -14652,8 
+16221,13 @@ "max_tokens": 128000, "mode": "chat", "source": "https://llama.developer.meta.com/docs/models", - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_tool_choice": true }, @@ -14801,12 +16375,29 @@ "supports_response_schema": true, "supports_tool_choice": true }, + "mistral/magistral-medium-2509": { + "input_cost_per_token": 0.000002, + "litellm_provider": "mistral", + "max_input_tokens": 40000, + "max_output_tokens": 40000, + "max_tokens": 40000, + "mode": "chat", + "output_cost_per_token": 0.000005, + "source": "https://mistral.ai/news/magistral", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, "mistral/mistral-ocr-latest": { "litellm_provider": "mistral", "ocr_cost_per_page": 0.001, "annotation_cost_per_page": 0.003, "mode": "ocr", - "supported_endpoints": ["/v1/ocr"], + "supported_endpoints": [ + "/v1/ocr" + ], "source": "https://mistral.ai/pricing#api-pricing" }, "mistral/mistral-ocr-2505-completion": { @@ -14814,7 +16405,9 @@ "ocr_cost_per_page": 0.001, "annotation_cost_per_page": 0.003, "mode": "ocr", - "supported_endpoints": ["/v1/ocr"], + "supported_endpoints": [ + "/v1/ocr" + ], "source": "https://mistral.ai/pricing#api-pricing" }, "mistral/magistral-medium-latest": { @@ -15222,6 +16815,20 @@ "source": "https://platform.moonshot.ai/docs/pricing", "supports_vision": true }, + "moonshot/kimi-k2-thinking": { + "cache_read_input_token_cost": 1.5e-7, + "input_cost_per_token": 6e-7, + "litellm_provider": "moonshot", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.0000025, + "source": "https://platform.moonshot.ai/docs/pricing/chat#generation-model-kimi-k2", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_web_search": true + }, "moonshot/moonshot-v1-128k": { "input_cost_per_token": 0.000002, "litellm_provider": "moonshot", @@ -15387,8 +16994,14 @@ "output_cost_per_token": 0, "output_vector_size": 768, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models", - "supported_endpoints": ["/v1/embeddings"], - "supported_modalities": ["text", "image", "video"] + "supported_endpoints": [ + "/v1/embeddings" + ], + "supported_modalities": [ + "text", + "image", + "video" + ] }, "multimodalembedding@001": { "input_cost_per_character": 2e-7, @@ -15404,8 +17017,14 @@ "output_cost_per_token": 0, "output_vector_size": 768, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models", - "supported_endpoints": ["/v1/embeddings"], - "supported_modalities": ["text", "image", "video"] + "supported_endpoints": [ + "/v1/embeddings" + ], + "supported_modalities": [ + "text", + "image", + "video" + ] }, "nscale/Qwen/QwQ-32B": { "input_cost_per_token": 1.8e-7, @@ -15441,7 +17060,9 @@ "mode": "image_generation", "output_cost_per_pixel": 0, "source": "https://docs.nscale.com/docs/inference/serverless-models/current#image-models", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "nscale/deepseek-ai/DeepSeek-R1-Distill-Llama-70B": { "input_cost_per_token": 3.75e-7, @@ -15546,7 +17167,9 @@ "mode": "image_generation", "output_cost_per_pixel": 0, "source": 
"https://docs.nscale.com/docs/inference/serverless-models/current#image-models", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "o1": { "cache_read_input_token_cost": 0.0000075, @@ -15652,9 +17275,17 @@ "mode": "responses", "output_cost_per_token": 0.0006, "output_cost_per_token_batches": 0.0003, - "supported_endpoints": ["/v1/responses", "/v1/batch"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/responses", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": false, "supports_parallel_function_calling": true, @@ -15676,9 +17307,17 @@ "mode": "responses", "output_cost_per_token": 0.0006, "output_cost_per_token_batches": 0.0003, - "supported_endpoints": ["/v1/responses", "/v1/batch"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/responses", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": false, "supports_parallel_function_calling": true, @@ -15711,8 +17350,13 @@ "/v1/completions", "/v1/batch" ], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_pdf_input": true, @@ -15738,8 +17382,13 @@ "/v1/completions", "/v1/batch" ], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_pdf_input": true, @@ -15761,9 +17410,18 @@ "mode": "responses", "output_cost_per_token": 0.00004, "output_cost_per_token_batches": 0.00002, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -15785,9 +17443,18 @@ "mode": "responses", "output_cost_per_token": 0.00004, "output_cost_per_token_batches": 0.00002, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -15842,9 +17509,17 @@ "mode": "responses", "output_cost_per_token": 0.00008, "output_cost_per_token_batches": 0.00004, - "supported_endpoints": ["/v1/responses", "/v1/batch"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + 
"/v1/responses", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_pdf_input": true, @@ -15864,9 +17539,17 @@ "mode": "responses", "output_cost_per_token": 0.00008, "output_cost_per_token_batches": 0.00004, - "supported_endpoints": ["/v1/responses", "/v1/batch"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/responses", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_pdf_input": true, @@ -15931,9 +17614,18 @@ "mode": "responses", "output_cost_per_token": 0.000008, "output_cost_per_token_batches": 0.000004, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -15955,9 +17647,18 @@ "mode": "responses", "output_cost_per_token": 0.000008, "output_cost_per_token_batches": 0.000004, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -16784,6 +18485,21 @@ "supports_reasoning": true, "supports_tool_choice": true }, + "openrouter/deepseek/deepseek-v3.2-exp": { + "input_cost_per_token": 2e-7, + "input_cost_per_token_cache_hit": 2e-8, + "litellm_provider": "openrouter", + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 4e-7, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": false, + "supports_tool_choice": true + }, "openrouter/deepseek/deepseek-coder": { "input_cost_per_token": 1.4e-7, "litellm_provider": "openrouter", @@ -17027,6 +18743,19 @@ "output_cost_per_token": 0.000001, "supports_tool_choice": true }, + "openrouter/minimax/minimax-m2": { + "input_cost_per_token": 2.55e-7, + "litellm_provider": "openrouter", + "max_input_tokens": 204800, + "max_output_tokens": 204800, + "max_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 0.00000102, + "supports_function_calling": true, + "supports_prompt_caching": false, + "supports_reasoning": true, + "supports_tool_choice": true + }, "openrouter/mistralai/mistral-7b-instruct": { "input_cost_per_token": 1.3e-7, "litellm_provider": "openrouter", @@ -17255,8 +18984,13 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.00001, - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_reasoning": true, 
"supports_tool_choice": true }, @@ -17269,8 +19003,13 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.00001, - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_reasoning": true, "supports_tool_choice": true }, @@ -17283,8 +19022,13 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.00001, - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_reasoning": true, "supports_tool_choice": true }, @@ -17297,8 +19041,13 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.000002, - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_reasoning": true, "supports_tool_choice": true }, @@ -17311,8 +19060,13 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 4e-7, - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_reasoning": true, "supports_tool_choice": true }, @@ -17473,15 +19227,16 @@ "supports_vision": true }, "openrouter/qwen/qwen3-coder": { - "input_cost_per_token": 0.000001, + "input_cost_per_token": 2.2e-7, "litellm_provider": "openrouter", - "max_input_tokens": 1000000, - "max_output_tokens": 1000000, - "max_tokens": 1000000, + "max_input_tokens": 262100, + "max_output_tokens": 262100, + "max_tokens": 262100, "mode": "chat", - "output_cost_per_token": 0.000005, + "output_cost_per_token": 9.5e-7, "source": "https://openrouter.ai/qwen/qwen3-coder", - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_function_calling": true }, "openrouter/switchpoint/router": { "input_cost_per_token": 8.5e-7, @@ -17530,6 +19285,32 @@ "supports_tool_choice": true, "supports_web_search": false }, + "openrouter/z-ai/glm-4.6": { + "input_cost_per_token": 4e-7, + "litellm_provider": "openrouter", + "max_input_tokens": 202800, + "max_output_tokens": 131000, + "max_tokens": 202800, + "mode": "chat", + "output_cost_per_token": 0.00000175, + "source": "https://openrouter.ai/z-ai/glm-4.6", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, + "openrouter/z-ai/glm-4.6:exacto": { + "input_cost_per_token": 4.5e-7, + "litellm_provider": "openrouter", + "max_input_tokens": 202800, + "max_output_tokens": 131000, + "max_tokens": 202800, + "mode": "chat", + "output_cost_per_token": 0.0000019, + "source": "https://openrouter.ai/z-ai/glm-4.6:exacto", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, "ovhcloud/DeepSeek-R1-Distill-Llama-70B": { "input_cost_per_token": 6.7e-7, "litellm_provider": "ovhcloud", @@ -18120,14 +19901,18 @@ "mode": "image_generation", "output_cost_per_image": 0.022, "source": "https://www.recraft.ai/docs#pricing", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "recraft/recraftv3": { "litellm_provider": "recraft", "mode": "image_generation", "output_cost_per_image": 0.04, "source": "https://www.recraft.ai/docs#pricing", - "supported_endpoints": ["/v1/images/generations"] + 
"supported_endpoints": [ + "/v1/images/generations" + ] }, "replicate/meta/llama-2-13b": { "input_cost_per_token": 1e-7, @@ -19455,13 +21240,17 @@ "input_cost_per_character": 0.000015, "litellm_provider": "openai", "mode": "audio_speech", - "supported_endpoints": ["/v1/audio/speech"] + "supported_endpoints": [ + "/v1/audio/speech" + ] }, "tts-1-hd": { "input_cost_per_character": 0.00003, "litellm_provider": "openai", "mode": "audio_speech", - "supported_endpoints": ["/v1/audio/speech"] + "supported_endpoints": [ + "/v1/audio/speech" + ] }, "us.amazon.nova-lite-v1:0": { "input_cost_per_token": 6e-8, @@ -19890,8 +21679,14 @@ "mode": "chat", "output_cost_per_token": 9.7e-7, "output_cost_per_token_batches": 4.85e-7, - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text", "code"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "code" + ], "supports_function_calling": true, "supports_tool_choice": false }, @@ -19905,8 +21700,14 @@ "mode": "chat", "output_cost_per_token": 6.6e-7, "output_cost_per_token_batches": 3.3e-7, - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text", "code"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "code" + ], "supports_function_calling": true, "supports_tool_choice": false }, @@ -21307,7 +23108,9 @@ "mode": "chat", "output_cost_per_token": 0.0000054, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", - "supported_regions": ["us-west2"], + "supported_regions": [ + "us-west2" + ], "supports_assistant_prefill": true, "supports_function_calling": true, "supports_prompt_caching": true, @@ -21488,8 +23291,14 @@ "mode": "chat", "output_cost_per_token": 0.00000115, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text", "code"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "code" + ], "supports_function_calling": true, "supports_tool_choice": true }, @@ -21502,8 +23311,14 @@ "mode": "chat", "output_cost_per_token": 0.00000115, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text", "code"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "code" + ], "supports_function_calling": true, "supports_tool_choice": true }, @@ -21516,8 +23331,14 @@ "mode": "chat", "output_cost_per_token": 7e-7, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text", "code"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "code" + ], "supports_function_calling": true, "supports_tool_choice": true }, @@ -21530,8 +23351,14 @@ "mode": "chat", "output_cost_per_token": 7e-7, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text", "code"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "code" + ], "supports_function_calling": true, "supports_tool_choice": true }, @@ -21717,7 +23544,9 @@ "litellm_provider": "vertex_ai", "mode": "ocr", 
"ocr_cost_per_page": 0.0005, - "supported_endpoints": ["/v1/ocr"], + "supported_endpoints": [ + "/v1/ocr" + ], "source": "https://cloud.google.com/generative-ai-app-builder/pricing" }, "vertex_ai/openai/gpt-oss-120b-maas": { @@ -21797,8 +23626,12 @@ "mode": "video_generation", "output_cost_per_second": 0.35, "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": ["text"], - "supported_output_modalities": ["video"] + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] }, "vertex_ai/veo-3.0-fast-generate-preview": { "litellm_provider": "vertex_ai-video-models", @@ -21807,8 +23640,12 @@ "mode": "video_generation", "output_cost_per_second": 0.4, "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": ["text"], - "supported_output_modalities": ["video"] + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] }, "vertex_ai/veo-3.0-generate-preview": { "litellm_provider": "vertex_ai-video-models", @@ -21817,8 +23654,12 @@ "mode": "video_generation", "output_cost_per_second": 0.75, "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": ["text"], - "supported_output_modalities": ["video"] + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] }, "vertex_ai/veo-3.1-generate-preview": { "litellm_provider": "vertex_ai-video-models", @@ -21827,8 +23668,12 @@ "mode": "video_generation", "output_cost_per_second": 0.4, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/veo", - "supported_modalities": ["text"], - "supported_output_modalities": ["video"] + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] }, "vertex_ai/veo-3.1-fast-generate-preview": { "litellm_provider": "vertex_ai-video-models", @@ -21837,8 +23682,12 @@ "mode": "video_generation", "output_cost_per_second": 0.15, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/veo", - "supported_modalities": ["text"], - "supported_output_modalities": ["video"] + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] }, "voyage/rerank-2": { "input_cost_per_query": 5e-8, @@ -22445,7 +24294,9 @@ "litellm_provider": "openai", "mode": "audio_transcription", "output_cost_per_second": 0.0001, - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "xai/grok-2": { "input_cost_per_token": 0.000002, @@ -22844,44 +24695,192 @@ "mode": "video_generation", "output_cost_per_video_per_second": 0.1, "source": "https://platform.openai.com/docs/api-reference/videos", - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["video"], - "supported_resolutions": ["720x1280", "1280x720"] + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "video" + ], + "supported_resolutions": [ + "720x1280", + "1280x720" + ] }, "openai/sora-2-pro": { "litellm_provider": "openai", "mode": "video_generation", "output_cost_per_video_per_second": 0.3, "source": "https://platform.openai.com/docs/api-reference/videos", - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["video"], - "supported_resolutions": ["720x1280", "1280x720"] + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "video" + ], + "supported_resolutions": [ + "720x1280", + "1280x720" + ] }, "azure/sora-2": { "litellm_provider": 
"azure", "mode": "video_generation", "output_cost_per_video_per_second": 0.1, "source": "https://azure.microsoft.com/en-us/products/ai-services/video-generation", - "supported_modalities": ["text"], - "supported_output_modalities": ["video"], - "supported_resolutions": ["720x1280", "1280x720"] + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ], + "supported_resolutions": [ + "720x1280", + "1280x720" + ] }, "azure/sora-2-pro": { "litellm_provider": "azure", "mode": "video_generation", "output_cost_per_video_per_second": 0.3, "source": "https://azure.microsoft.com/en-us/products/ai-services/video-generation", - "supported_modalities": ["text"], - "supported_output_modalities": ["video"], - "supported_resolutions": ["720x1280", "1280x720"] + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ], + "supported_resolutions": [ + "720x1280", + "1280x720" + ] }, "azure/sora-2-pro-high-res": { "litellm_provider": "azure", "mode": "video_generation", "output_cost_per_video_per_second": 0.5, "source": "https://azure.microsoft.com/en-us/products/ai-services/video-generation", - "supported_modalities": ["text"], - "supported_output_modalities": ["video"], - "supported_resolutions": ["1024x1792", "1792x1024"] + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ], + "supported_resolutions": [ + "1024x1792", + "1792x1024" + ] + }, + "runwayml/gen4_turbo": { + "litellm_provider": "runwayml", + "mode": "video_generation", + "output_cost_per_video_per_second": 0.05, + "source": "https://docs.dev.runwayml.com/guides/pricing/", + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "video" + ], + "supported_resolutions": [ + "1280x720", + "720x1280" + ], + "metadata": { + "comment": "5 credits per second @ $0.01 per credit = $0.05 per second" + } + }, + "runwayml/gen4_aleph": { + "litellm_provider": "runwayml", + "mode": "video_generation", + "output_cost_per_video_per_second": 0.15, + "source": "https://docs.dev.runwayml.com/guides/pricing/", + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "video" + ], + "supported_resolutions": [ + "1280x720", + "720x1280" + ], + "metadata": { + "comment": "15 credits per second @ $0.01 per credit = $0.15 per second" + } + }, + "runwayml/gen3a_turbo": { + "litellm_provider": "runwayml", + "mode": "video_generation", + "output_cost_per_video_per_second": 0.05, + "source": "https://docs.dev.runwayml.com/guides/pricing/", + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "video" + ], + "supported_resolutions": [ + "1280x720", + "720x1280" + ], + "metadata": { + "comment": "5 credits per second @ $0.01 per credit = $0.05 per second" + } + }, + "runwayml/gen4_image": { + "litellm_provider": "runwayml", + "mode": "image_generation", + "input_cost_per_image": 0.05, + "output_cost_per_image": 0.05, + "source": "https://docs.dev.runwayml.com/guides/pricing/", + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "image" + ], + "supported_resolutions": [ + "1280x720", + "1920x1080" + ], + "metadata": { + "comment": "5 credits per 720p image or 8 credits per 1080p image @ $0.01 per credit. 
Using 5 credits ($0.05) as base cost" + } }, + "runwayml/gen4_image_turbo": { + "litellm_provider": "runwayml", + "mode": "image_generation", + "input_cost_per_image": 0.02, + "output_cost_per_image": 0.02, + "source": "https://docs.dev.runwayml.com/guides/pricing/", + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "image" + ], + "supported_resolutions": [ + "1280x720", + "1920x1080" + ], + "metadata": { + "comment": "2 credits per image (any resolution) @ $0.01 per credit = $0.02 per image" + } + }, + "runwayml/eleven_multilingual_v2": { + "litellm_provider": "runwayml", + "mode": "audio_speech", + "input_cost_per_character": 3e-7, + "source": "https://docs.dev.runwayml.com/guides/pricing/", + "metadata": { + "comment": "Estimated cost based on standard TTS pricing. RunwayML uses ElevenLabs models." + } } -} +} \ No newline at end of file diff --git a/tests/models/knownModels.test.ts b/tests/models/knownModels.test.ts new file mode 100644 index 000000000..41b5b42cc --- /dev/null +++ b/tests/models/knownModels.test.ts @@ -0,0 +1,44 @@ +/** + * Integration test for known models - verifies all models exist in models.json + * + * This test does NOT go through IPC - it directly uses data from models.json + * to verify that every providerModelId in KNOWN_MODELS exists. + */ + +import { describe, test, expect } from "@jest/globals"; +import { KNOWN_MODELS } from "@/constants/knownModels"; +import modelsJson from "@/utils/tokens/models.json"; + +describe("Known Models Integration", () => { + test("all known models exist in models.json", () => { + const missingModels: string[] = []; + + for (const [key, model] of Object.entries(KNOWN_MODELS)) { + const modelId = model.providerModelId; + + // Check if model exists in models.json + if (!(modelId in modelsJson)) { + missingModels.push(`${key}: ${model.provider}:${modelId}`); + } + } + + // Report all missing models at once for easier debugging + if (missingModels.length > 0) { + throw new Error( + `The following known models are missing from models.json:\n${missingModels.join("\n")}\n\n` + + `Run 'bun scripts/update_models.ts' to refresh models.json from LiteLLM.` + ); + } + }); + + test("all known models have required metadata", () => { + for (const [key, model] of Object.entries(KNOWN_MODELS)) { + const modelId = model.providerModelId; + const modelData = modelsJson[modelId as keyof typeof modelsJson] as Record<string, unknown>; + + expect(modelData).toBeDefined(); + // Check that basic metadata fields exist (not all models have all fields) + expect(typeof modelData.litellm_provider).toBe("string"); + } + }); +}); From 2d55122b810a9532a0c327bde31235181a7d9316 Mon Sep 17 00:00:00 2001 From: root Date: Fri, 14 Nov 2025 02:13:33 +0000 Subject: [PATCH 2/9] =?UTF-8?q?=F0=9F=A4=96=20fix:=20only=20pass=20previou?= =?UTF-8?q?sResponseId=20for=20same=20model=20with=20reasoning?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes APICallError when switching models or using non-reasoning models. Previously, we would pass previousResponseId from any previous OpenAI assistant message, causing errors when: - Switching between models (e.g., gpt-5.1-codex → gpt-5-pro) - Using models without reasoning support - Response IDs expired or invalid Now we: 1. Only extract previousResponseId when current model uses reasoning 2. Only use it if the previous message was from the same model 3. 
Stop searching if we encounter a different model (conversation context changed) This prevents "Previous response with id 'resp_...' not found" errors when the response ID is invalid for the current model/context. Generated with `mux` --- src/utils/ai/providerOptions.ts | 44 +++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/src/utils/ai/providerOptions.ts b/src/utils/ai/providerOptions.ts index 3e1190941..e8228b1bb 100644 --- a/src/utils/ai/providerOptions.ts +++ b/src/utils/ai/providerOptions.ts @@ -119,18 +119,42 @@ export function buildProviderOptions( const reasoningEffort = OPENAI_REASONING_EFFORT[effectiveThinking]; // Extract previousResponseId from last assistant message for persistence + // IMPORTANT: Only use previousResponseId if: + // 1. The previous message used the same model (prevents cross-model contamination) + // 2. That model uses reasoning (reasoning effort is set) + // 3. The response ID exists let previousResponseId: string | undefined; - if (messages && messages.length > 0) { - // Find last assistant message + if (messages && messages.length > 0 && reasoningEffort) { + // Parse current model name (without provider prefix) + const [, currentModelName] = modelString.split(":"); + + // Find last assistant message from the same model for (let i = messages.length - 1; i >= 0; i--) { - if (messages[i].role === "assistant") { - const metadata = messages[i].metadata?.providerMetadata; - if (metadata && "openai" in metadata) { - const openaiData = metadata.openai as Record<string, unknown> | undefined; - previousResponseId = openaiData?.responseId as string | undefined; - } - if (previousResponseId) { - log.debug("buildProviderOptions: Found previousResponseId", { previousResponseId }); + const msg = messages[i]; + if (msg.role === "assistant") { + // Check if this message is from the same model + const msgModel = msg.metadata?.model; + const [, msgModelName] = msgModel?.split(":") ?? 
+
+        if (msgModelName === currentModelName) {
+          const metadata = msg.metadata?.providerMetadata;
+          if (metadata && "openai" in metadata) {
+            const openaiData = metadata.openai as Record<string, unknown> | undefined;
+            previousResponseId = openaiData?.responseId as string | undefined;
+          }
+          if (previousResponseId) {
+            log.debug("buildProviderOptions: Found previousResponseId from same model", {
+              previousResponseId,
+              model: currentModelName,
+            });
+            break;
+          }
+        } else if (msgModelName) {
+          // Found assistant message from different model, stop searching
+          log.debug("buildProviderOptions: Skipping previousResponseId - model changed", {
+            previousModel: msgModelName,
+            currentModel: currentModelName,
+          });
           break;
         }
       }

From edb49ce7b497027f83c5779025175b41d6f9a0bc Mon Sep 17 00:00:00 2001
From: Ammar
Date: Fri, 14 Nov 2025 02:21:11 +0000
Subject: [PATCH 3/9] =?UTF-8?q?=F0=9F=A4=96=20test:=20add=20multi-turn=20t?=
 =?UTF-8?q?est=20for=20codex=20+=20use=20knownModels=20constants?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add integration test for multi-turn conversations with reasoning models
  - Verifies previousResponseId is passed correctly between turns
  - Tests GPT_CODEX specifically to catch response ID bugs
  - Validates responseId exists in assistant message metadata
- Update PROVIDER_CONFIGS to use KNOWN_MODELS constants
  - Ensures tests use the same model IDs as production code
  - Prevents drift between test models and known models

Generated with `mux`
---
 tests/ipcMain/sendMessage.test.ts | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 57 insertions(+), 2 deletions(-)

diff --git a/tests/ipcMain/sendMessage.test.ts b/tests/ipcMain/sendMessage.test.ts
index 544ec8cda..f53f4b065 100644
--- a/tests/ipcMain/sendMessage.test.ts
+++ b/tests/ipcMain/sendMessage.test.ts
@@ -30,10 +30,12 @@ if (shouldRunIntegrationTests()) {
   validateApiKeys(["OPENAI_API_KEY", "ANTHROPIC_API_KEY"]);
 }
 
+import { KNOWN_MODELS } from "@/constants/knownModels";
+
 // Test both providers with their respective models
 const PROVIDER_CONFIGS: Array<[string, string]> = [
-  ["openai", "gpt-5-codex"],
-  ["anthropic", "claude-sonnet-4-5"],
+  ["openai", KNOWN_MODELS.GPT_CODEX.providerModelId],
+  ["anthropic", KNOWN_MODELS.SONNET.providerModelId],
 ];
 
 // Integration test timeout guidelines:
@@ -1587,3 +1589,56 @@ describe.each(PROVIDER_CONFIGS)("%s:%s image support", (provider, model) => {
     40000
   );
 });
+
+  // Test multi-turn conversation specifically for reasoning models (codex)
+  test.concurrent(
+    "should handle multi-turn conversation with response ID persistence (openai reasoning models)",
+    async () => {
+      const { env, workspaceId, cleanup } = await setupWorkspace("openai");
+      try {
+        // First message
+        const result1 = await sendMessageWithModel(
+          env.mockIpcRenderer,
+          workspaceId,
+          "What is 2+2?",
+          "openai",
+          KNOWN_MODELS.GPT_CODEX.providerModelId
+        );
+        expect(result1.success).toBe(true);
+
+        const collector1 = createEventCollector(env.sentEvents, workspaceId);
+        await collector1.waitForEvent("stream-end", 30000);
+        assertStreamSuccess(collector1);
+        env.sentEvents.length = 0; // Clear events
+
+        // Second message - should use previousResponseId from first
+        const result2 = await sendMessageWithModel(
+          env.mockIpcRenderer,
+          workspaceId,
+          "Now add 3 to that",
+          "openai",
+          KNOWN_MODELS.GPT_CODEX.providerModelId
+        );
+        expect(result2.success).toBe(true);
+
+        const collector2 = createEventCollector(env.sentEvents, workspaceId);
+        await collector2.waitForEvent("stream-end", 30000);
+        assertStreamSuccess(collector2);
+
+        // Verify history contains both messages
+        const history = await env.mockIpcRenderer.invoke(IPC_CHANNELS.HISTORY_GET, workspaceId);
+        expect(history.success).toBe(true);
+        expect(history.data.length).toBeGreaterThanOrEqual(4); // 2 user + 2 assistant
+
+        // Verify assistant messages have responseId
+        const assistantMessages = history.data.filter((m: any) => m.role === "assistant");
+        expect(assistantMessages.length).toBeGreaterThanOrEqual(2);
+        expect(assistantMessages[0].metadata?.providerMetadata?.openai?.responseId).toBeDefined();
+        expect(assistantMessages[1].metadata?.providerMetadata?.openai?.responseId).toBeDefined();
+      } finally {
+        await cleanup();
+      }
+    },
+    60000
+  );
+});

From 8fb9ad1b5a25a6e1993ef8d3f642af1ce5e5a9dc Mon Sep 17 00:00:00 2001
From: Ammar
Date: Fri, 14 Nov 2025 02:23:38 +0000
Subject: [PATCH 4/9] =?UTF-8?q?=F0=9F=A4=96=20style:=20fix=20lint=20errors?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/constants/knownModels.ts | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/src/constants/knownModels.ts b/src/constants/knownModels.ts
index 512ab41e0..e92c17d50 100644
--- a/src/constants/knownModels.ts
+++ b/src/constants/knownModels.ts
@@ -98,25 +98,25 @@ export const DEFAULT_WARM_MODELS = Object.values(KNOWN_MODELS)
   .filter((model) => model.warm)
   .map((model) => model.id);
 
-export const MODEL_ABBREVIATIONS = Object.fromEntries(
+export const MODEL_ABBREVIATIONS: Record<string, string> = Object.fromEntries(
   Object.values(KNOWN_MODELS)
-    .flatMap((model) => (model.aliases ?? []).map((alias) => [alias, model.id]))
+    .flatMap((model) => (model.aliases ?? []).map((alias) => [alias, model.id] as const))
     .sort(([a], [b]) => a.localeCompare(b))
-) as Record<string, string>;
+);
 
-export const TOKENIZER_MODEL_OVERRIDES = Object.fromEntries(
+export const TOKENIZER_MODEL_OVERRIDES: Record<string, string> = Object.fromEntries(
   Object.values(KNOWN_MODELS)
     .filter((model) => Boolean(model.tokenizerOverride))
-    .map((model) => [model.id, model.tokenizerOverride as string])
-) as Record<string, string>;
-
-export const MODEL_NAMES = Object.entries(KNOWN_MODELS).reduce(
-  (acc, [key, model]) => {
-    if (!acc[model.provider]) {
-      acc[model.provider] = {} as Record<string, string>;
-    }
-    acc[model.provider][key] = model.providerModelId;
-    return acc;
-  },
-  {} as Record<ModelProvider, Record<string, string>>
+    .map((model) => [model.id, model.tokenizerOverride!])
 );
+
+export const MODEL_NAMES: Record<ModelProvider, Record<string, string>> = Object.entries(
+  KNOWN_MODELS
+).reduce<Record<ModelProvider, Record<string, string>>>((acc, [key, model]) => {
+  if (!acc[model.provider]) {
+    const emptyRecord: Record<string, string> = {};
+    acc[model.provider] = emptyRecord;
+  }
+  acc[model.provider][key] = model.providerModelId;
+  return acc;
+}, {} as Record<ModelProvider, Record<string, string>>);

From 7ca2a67fcc64d471860f77e0fc4d323830acaa89 Mon Sep 17 00:00:00 2001
From: Ammar
Date: Fri, 14 Nov 2025 02:25:53 +0000
Subject: [PATCH 5/9] =?UTF-8?q?=F0=9F=A4=96=20fix:=20wrap=20multi-turn=20t?=
 =?UTF-8?q?est=20in=20describeIntegration?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/ipcMain/sendMessage.test.ts | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/tests/ipcMain/sendMessage.test.ts b/tests/ipcMain/sendMessage.test.ts
index f53f4b065..1feab86bd 100644
--- a/tests/ipcMain/sendMessage.test.ts
+++ b/tests/ipcMain/sendMessage.test.ts
@@ -1590,7 +1590,8 @@ describe.each(PROVIDER_CONFIGS)("%s:%s image support", (provider, model) => {
   );
 });
 
-  // Test multi-turn conversation specifically for reasoning models (codex)
+// Test multi-turn conversation specifically for reasoning models (codex) +describeIntegration("Multi-turn conversation tests", () => { test.concurrent( "should handle multi-turn conversation with response ID persistence (openai reasoning models)", async () => { @@ -1626,15 +1627,17 @@ describe.each(PROVIDER_CONFIGS)("%s:%s image support", (provider, model) => { assertStreamSuccess(collector2); // Verify history contains both messages - const history = await env.mockIpcRenderer.invoke(IPC_CHANNELS.HISTORY_GET, workspaceId); - expect(history.success).toBe(true); - expect(history.data.length).toBeGreaterThanOrEqual(4); // 2 user + 2 assistant + const history = await readChatHistory(env.tempDir, workspaceId); + expect(history.length).toBeGreaterThanOrEqual(4); // 2 user + 2 assistant // Verify assistant messages have responseId - const assistantMessages = history.data.filter((m: any) => m.role === "assistant"); + const assistantMessages = history.filter((m) => m.role === "assistant"); expect(assistantMessages.length).toBeGreaterThanOrEqual(2); - expect(assistantMessages[0].metadata?.providerMetadata?.openai?.responseId).toBeDefined(); - expect(assistantMessages[1].metadata?.providerMetadata?.openai?.responseId).toBeDefined(); + // Check that responseId exists (type is unknown from JSONL parsing) + const firstAssistant = assistantMessages[0] as any; + const secondAssistant = assistantMessages[1] as any; + expect(firstAssistant.metadata?.providerMetadata?.openai?.responseId).toBeDefined(); + expect(secondAssistant.metadata?.providerMetadata?.openai?.responseId).toBeDefined(); } finally { await cleanup(); } From 1e116d1f1b9cc9ba2484558ee528c1971ccddaa0 Mon Sep 17 00:00:00 2001 From: Ammar Date: Fri, 14 Nov 2025 02:26:53 +0000 Subject: [PATCH 6/9] =?UTF-8?q?=F0=9F=A4=96=20fix:=20move=20multi-turn=20t?= =?UTF-8?q?est=20inside=20describeIntegration=20block?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/ipcMain/sendMessage.test.ts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/ipcMain/sendMessage.test.ts b/tests/ipcMain/sendMessage.test.ts index 1feab86bd..643d8e707 100644 --- a/tests/ipcMain/sendMessage.test.ts +++ b/tests/ipcMain/sendMessage.test.ts @@ -1588,10 +1588,8 @@ describe.each(PROVIDER_CONFIGS)("%s:%s image support", (provider, model) => { }, 40000 ); -}); -// Test multi-turn conversation specifically for reasoning models (codex) -describeIntegration("Multi-turn conversation tests", () => { + // Test multi-turn conversation specifically for reasoning models (codex) test.concurrent( "should handle multi-turn conversation with response ID persistence (openai reasoning models)", async () => { From 8d6d1e0717067ca38d6e79412bf38220fca6e054 Mon Sep 17 00:00:00 2001 From: Ammar Date: Fri, 14 Nov 2025 02:28:25 +0000 Subject: [PATCH 7/9] =?UTF-8?q?=F0=9F=A4=96=20style:=20run=20fmt?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/constants/knownModels.ts | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/constants/knownModels.ts b/src/constants/knownModels.ts index e92c17d50..254a91b8e 100644 --- a/src/constants/knownModels.ts +++ b/src/constants/knownModels.ts @@ -24,7 +24,7 @@ interface KnownModel extends KnownModelDefinition { id: `${ModelProvider}:${string}`; } -// Model definitions. Note we avoid listing legacy models here. These represent the focal models +// Model definitions. Note we avoid listing legacy models here. 
These represent the focal models
 // of the community.
 const MODEL_DEFINITIONS = {
   SONNET: {
@@ -112,11 +112,14 @@ export const TOKENIZER_MODEL_OVERRIDES: Record = Object.fromEntr
 
 export const MODEL_NAMES: Record<ModelProvider, Record<string, string>> = Object.entries(
   KNOWN_MODELS
-).reduce<Record<ModelProvider, Record<string, string>>>((acc, [key, model]) => {
-  if (!acc[model.provider]) {
-    const emptyRecord: Record<string, string> = {};
-    acc[model.provider] = emptyRecord;
-  }
-  acc[model.provider][key] = model.providerModelId;
-  return acc;
-}, {} as Record<ModelProvider, Record<string, string>>);
+).reduce<Record<ModelProvider, Record<string, string>>>(
+  (acc, [key, model]) => {
+    if (!acc[model.provider]) {
+      const emptyRecord: Record<string, string> = {};
+      acc[model.provider] = emptyRecord;
+    }
+    acc[model.provider][key] = model.providerModelId;
+    return acc;
+  },
+  {} as Record<ModelProvider, Record<string, string>>
+);

From cf92b93be57370235aecb2c9bb8786b5c0593086 Mon Sep 17 00:00:00 2001
From: Ammar
Date: Fri, 14 Nov 2025 02:32:16 +0000
Subject: [PATCH 8/9] =?UTF-8?q?=F0=9F=A4=96=20Fix=20formatting=20issues?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

_Generated with `mux`_
---
 src/services/mock/scenarios/toolFlows.ts |    7 +-
 src/utils/ai/providerOptions.ts          |    4 +-
 src/utils/main/tokenizer.ts              |    5 +-
 src/utils/tokens/models.json             | 2939 +++++----------------
 tests/ipcMain/sendMessage.test.ts        |   10 +-
 tests/models/knownModels.test.ts         |   14 +-
 6 files changed, 636 insertions(+), 2343 deletions(-)

diff --git a/src/services/mock/scenarios/toolFlows.ts b/src/services/mock/scenarios/toolFlows.ts
index 3da467993..937162784 100644
--- a/src/services/mock/scenarios/toolFlows.ts
+++ b/src/services/mock/scenarios/toolFlows.ts
@@ -20,7 +20,12 @@ const fileReadTurn: ScenarioTurn = {
   assistant: {
     messageId: "msg-tool-file-read",
     events: [
-      { kind: "stream-start", delay: 0, messageId: "msg-tool-file-read", model: KNOWN_MODELS.GPT.id },
+      {
+        kind: "stream-start",
+        delay: 0,
+        messageId: "msg-tool-file-read",
+        model: KNOWN_MODELS.GPT.id,
+      },
       {
         kind: "tool-start",
         delay: STREAM_BASE_DELAY,
diff --git a/src/utils/ai/providerOptions.ts b/src/utils/ai/providerOptions.ts
index e8228b1bb..08703571d 100644
--- a/src/utils/ai/providerOptions.ts
+++ b/src/utils/ai/providerOptions.ts
@@ -127,7 +127,7 @@ export function buildProviderOptions(
   if (messages && messages.length > 0 && reasoningEffort) {
     // Parse current model name (without provider prefix)
     const [, currentModelName] = modelString.split(":");
-    
+
     // Find last assistant message from the same model
     for (let i = messages.length - 1; i >= 0; i--) {
       const msg = messages[i];
@@ -135,7 +135,7 @@ export function buildProviderOptions(
         // Check if this message is from the same model
         const msgModel = msg.metadata?.model;
         const [, msgModelName] = msgModel?.split(":") ?? [];
-        
+
         if (msgModelName === currentModelName) {
           const metadata = msg.metadata?.providerMetadata;
           if (metadata && "openai" in metadata) {
diff --git a/src/utils/main/tokenizer.ts b/src/utils/main/tokenizer.ts
index 7a5632c54..d34c35700 100644
--- a/src/utils/main/tokenizer.ts
+++ b/src/utils/main/tokenizer.ts
@@ -5,10 +5,7 @@ import { getAvailableTools, getToolSchemas } from "@/utils/tools/toolDefinitions
 import type { CountTokensInput } from "./tokenizer.worker";
 import { models, type ModelName } from "ai-tokenizer";
 import { run } from "./workerPool";
-import {
-  TOKENIZER_MODEL_OVERRIDES,
-  DEFAULT_WARM_MODELS,
-} from "@/constants/knownModels";
+import { TOKENIZER_MODEL_OVERRIDES, DEFAULT_WARM_MODELS } from "@/constants/knownModels";
 
 /**
  * Public tokenizer interface exposed to callers.
diff --git a/src/utils/tokens/models.json b/src/utils/tokens/models.json index cacb24c1f..3cb27be51 100644 --- a/src/utils/tokens/models.json +++ b/src/utils/tokens/models.json @@ -20,13 +20,7 @@ "search_context_size_low": 0, "search_context_size_medium": 0 }, - "supported_regions": [ - "global", - "us-west-2", - "eu-west-1", - "ap-southeast-1", - "ap-northeast-1" - ], + "supported_regions": ["global", "us-west-2", "eu-west-1", "ap-southeast-1", "ap-northeast-1"], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -145,9 +139,7 @@ "mode": "image_generation", "output_cost_per_image": 0.021, "source": "https://docs.aimlapi.com/", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "aiml/dall-e-3": { "litellm_provider": "aiml", @@ -157,9 +149,7 @@ "mode": "image_generation", "output_cost_per_image": 0.042, "source": "https://docs.aimlapi.com/", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "aiml/flux-pro": { "litellm_provider": "aiml", @@ -169,25 +159,19 @@ "mode": "image_generation", "output_cost_per_image": 0.053, "source": "https://docs.aimlapi.com/", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "aiml/flux-pro/v1.1": { "litellm_provider": "aiml", "mode": "image_generation", "output_cost_per_image": 0.042, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "aiml/flux-pro/v1.1-ultra": { "litellm_provider": "aiml", "mode": "image_generation", "output_cost_per_image": 0.063, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "aiml/flux-realism": { "litellm_provider": "aiml", @@ -197,9 +181,7 @@ "mode": "image_generation", "output_cost_per_image": 0.037, "source": "https://docs.aimlapi.com/", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "aiml/flux/dev": { "litellm_provider": "aiml", @@ -209,9 +191,7 @@ "mode": "image_generation", "output_cost_per_image": 0.026, "source": "https://docs.aimlapi.com/", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "aiml/flux/kontext-max/text-to-image": { "litellm_provider": "aiml", @@ -221,9 +201,7 @@ "mode": "image_generation", "output_cost_per_image": 0.084, "source": "https://docs.aimlapi.com/", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "aiml/flux/kontext-pro/text-to-image": { "litellm_provider": "aiml", @@ -233,9 +211,7 @@ "mode": "image_generation", "output_cost_per_image": 0.042, "source": "https://docs.aimlapi.com/", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "aiml/flux/schnell": { "litellm_provider": "aiml", @@ -245,9 +221,7 @@ "mode": "image_generation", "output_cost_per_image": 0.003, "source": "https://docs.aimlapi.com/", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "amazon.nova-lite-v1:0": { "input_cost_per_token": 6e-8, @@ -1048,16 +1022,9 @@ "max_tokens": 100000, "mode": "responses", "output_cost_per_token": 0.000006, - "supported_endpoints": [ - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": 
[ - "text" - ], + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -1086,16 +1053,9 @@ "max_tokens": 1024, "mode": "chat", "output_cost_per_token": 0.000012, - "supported_endpoints": [ - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": false, @@ -1209,14 +1169,8 @@ "mode": "chat", "output_cost_per_audio_token": 0.00008, "output_cost_per_token": 0.000022, - "supported_modalities": [ - "text", - "audio" - ], - "supported_output_modalities": [ - "text", - "audio" - ], + "supported_modalities": ["text", "audio"], + "supported_output_modalities": ["text", "audio"], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -1611,18 +1565,9 @@ "mode": "chat", "output_cost_per_token": 0.000008, "output_cost_per_token_batches": 0.000004, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1645,18 +1590,9 @@ "mode": "chat", "output_cost_per_token": 0.000008, "output_cost_per_token_batches": 0.000004, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1678,18 +1614,9 @@ "mode": "chat", "output_cost_per_token": 0.0000016, "output_cost_per_token_batches": 8e-7, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1712,18 +1639,9 @@ "mode": "chat", "output_cost_per_token": 0.0000016, "output_cost_per_token_batches": 8e-7, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1745,18 +1663,9 @@ 
"mode": "chat", "output_cost_per_token": 4e-7, "output_cost_per_token_batches": 2e-7, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1778,18 +1687,9 @@ "mode": "chat", "output_cost_per_token": 4e-7, "output_cost_per_token_batches": 2e-7, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1892,17 +1792,9 @@ "mode": "chat", "output_cost_per_audio_token": 0.00008, "output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/chat/completions" - ], - "supported_modalities": [ - "text", - "audio" - ], - "supported_output_modalities": [ - "text", - "audio" - ], + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text", "audio"], + "supported_output_modalities": ["text", "audio"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1955,17 +1847,9 @@ "mode": "chat", "output_cost_per_audio_token": 0.00008, "output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/chat/completions" - ], - "supported_modalities": [ - "text", - "audio" - ], - "supported_output_modalities": [ - "text", - "audio" - ], + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text", "audio"], + "supported_output_modalities": ["text", "audio"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2003,9 +1887,7 @@ "max_output_tokens": 2000, "mode": "audio_transcription", "output_cost_per_token": 0.000005, - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "azure/gpt-4o-mini-tts": { "input_cost_per_token": 0.0000025, @@ -2014,16 +1896,9 @@ "output_cost_per_audio_token": 0.000012, "output_cost_per_second": 0.00025, "output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/audio/speech" - ], - "supported_modalities": [ - "text", - "audio" - ], - "supported_output_modalities": [ - "audio" - ] + "supported_endpoints": ["/v1/audio/speech"], + "supported_modalities": ["text", "audio"], + "supported_output_modalities": ["audio"] }, "azure/gpt-4o-realtime-preview-2024-10-01": { "cache_creation_input_audio_token_cost": 0.00002, @@ -2055,14 +1930,8 @@ "mode": "chat", "output_cost_per_audio_token": 0.00008, "output_cost_per_token": 0.00002, - "supported_modalities": [ - "text", - "audio" - ], - "supported_output_modalities": [ - "text", - "audio" - ], + "supported_modalities": ["text", "audio"], + "supported_output_modalities": ["text", "audio"], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -2078,9 +1947,7 @@ "max_output_tokens": 2000, "mode": "audio_transcription", 
"output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "azure/gpt-5": { "cache_read_input_token_cost": 1.25e-7, @@ -2091,18 +1958,9 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2123,18 +1981,9 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2156,18 +2005,9 @@ "mode": "chat", "output_cost_per_token": 0.00001, "source": "https://azure.microsoft.com/en-us/blog/gpt-5-in-azure-ai-foundry-the-future-of-ai-apps-and-agents-starts-here/", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2188,18 +2028,9 @@ "max_tokens": 16384, "mode": "chat", "output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2220,16 +2051,9 @@ "max_tokens": 128000, "mode": "responses", "output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2250,18 +2074,9 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.000002, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], 
"supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2282,18 +2097,9 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.000002, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2314,18 +2120,9 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 4e-7, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2346,18 +2143,9 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 4e-7, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2378,16 +2166,9 @@ "mode": "responses", "output_cost_per_token": 0.00012, "source": "https://learn.microsoft.com/en-us/azure/ai-foundry/foundry-models/concepts/models-sold-directly-by-azure?pivots=azure-openai&tabs=global-standard-aoai%2Cstandard-chat-completions%2Cglobal-standard#gpt-5", - "supported_endpoints": [ - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -2403,9 +2184,7 @@ "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/hd/1024-x-1024/dall-e-3": { "input_cost_per_pixel": 7.629e-8, @@ -2430,171 +2209,133 @@ "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/high/1024-x-1536/gpt-image-1": { "input_cost_per_pixel": 1.58945719e-7, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/high/1536-x-1024/gpt-image-1": { "input_cost_per_pixel": 1.58945719e-7, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": 
["/v1/images/generations"] }, "azure/low/1024-x-1024/gpt-image-1": { "input_cost_per_pixel": 1.0490417e-8, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/low/1024-x-1536/gpt-image-1": { "input_cost_per_pixel": 1.0172526e-8, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/low/1536-x-1024/gpt-image-1": { "input_cost_per_pixel": 1.0172526e-8, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/medium/1024-x-1024/gpt-image-1": { "input_cost_per_pixel": 4.0054321e-8, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/medium/1024-x-1536/gpt-image-1": { "input_cost_per_pixel": 4.0054321e-8, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/medium/1536-x-1024/gpt-image-1": { "input_cost_per_pixel": 4.0054321e-8, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/gpt-image-1-mini": { "input_cost_per_pixel": 8.0566406e-9, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/low/1024-x-1024/gpt-image-1-mini": { "input_cost_per_pixel": 2.0751953125e-9, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/low/1024-x-1536/gpt-image-1-mini": { "input_cost_per_pixel": 2.0751953125e-9, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/low/1536-x-1024/gpt-image-1-mini": { "input_cost_per_pixel": 2.0345052083e-9, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/medium/1024-x-1024/gpt-image-1-mini": { "input_cost_per_pixel": 8.056640625e-9, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/medium/1024-x-1536/gpt-image-1-mini": { "input_cost_per_pixel": 8.056640625e-9, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/medium/1536-x-1024/gpt-image-1-mini": { "input_cost_per_pixel": 7.9752604167e-9, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - 
"supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/high/1024-x-1024/gpt-image-1-mini": { "input_cost_per_pixel": 3.173828125e-8, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/high/1024-x-1536/gpt-image-1-mini": { "input_cost_per_pixel": 3.173828125e-8, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/high/1536-x-1024/gpt-image-1-mini": { "input_cost_per_pixel": 3.1575520833e-8, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/mistral-large-2402": { "input_cost_per_token": 0.000008, @@ -2716,18 +2457,9 @@ "max_tokens": 100000, "mode": "chat", "output_cost_per_token": 0.000008, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_prompt_caching": true, @@ -2746,18 +2478,9 @@ "max_tokens": 100000, "mode": "chat", "output_cost_per_token": 0.00004, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_prompt_caching": true, @@ -2775,18 +2498,9 @@ "max_tokens": 100000, "mode": "responses", "output_cost_per_token": 0.00004, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -2837,18 +2551,9 @@ "mode": "responses", "output_cost_per_token": 0.00008, "output_cost_per_token_batches": 0.00004, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_prompt_caching": false, @@ -2867,18 +2572,9 @@ "mode": "responses", "output_cost_per_token": 0.00008, "output_cost_per_token_batches": 0.00004, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - 
"supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_prompt_caching": false, @@ -2896,18 +2592,9 @@ "max_tokens": 100000, "mode": "chat", "output_cost_per_token": 0.0000044, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_prompt_caching": true, @@ -3103,14 +2790,8 @@ "mode": "chat", "output_cost_per_audio_token": 0.00008, "output_cost_per_token": 0.000022, - "supported_modalities": [ - "text", - "audio" - ], - "supported_output_modalities": [ - "text", - "audio" - ], + "supported_modalities": ["text", "audio"], + "supported_output_modalities": ["text", "audio"], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -3212,18 +2893,14 @@ "mode": "image_generation", "output_cost_per_image": 0.04, "source": "https://techcommunity.microsoft.com/blog/azure-ai-foundry-blog/black-forest-labs-flux-1-kontext-pro-and-flux1-1-pro-now-available-in-azure-ai-f/4434659", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure_ai/FLUX.1-Kontext-pro": { "litellm_provider": "azure_ai", "mode": "image_generation", "output_cost_per_image": 0.04, "source": "https://azuremarketplace.microsoft.com/pt-br/marketplace/apps/cohere.cohere-embed-4-offer?tab=PlansAndPrice", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure_ai/Llama-3.2-11B-Vision-Instruct": { "input_cost_per_token": 3.7e-7, @@ -3506,36 +3183,28 @@ "litellm_provider": "azure_ai", "ocr_cost_per_page": 0.003, "mode": "ocr", - "supported_endpoints": [ - "/v1/ocr" - ], + "supported_endpoints": ["/v1/ocr"], "source": "https://devblogs.microsoft.com/foundry/whats-new-in-azure-ai-foundry-august-2025/#mistral-document-ai-(ocr)-%E2%80%94-serverless-in-foundry" }, "azure_ai/doc-intelligence/prebuilt-read": { "litellm_provider": "azure_ai", "ocr_cost_per_page": 0.0015, "mode": "ocr", - "supported_endpoints": [ - "/v1/ocr" - ], + "supported_endpoints": ["/v1/ocr"], "source": "https://azure.microsoft.com/en-us/pricing/details/ai-document-intelligence/" }, "azure_ai/doc-intelligence/prebuilt-layout": { "litellm_provider": "azure_ai", "ocr_cost_per_page": 0.01, "mode": "ocr", - "supported_endpoints": [ - "/v1/ocr" - ], + "supported_endpoints": ["/v1/ocr"], "source": "https://azure.microsoft.com/en-us/pricing/details/ai-document-intelligence/" }, "azure_ai/doc-intelligence/prebuilt-document": { "litellm_provider": "azure_ai", "ocr_cost_per_page": 0.01, "mode": "ocr", - "supported_endpoints": [ - "/v1/ocr" - ], + "supported_endpoints": ["/v1/ocr"], "source": "https://azure.microsoft.com/en-us/pricing/details/ai-document-intelligence/" }, "azure_ai/MAI-DS-R1": { @@ -3627,13 +3296,8 @@ "output_cost_per_token": 0, "output_vector_size": 3072, "source": 
"https://azuremarketplace.microsoft.com/pt-br/marketplace/apps/cohere.cohere-embed-4-offer?tab=PlansAndPrice", - "supported_endpoints": [ - "/v1/embeddings" - ], - "supported_modalities": [ - "text", - "image" - ], + "supported_endpoints": ["/v1/embeddings"], + "supported_modalities": ["text", "image"], "supports_embedding_image_input": true }, "azure_ai/global/grok-3": { @@ -5665,16 +5329,9 @@ "max_tokens": 100000, "mode": "responses", "output_cost_per_token": 0.000006, - "supported_endpoints": [ - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -5879,16 +5536,9 @@ "max_tokens": 1024, "mode": "chat", "output_cost_per_token": 0.000012, - "supported_endpoints": [ - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": false, @@ -5908,9 +5558,7 @@ "mode": "chat", "output_cost_per_token": 0.0000017, "source": "https://api-docs.deepseek.com/quick_start/pricing", - "supported_endpoints": [ - "/v1/chat/completions" - ], + "supported_endpoints": ["/v1/chat/completions"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -5929,9 +5577,7 @@ "mode": "chat", "output_cost_per_token": 0.0000017, "source": "https://api-docs.deepseek.com/quick_start/pricing", - "supported_endpoints": [ - "/v1/chat/completions" - ], + "supported_endpoints": ["/v1/chat/completions"], "supports_function_calling": false, "supports_native_streaming": true, "supports_parallel_function_calling": false, @@ -5968,18 +5614,12 @@ { "input_cost_per_token": 5e-8, "output_cost_per_token": 4e-7, - "range": [ - 0, - 256000 - ] + "range": [0, 256000] }, { "input_cost_per_token": 2.5e-7, "output_cost_per_token": 0.000002, - "range": [ - 256000, - 1000000 - ] + "range": [256000, 1000000] } ] }, @@ -5997,18 +5637,12 @@ { "input_cost_per_token": 5e-8, "output_cost_per_token": 4e-7, - "range": [ - 0, - 256000 - ] + "range": [0, 256000] }, { "input_cost_per_token": 2.5e-7, "output_cost_per_token": 0.000002, - "range": [ - 256000, - 1000000 - ] + "range": [256000, 1000000] } ] }, @@ -6094,19 +5728,13 @@ "input_cost_per_token": 4e-7, "output_cost_per_reasoning_token": 0.000004, "output_cost_per_token": 0.0000012, - "range": [ - 0, - 256000 - ] + "range": [0, 256000] }, { "input_cost_per_token": 0.0000012, "output_cost_per_reasoning_token": 0.000012, "output_cost_per_token": 0.0000036, - "range": [ - 256000, - 1000000 - ] + "range": [256000, 1000000] } ] }, @@ -6125,19 +5753,13 @@ "input_cost_per_token": 4e-7, "output_cost_per_reasoning_token": 0.000004, "output_cost_per_token": 0.0000012, - "range": [ - 0, - 256000 - ] + "range": [0, 256000] }, { "input_cost_per_token": 0.0000012, "output_cost_per_reasoning_token": 0.000012, "output_cost_per_token": 0.0000036, - "range": [ - 256000, - 1000000 - ] + "range": [256000, 1000000] } ] }, @@ -6156,19 +5778,13 @@ "input_cost_per_token": 4e-7, "output_cost_per_reasoning_token": 0.000004, "output_cost_per_token": 0.0000012, - 
"range": [ - 0, - 256000 - ] + "range": [0, 256000] }, { "input_cost_per_token": 0.0000012, "output_cost_per_reasoning_token": 0.000012, "output_cost_per_token": 0.0000036, - "range": [ - 256000, - 1000000 - ] + "range": [256000, 1000000] } ] }, @@ -6253,37 +5869,25 @@ "cache_read_input_token_cost": 8e-8, "input_cost_per_token": 3e-7, "output_cost_per_token": 0.0000015, - "range": [ - 0, - 32000 - ] + "range": [0, 32000] }, { "cache_read_input_token_cost": 1.2e-7, "input_cost_per_token": 5e-7, "output_cost_per_token": 0.0000025, - "range": [ - 32000, - 128000 - ] + "range": [32000, 128000] }, { "cache_read_input_token_cost": 2e-7, "input_cost_per_token": 8e-7, "output_cost_per_token": 0.000004, - "range": [ - 128000, - 256000 - ] + "range": [128000, 256000] }, { "cache_read_input_token_cost": 4e-7, "input_cost_per_token": 0.0000016, "output_cost_per_token": 0.0000096, - "range": [ - 256000, - 1000000 - ] + "range": [256000, 1000000] } ] }, @@ -6301,34 +5905,22 @@ { "input_cost_per_token": 3e-7, "output_cost_per_token": 0.0000015, - "range": [ - 0, - 32000 - ] + "range": [0, 32000] }, { "input_cost_per_token": 5e-7, "output_cost_per_token": 0.0000025, - "range": [ - 32000, - 128000 - ] + "range": [32000, 128000] }, { "input_cost_per_token": 8e-7, "output_cost_per_token": 0.000004, - "range": [ - 128000, - 256000 - ] + "range": [128000, 256000] }, { "input_cost_per_token": 0.0000016, "output_cost_per_token": 0.0000096, - "range": [ - 256000, - 1000000 - ] + "range": [256000, 1000000] } ] }, @@ -6347,37 +5939,25 @@ "cache_read_input_token_cost": 1e-7, "input_cost_per_token": 0.000001, "output_cost_per_token": 0.000005, - "range": [ - 0, - 32000 - ] + "range": [0, 32000] }, { "cache_read_input_token_cost": 1.8e-7, "input_cost_per_token": 0.0000018, "output_cost_per_token": 0.000009, - "range": [ - 32000, - 128000 - ] + "range": [32000, 128000] }, { "cache_read_input_token_cost": 3e-7, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000015, - "range": [ - 128000, - 256000 - ] + "range": [128000, 256000] }, { "cache_read_input_token_cost": 6e-7, "input_cost_per_token": 0.000006, "output_cost_per_token": 0.00006, - "range": [ - 256000, - 1000000 - ] + "range": [256000, 1000000] } ] }, @@ -6395,34 +5975,22 @@ { "input_cost_per_token": 0.000001, "output_cost_per_token": 0.000005, - "range": [ - 0, - 32000 - ] + "range": [0, 32000] }, { "input_cost_per_token": 0.0000018, "output_cost_per_token": 0.000009, - "range": [ - 32000, - 128000 - ] + "range": [32000, 128000] }, { "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000015, - "range": [ - 128000, - 256000 - ] + "range": [128000, 256000] }, { "input_cost_per_token": 0.000006, "output_cost_per_token": 0.00006, - "range": [ - 256000, - 1000000 - ] + "range": [256000, 1000000] } ] }, @@ -6440,26 +6008,17 @@ { "input_cost_per_token": 0.0000012, "output_cost_per_token": 0.000006, - "range": [ - 0, - 32000 - ] + "range": [0, 32000] }, { "input_cost_per_token": 0.0000024, "output_cost_per_token": 0.000012, - "range": [ - 32000, - 128000 - ] + "range": [32000, 128000] }, { "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000015, - "range": [ - 128000, - 252000 - ] + "range": [128000, 252000] } ] }, @@ -6677,9 +6236,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/base-conversationalai": { "input_cost_per_second": 0.00020833, @@ -6691,9 
+6248,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/base-finance": { "input_cost_per_second": 0.00020833, @@ -6705,9 +6260,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/base-general": { "input_cost_per_second": 0.00020833, @@ -6719,9 +6272,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/base-meeting": { "input_cost_per_second": 0.00020833, @@ -6733,9 +6284,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/base-phonecall": { "input_cost_per_second": 0.00020833, @@ -6747,9 +6296,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/base-video": { "input_cost_per_second": 0.00020833, @@ -6761,9 +6308,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/base-voicemail": { "input_cost_per_second": 0.00020833, @@ -6775,9 +6320,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/enhanced": { "input_cost_per_second": 0.00024167, @@ -6789,9 +6332,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/enhanced-finance": { "input_cost_per_second": 0.00024167, @@ -6803,9 +6344,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/enhanced-general": { "input_cost_per_second": 0.00024167, @@ -6817,9 +6356,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/enhanced-meeting": { "input_cost_per_second": 0.00024167, @@ -6831,9 +6368,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/enhanced-phonecall": { "input_cost_per_second": 0.00024167, @@ -6845,9 +6380,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + 
"supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova": { "input_cost_per_second": 0.00007167, @@ -6859,9 +6392,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-2": { "input_cost_per_second": 0.00007167, @@ -6873,9 +6404,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-2-atc": { "input_cost_per_second": 0.00007167, @@ -6887,9 +6416,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-2-automotive": { "input_cost_per_second": 0.00007167, @@ -6901,9 +6428,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-2-conversationalai": { "input_cost_per_second": 0.00007167, @@ -6915,9 +6440,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-2-drivethru": { "input_cost_per_second": 0.00007167, @@ -6929,9 +6452,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-2-finance": { "input_cost_per_second": 0.00007167, @@ -6943,9 +6464,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-2-general": { "input_cost_per_second": 0.00007167, @@ -6957,9 +6476,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-2-meeting": { "input_cost_per_second": 0.00007167, @@ -6971,9 +6488,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-2-phonecall": { "input_cost_per_second": 0.00007167, @@ -6985,9 +6500,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-2-video": { "input_cost_per_second": 0.00007167, @@ -6999,9 +6512,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-2-voicemail": { "input_cost_per_second": 0.00007167, @@ -7013,9 +6524,7 @@ "mode": "audio_transcription", 
"output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-3": { "input_cost_per_second": 0.00007167, @@ -7027,9 +6536,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-3-general": { "input_cost_per_second": 0.00007167, @@ -7041,9 +6548,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-3-medical": { "input_cost_per_second": 0.00008667, @@ -7055,9 +6560,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-general": { "input_cost_per_second": 0.00007167, @@ -7069,9 +6572,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-phonecall": { "input_cost_per_second": 0.00007167, @@ -7083,9 +6584,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/whisper": { "input_cost_per_second": 0.0001, @@ -7096,9 +6595,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/whisper-base": { "input_cost_per_second": 0.0001, @@ -7109,9 +6606,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/whisper-large": { "input_cost_per_second": 0.0001, @@ -7122,9 +6617,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/whisper-medium": { "input_cost_per_second": 0.0001, @@ -7135,9 +6628,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/whisper-small": { "input_cost_per_second": 0.0001, @@ -7148,9 +6639,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/whisper-tiny": { "input_cost_per_second": 0.0001, @@ -7161,9 +6650,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepinfra/Gryphe/MythoMax-L2-13b": 
{ "max_tokens": 4096, @@ -8005,17 +7492,11 @@ "tiered_pricing": [ { "input_cost_per_query": 0.005, - "max_results_range": [ - 0, - 25 - ] + "max_results_range": [0, 25] }, { "input_cost_per_query": 0.025, - "max_results_range": [ - 26, - 100 - ] + "max_results_range": [26, 100] } ] }, @@ -8025,73 +7506,43 @@ "tiered_pricing": [ { "input_cost_per_query": 0.00166, - "max_results_range": [ - 1, - 10 - ] + "max_results_range": [1, 10] }, { "input_cost_per_query": 0.00332, - "max_results_range": [ - 11, - 20 - ] + "max_results_range": [11, 20] }, { "input_cost_per_query": 0.00498, - "max_results_range": [ - 21, - 30 - ] + "max_results_range": [21, 30] }, { "input_cost_per_query": 0.00664, - "max_results_range": [ - 31, - 40 - ] + "max_results_range": [31, 40] }, { "input_cost_per_query": 0.0083, - "max_results_range": [ - 41, - 50 - ] + "max_results_range": [41, 50] }, { "input_cost_per_query": 0.00996, - "max_results_range": [ - 51, - 60 - ] + "max_results_range": [51, 60] }, { "input_cost_per_query": 0.01162, - "max_results_range": [ - 61, - 70 - ] + "max_results_range": [61, 70] }, { "input_cost_per_query": 0.01328, - "max_results_range": [ - 71, - 80 - ] + "max_results_range": [71, 80] }, { "input_cost_per_query": 0.01494, - "max_results_range": [ - 81, - 90 - ] + "max_results_range": [81, 90] }, { "input_cost_per_query": 0.0166, - "max_results_range": [ - 91, - 100 - ] + "max_results_range": [91, 100] } ], "metadata": { @@ -8122,9 +7573,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://elevenlabs.io/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "elevenlabs/scribe_v1_experimental": { "input_cost_per_second": 0.0000611, @@ -8137,9 +7586,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://elevenlabs.io/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "embed-english-light-v2.0": { "input_cost_per_token": 1e-7, @@ -8511,41 +7958,31 @@ "litellm_provider": "fal_ai", "mode": "image_generation", "output_cost_per_image": 0.0398, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "fal_ai/fal-ai/flux-pro/v1.1-ultra": { "litellm_provider": "fal_ai", "mode": "image_generation", "output_cost_per_image": 0.0398, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "fal_ai/fal-ai/imagen4/preview": { "litellm_provider": "fal_ai", "mode": "image_generation", "output_cost_per_image": 0.0398, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "fal_ai/fal-ai/recraft/v3/text-to-image": { "litellm_provider": "fal_ai", "mode": "image_generation", "output_cost_per_image": 0.0398, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "fal_ai/fal-ai/stable-diffusion-v35-medium": { "litellm_provider": "fal_ai", "mode": "image_generation", "output_cost_per_image": 0.0398, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "featherless_ai/featherless-ai/Qwerky-72B": { "litellm_provider": "featherless_ai", @@ -9608,16 +9045,8 @@ "mode": "chat", "output_cost_per_token": 4e-7, "source": "https://ai.google.dev/pricing#2_0flash", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - 
"supported_output_modalities": [ - "text", - "image" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -9648,16 +9077,8 @@ "mode": "chat", "output_cost_per_token": 6e-7, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_audio_output": true, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9696,16 +9117,8 @@ "output_cost_per_token": 6e-7, "output_cost_per_token_above_128k_tokens": 0, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_audio_output": true, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9732,15 +9145,8 @@ "mode": "chat", "output_cost_per_token": 3e-7, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": true, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9768,15 +9174,8 @@ "mode": "chat", "output_cost_per_token": 3e-7, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": true, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9808,20 +9207,9 @@ "output_cost_per_token": 0.000002, "rpm": 10, "source": "https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/gemini#gemini-2-0-flash-live-preview-04-09", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "audio" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "audio"], "supports_audio_output": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -9852,16 +9240,8 @@ "mode": "chat", "output_cost_per_token": 4e-7, "source": "https://ai.google.dev/pricing#2_0flash", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -9901,16 +9281,8 @@ "output_cost_per_token": 0, 
"output_cost_per_token_above_128k_tokens": 0, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_audio_output": true, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9949,16 +9321,8 @@ "output_cost_per_token": 0, "output_cost_per_token_above_128k_tokens": 0, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_audio_output": false, "supports_function_calling": false, "supports_parallel_function_calling": true, @@ -9988,19 +9352,9 @@ "output_cost_per_token": 0.00001, "output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_input": true, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10031,20 +9385,9 @@ "output_cost_per_reasoning_token": 0.0000025, "output_cost_per_token": 0.0000025, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10078,21 +9421,9 @@ "output_cost_per_token": 0.0000025, "rpm": 100000, "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-flash-image", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10126,21 +9457,9 @@ "output_cost_per_token": 0.00003, "rpm": 100000, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_endpoints": 
["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10172,20 +9491,9 @@ "output_cost_per_reasoning_token": 4e-7, "output_cost_per_token": 4e-7, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10217,20 +9525,9 @@ "output_cost_per_reasoning_token": 4e-7, "output_cost_per_token": 4e-7, "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10262,20 +9559,9 @@ "output_cost_per_reasoning_token": 0.0000025, "output_cost_per_token": 0.0000025, "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10307,20 +9593,9 @@ "output_cost_per_audio_token": 0.000012, "output_cost_per_token": 0.000002, "source": "https://ai.google.dev/gemini-api/docs/pricing", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "audio" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "audio"], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -10353,20 +9628,9 @@ "output_cost_per_token": 0.000002, "rpm": 100000, "source": "https://ai.google.dev/gemini-api/docs/pricing", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "audio" - ], + "supported_endpoints": 
["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "audio"], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -10399,20 +9663,9 @@ "output_cost_per_reasoning_token": 4e-7, "output_cost_per_token": 4e-7, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10444,20 +9697,9 @@ "output_cost_per_reasoning_token": 0.0000035, "output_cost_per_token": 6e-7, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10488,20 +9730,9 @@ "output_cost_per_reasoning_token": 0.0000025, "output_cost_per_token": 0.0000025, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10534,19 +9765,9 @@ "output_cost_per_token": 0.00001, "output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_input": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -10577,19 +9798,9 @@ "output_cost_per_token": 0.00001, "output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + 
"supported_output_modalities": ["text"], "supports_audio_input": true, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10621,20 +9832,9 @@ "output_cost_per_token": 0.00001, "output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10666,23 +9866,10 @@ "output_cost_per_token": 0.00001, "output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], - "supported_regions": [ - "global" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supported_regions": ["global"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10714,20 +9901,9 @@ "output_cost_per_token": 0.00001, "output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10759,12 +9935,8 @@ "output_cost_per_token": 0.00001, "output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "audio" - ], + "supported_modalities": ["text"], + "supported_output_modalities": ["audio"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -11209,16 +10381,8 @@ "output_cost_per_token": 4e-7, "rpm": 10000, "source": "https://ai.google.dev/pricing#2_0flash", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -11249,16 +10413,8 @@ "output_cost_per_token": 4e-7, "rpm": 10000, "source": "https://ai.google.dev/pricing#2_0flash", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - 
"image" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_audio_output": false, "supports_function_calling": true, "supports_prompt_caching": true, @@ -11298,16 +10454,8 @@ "output_cost_per_token_above_128k_tokens": 0, "rpm": 10, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_audio_output": true, "supports_function_calling": true, "supports_prompt_caching": true, @@ -11335,15 +10483,8 @@ "output_cost_per_token": 3e-7, "rpm": 4000, "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.0-flash-lite", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": true, "supports_function_calling": true, "supports_prompt_caching": true, @@ -11372,15 +10513,8 @@ "output_cost_per_token": 3e-7, "rpm": 60000, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash-lite", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_prompt_caching": true, @@ -11412,20 +10546,9 @@ "output_cost_per_token": 0.0000015, "rpm": 10, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2-0-flash-live-001", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "audio" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "audio"], "supports_audio_output": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -11457,16 +10580,8 @@ "output_cost_per_token": 4e-7, "rpm": 10000, "source": "https://ai.google.dev/pricing#2_0flash", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -11507,16 +10622,8 @@ "output_cost_per_token_above_128k_tokens": 0, "rpm": 10, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_audio_output": true, "supports_function_calling": true, "supports_prompt_caching": true, @@ -11556,16 +10663,8 @@ "output_cost_per_token_above_128k_tokens": 0, "rpm": 10, "source": 
"https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_audio_output": true, "supports_function_calling": true, "supports_prompt_caching": true, @@ -11637,20 +10736,9 @@ "output_cost_per_token": 0.0000025, "rpm": 100000, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -11686,21 +10774,9 @@ "output_cost_per_token": 0.0000025, "rpm": 100000, "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-flash-image", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -11734,21 +10810,9 @@ "output_cost_per_token": 0.00003, "rpm": 100000, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -11781,20 +10845,9 @@ "output_cost_per_token": 4e-7, "rpm": 15, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-lite", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -11828,20 +10881,9 @@ "output_cost_per_token": 4e-7, "rpm": 15, "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - 
"supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -11875,20 +10917,9 @@ "output_cost_per_token": 0.0000025, "rpm": 15, "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -11922,20 +10953,9 @@ "output_cost_per_token": 0.0000025, "rpm": 15, "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -11969,20 +10989,9 @@ "output_cost_per_token": 4e-7, "rpm": 15, "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -12016,20 +11025,9 @@ "output_cost_per_token": 4e-7, "rpm": 15, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-lite", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -12063,19 +11061,9 @@ "output_cost_per_token": 6e-7, "rpm": 10, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - 
"supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_pdf_input": true, @@ -12107,19 +11095,9 @@ "output_cost_per_token": 0.0000025, "rpm": 10, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_pdf_input": true, @@ -12152,16 +11130,9 @@ "output_cost_per_token": 6e-7, "rpm": 10, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions" - ], - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "audio" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text"], + "supported_output_modalities": ["audio"], "supports_audio_output": false, "supports_function_calling": true, "supports_prompt_caching": true, @@ -12192,19 +11163,9 @@ "output_cost_per_token_above_200k_tokens": 0.000015, "rpm": 2000, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_input": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -12237,19 +11198,9 @@ "output_cost_per_token_above_200k_tokens": 0, "rpm": 5, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_input": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -12282,15 +11233,8 @@ "output_cost_per_token_above_200k_tokens": 0.000015, "rpm": 10000, "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_pdf_input": true, @@ -12322,15 +11266,8 @@ "output_cost_per_token_above_200k_tokens": 0.000015, "rpm": 10000, "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - 
"text" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_pdf_input": true, @@ -12363,15 +11300,8 @@ "output_cost_per_token_above_200k_tokens": 0.000015, "rpm": 10000, "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_pdf_input": true, @@ -12404,12 +11334,8 @@ "output_cost_per_token_above_200k_tokens": 0.000015, "rpm": 10000, "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "audio" - ], + "supported_modalities": ["text"], + "supported_output_modalities": ["audio"], "supports_audio_output": false, "supports_function_calling": true, "supports_prompt_caching": true, @@ -12636,12 +11562,8 @@ "mode": "video_generation", "output_cost_per_second": 0.35, "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "video" - ] + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] }, "gemini/veo-3.0-fast-generate-preview": { "litellm_provider": "gemini", @@ -12650,12 +11572,8 @@ "mode": "video_generation", "output_cost_per_second": 0.4, "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "video" - ] + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] }, "gemini/veo-3.0-generate-preview": { "litellm_provider": "gemini", @@ -12664,12 +11582,8 @@ "mode": "video_generation", "output_cost_per_second": 0.75, "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "video" - ] + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] }, "gemini/veo-3.1-fast-generate-preview": { "litellm_provider": "gemini", @@ -12678,12 +11592,8 @@ "mode": "video_generation", "output_cost_per_second": 0.15, "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "video" - ] + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] }, "gemini/veo-3.1-generate-preview": { "litellm_provider": "gemini", @@ -12692,12 +11602,8 @@ "mode": "video_generation", "output_cost_per_second": 0.4, "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "video" - ] + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] }, "google_pse/search": { "input_cost_per_query": 0.005, @@ -13090,18 +11996,9 @@ "output_cost_per_token": 0.000008, "output_cost_per_token_batches": 0.000004, "output_cost_per_token_priority": 0.000014, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + 
"supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -13124,18 +12021,9 @@ "mode": "chat", "output_cost_per_token": 0.000008, "output_cost_per_token_batches": 0.000004, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -13161,18 +12049,9 @@ "output_cost_per_token": 0.0000016, "output_cost_per_token_batches": 8e-7, "output_cost_per_token_priority": 0.0000028, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -13195,18 +12074,9 @@ "mode": "chat", "output_cost_per_token": 0.0000016, "output_cost_per_token_batches": 8e-7, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -13232,18 +12102,9 @@ "output_cost_per_token": 4e-7, "output_cost_per_token_batches": 2e-7, "output_cost_per_token_priority": 8e-7, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -13266,18 +12127,9 @@ "mode": "chat", "output_cost_per_token": 4e-7, "output_cost_per_token_batches": 2e-7, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -13660,9 +12512,7 @@ "max_output_tokens": 2000, "mode": "audio_transcription", "output_cost_per_token": 0.000005, - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "gpt-4o-mini-tts": { "input_cost_per_token": 0.0000025, @@ -13671,16 +12521,9 @@ "output_cost_per_audio_token": 0.000012, "output_cost_per_second": 0.00025, 
"output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/audio/speech" - ], - "supported_modalities": [ - "text", - "audio" - ], - "supported_output_modalities": [ - "audio" - ] + "supported_endpoints": ["/v1/audio/speech"], + "supported_modalities": ["text", "audio"], + "supported_output_modalities": ["audio"] }, "gpt-4o-realtime-preview": { "cache_read_input_token_cost": 0.0000025, @@ -13809,9 +12652,7 @@ "max_output_tokens": 2000, "mode": "audio_transcription", "output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "gpt-5": { "cache_read_input_token_cost": 1.25e-7, @@ -13828,18 +12669,9 @@ "output_cost_per_token": 0.00001, "output_cost_per_token_flex": 0.000005, "output_cost_per_token_priority": 0.00002, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -13864,18 +12696,9 @@ "mode": "chat", "output_cost_per_token": 0.00001, "output_cost_per_token_priority": 0.00002, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -13900,18 +12723,9 @@ "mode": "chat", "output_cost_per_token": 0.00001, "output_cost_per_token_priority": 0.00002, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -13936,18 +12750,9 @@ "mode": "chat", "output_cost_per_token": 0.00001, "output_cost_per_token_priority": 0.00002, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "image"], "supports_function_calling": false, "supports_native_streaming": true, "supports_parallel_function_calling": false, @@ -13969,17 +12774,9 @@ "mode": "responses", "output_cost_per_token": 0.00012, "output_cost_per_token_batches": 0.00006, - "supported_endpoints": [ - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, 
"supports_native_streaming": false, "supports_parallel_function_calling": true, @@ -14002,17 +12799,9 @@ "mode": "responses", "output_cost_per_token": 0.00012, "output_cost_per_token_batches": 0.00006, - "supported_endpoints": [ - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": false, "supports_parallel_function_calling": true, @@ -14040,18 +12829,9 @@ "output_cost_per_token": 0.00001, "output_cost_per_token_flex": 0.000005, "output_cost_per_token_priority": 0.00002, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -14073,18 +12853,9 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": false, "supports_native_streaming": true, "supports_parallel_function_calling": false, @@ -14105,18 +12876,9 @@ "max_tokens": 16384, "mode": "chat", "output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": false, "supports_native_streaming": true, "supports_parallel_function_calling": false, @@ -14137,16 +12899,9 @@ "max_tokens": 128000, "mode": "responses", "output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -14170,16 +12925,9 @@ "mode": "responses", "output_cost_per_token": 0.00001, "output_cost_per_token_priority": 0.00002, - "supported_endpoints": [ - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -14201,18 +12949,11 @@ "max_output_tokens": 128000, "max_tokens": 128000, "mode": "responses", - "output_cost_per_token": 0.000002, - 
"output_cost_per_token_priority": 0.0000036, - "supported_endpoints": [ - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "output_cost_per_token": 0.000002, + "output_cost_per_token_priority": 0.0000036, + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -14239,18 +12980,9 @@ "output_cost_per_token": 0.000002, "output_cost_per_token_flex": 0.000001, "output_cost_per_token_priority": 0.0000036, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -14278,18 +13010,9 @@ "output_cost_per_token": 0.000002, "output_cost_per_token_flex": 0.000001, "output_cost_per_token_priority": 0.0000036, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -14315,18 +13038,9 @@ "mode": "chat", "output_cost_per_token": 4e-7, "output_cost_per_token_flex": 2e-7, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -14350,18 +13064,9 @@ "mode": "chat", "output_cost_per_token": 4e-7, "output_cost_per_token_flex": 2e-7, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -14378,9 +13083,7 @@ "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "gpt-image-1-mini": { "cache_read_input_image_token_cost": 2.5e-7, @@ -14390,10 +13093,7 @@ "litellm_provider": "openai", "mode": "chat", "output_cost_per_image_token": 0.000008, - "supported_endpoints": [ - "/v1/images/generations", - "/v1/images/edits" - ] + "supported_endpoints": ["/v1/images/generations", "/v1/images/edits"] }, "gpt-realtime": { "cache_creation_input_audio_token_cost": 
4e-7, @@ -14408,18 +13108,9 @@ "mode": "chat", "output_cost_per_audio_token": 0.000064, "output_cost_per_token": 0.000016, - "supported_endpoints": [ - "/v1/realtime" - ], - "supported_modalities": [ - "text", - "image", - "audio" - ], - "supported_output_modalities": [ - "text", - "audio" - ], + "supported_endpoints": ["/v1/realtime"], + "supported_modalities": ["text", "image", "audio"], + "supported_output_modalities": ["text", "audio"], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -14439,18 +13130,9 @@ "mode": "chat", "output_cost_per_audio_token": 0.00002, "output_cost_per_token": 0.0000024, - "supported_endpoints": [ - "/v1/realtime" - ], - "supported_modalities": [ - "text", - "image", - "audio" - ], - "supported_output_modalities": [ - "text", - "audio" - ], + "supported_endpoints": ["/v1/realtime"], + "supported_modalities": ["text", "image", "audio"], + "supported_output_modalities": ["text", "audio"], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -14471,18 +13153,9 @@ "mode": "chat", "output_cost_per_audio_token": 0.000064, "output_cost_per_token": 0.000016, - "supported_endpoints": [ - "/v1/realtime" - ], - "supported_modalities": [ - "text", - "image", - "audio" - ], - "supported_output_modalities": [ - "text", - "audio" - ], + "supported_endpoints": ["/v1/realtime"], + "supported_modalities": ["text", "image", "audio"], + "supported_output_modalities": ["text", "audio"], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -14494,12 +13167,8 @@ "litellm_provider": "gradient_ai", "max_tokens": 2048, "mode": "chat", - "supported_endpoints": [ - "/v1/chat/completions" - ], - "supported_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], "supports_tool_choice": false }, "gradient_ai/anthropic-claude-3-opus": { @@ -14508,12 +13177,8 @@ "max_tokens": 1024, "mode": "chat", "output_cost_per_token": 0.000075, - "supported_endpoints": [ - "/v1/chat/completions" - ], - "supported_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], "supports_tool_choice": false }, "gradient_ai/anthropic-claude-3.5-haiku": { @@ -14522,12 +13187,8 @@ "max_tokens": 1024, "mode": "chat", "output_cost_per_token": 0.000004, - "supported_endpoints": [ - "/v1/chat/completions" - ], - "supported_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], "supports_tool_choice": false }, "gradient_ai/anthropic-claude-3.5-sonnet": { @@ -14536,12 +13197,8 @@ "max_tokens": 1024, "mode": "chat", "output_cost_per_token": 0.000015, - "supported_endpoints": [ - "/v1/chat/completions" - ], - "supported_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], "supports_tool_choice": false }, "gradient_ai/anthropic-claude-3.7-sonnet": { @@ -14550,12 +13207,8 @@ "max_tokens": 1024, "mode": "chat", "output_cost_per_token": 0.000015, - "supported_endpoints": [ - "/v1/chat/completions" - ], - "supported_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], "supports_tool_choice": false }, "gradient_ai/deepseek-r1-distill-llama-70b": { @@ -14564,12 +13217,8 @@ "max_tokens": 8000, "mode": "chat", "output_cost_per_token": 9.9e-7, - "supported_endpoints": [ - "/v1/chat/completions" 
- ], - "supported_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], "supports_tool_choice": false }, "gradient_ai/llama3-8b-instruct": { @@ -14578,12 +13227,8 @@ "max_tokens": 512, "mode": "chat", "output_cost_per_token": 2e-7, - "supported_endpoints": [ - "/v1/chat/completions" - ], - "supported_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], "supports_tool_choice": false }, "gradient_ai/llama3.3-70b-instruct": { @@ -14592,12 +13237,8 @@ "max_tokens": 2048, "mode": "chat", "output_cost_per_token": 6.5e-7, - "supported_endpoints": [ - "/v1/chat/completions" - ], - "supported_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], "supports_tool_choice": false }, "gradient_ai/mistral-nemo-instruct-2407": { @@ -14606,36 +13247,24 @@ "max_tokens": 512, "mode": "chat", "output_cost_per_token": 3e-7, - "supported_endpoints": [ - "/v1/chat/completions" - ], - "supported_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], "supports_tool_choice": false }, "gradient_ai/openai-gpt-4o": { "litellm_provider": "gradient_ai", "max_tokens": 16384, "mode": "chat", - "supported_endpoints": [ - "/v1/chat/completions" - ], - "supported_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], "supports_tool_choice": false }, "gradient_ai/openai-gpt-4o-mini": { "litellm_provider": "gradient_ai", "max_tokens": 16384, "mode": "chat", - "supported_endpoints": [ - "/v1/chat/completions" - ], - "supported_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], "supports_tool_choice": false }, "gradient_ai/openai-o3": { @@ -14644,12 +13273,8 @@ "max_tokens": 100000, "mode": "chat", "output_cost_per_token": 0.000008, - "supported_endpoints": [ - "/v1/chat/completions" - ], - "supported_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], "supports_tool_choice": false }, "gradient_ai/openai-o3-mini": { @@ -14658,12 +13283,8 @@ "max_tokens": 100000, "mode": "chat", "output_cost_per_token": 0.0000044, - "supported_endpoints": [ - "/v1/chat/completions" - ], - "supported_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], "supports_tool_choice": false }, "lemonade/Qwen3-Coder-30B-A3B-Instruct-GGUF": { @@ -15146,27 +13767,21 @@ "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "high/1024-x-1536/gpt-image-1": { "input_cost_per_pixel": 1.58945719e-7, "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "high/1536-x-1024/gpt-image-1": { "input_cost_per_pixel": 1.58945719e-7, "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "hyperbolic/NousResearch/Hermes-3-Llama-3.1-70B": { "input_cost_per_token": 1.2e-7, @@ -15822,27 +14437,21 @@ "litellm_provider": "openai", "mode": "image_generation", 
"output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "low/1024-x-1536/gpt-image-1": { "input_cost_per_pixel": 1.0172526e-8, "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "low/1536-x-1024/gpt-image-1": { "input_cost_per_pixel": 1.0172526e-8, "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "luminous-base": { "input_cost_per_token": 0.00003, @@ -15905,75 +14514,57 @@ "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "medium/1024-x-1536/gpt-image-1": { "input_cost_per_pixel": 4.0054321e-8, "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "medium/1536-x-1024/gpt-image-1": { "input_cost_per_pixel": 4.0054321e-8, "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "low/1024-x-1024/gpt-image-1-mini": { "input_cost_per_image": 0.005, "litellm_provider": "openai", "mode": "image_generation", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "low/1024-x-1536/gpt-image-1-mini": { "input_cost_per_image": 0.006, "litellm_provider": "openai", "mode": "image_generation", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "low/1536-x-1024/gpt-image-1-mini": { "input_cost_per_image": 0.006, "litellm_provider": "openai", "mode": "image_generation", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "medium/1024-x-1024/gpt-image-1-mini": { "input_cost_per_image": 0.011, "litellm_provider": "openai", "mode": "image_generation", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "medium/1024-x-1536/gpt-image-1-mini": { "input_cost_per_image": 0.015, "litellm_provider": "openai", "mode": "image_generation", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "medium/1536-x-1024/gpt-image-1-mini": { "input_cost_per_image": 0.015, "litellm_provider": "openai", "mode": "image_generation", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "medlm-large": { "input_cost_per_character": 0.000005, @@ -16133,14 +14724,8 @@ "mode": "chat", "output_cost_per_token": 9.7e-7, "output_cost_per_token_batches": 4.85e-7, - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text", - "code" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "code"], "supports_function_calling": true, "supports_tool_choice": false }, @@ -16154,14 +14739,8 @@ "mode": "chat", "output_cost_per_token": 6.6e-7, "output_cost_per_token_batches": 3.3e-7, - "supported_modalities": [ - "text", - 
"image" - ], - "supported_output_modalities": [ - "text", - "code" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "code"], "supports_function_calling": true, "supports_tool_choice": false }, @@ -16172,12 +14751,8 @@ "max_tokens": 128000, "mode": "chat", "source": "https://llama.developer.meta.com/docs/models", - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_tool_choice": true }, @@ -16188,12 +14763,8 @@ "max_tokens": 128000, "mode": "chat", "source": "https://llama.developer.meta.com/docs/models", - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_tool_choice": true }, @@ -16204,13 +14775,8 @@ "max_tokens": 128000, "mode": "chat", "source": "https://llama.developer.meta.com/docs/models", - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_tool_choice": true }, @@ -16221,13 +14787,8 @@ "max_tokens": 128000, "mode": "chat", "source": "https://llama.developer.meta.com/docs/models", - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_tool_choice": true }, @@ -16395,9 +14956,7 @@ "ocr_cost_per_page": 0.001, "annotation_cost_per_page": 0.003, "mode": "ocr", - "supported_endpoints": [ - "/v1/ocr" - ], + "supported_endpoints": ["/v1/ocr"], "source": "https://mistral.ai/pricing#api-pricing" }, "mistral/mistral-ocr-2505-completion": { @@ -16405,9 +14964,7 @@ "ocr_cost_per_page": 0.001, "annotation_cost_per_page": 0.003, "mode": "ocr", - "supported_endpoints": [ - "/v1/ocr" - ], + "supported_endpoints": ["/v1/ocr"], "source": "https://mistral.ai/pricing#api-pricing" }, "mistral/magistral-medium-latest": { @@ -16994,14 +15551,8 @@ "output_cost_per_token": 0, "output_vector_size": 768, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models", - "supported_endpoints": [ - "/v1/embeddings" - ], - "supported_modalities": [ - "text", - "image", - "video" - ] + "supported_endpoints": ["/v1/embeddings"], + "supported_modalities": ["text", "image", "video"] }, "multimodalembedding@001": { "input_cost_per_character": 2e-7, @@ -17017,14 +15568,8 @@ "output_cost_per_token": 0, "output_vector_size": 768, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models", - "supported_endpoints": [ - "/v1/embeddings" - ], - "supported_modalities": [ - "text", - "image", - "video" - ] + "supported_endpoints": ["/v1/embeddings"], + "supported_modalities": ["text", "image", "video"] }, "nscale/Qwen/QwQ-32B": { "input_cost_per_token": 1.8e-7, @@ -17060,9 +15605,7 @@ "mode": "image_generation", "output_cost_per_pixel": 0, "source": "https://docs.nscale.com/docs/inference/serverless-models/current#image-models", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "nscale/deepseek-ai/DeepSeek-R1-Distill-Llama-70B": { "input_cost_per_token": 3.75e-7, @@ -17167,9 +15710,7 @@ "mode": "image_generation", 
"output_cost_per_pixel": 0, "source": "https://docs.nscale.com/docs/inference/serverless-models/current#image-models", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "o1": { "cache_read_input_token_cost": 0.0000075, @@ -17275,17 +15816,9 @@ "mode": "responses", "output_cost_per_token": 0.0006, "output_cost_per_token_batches": 0.0003, - "supported_endpoints": [ - "/v1/responses", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/responses", "/v1/batch"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": false, "supports_parallel_function_calling": true, @@ -17307,17 +15840,9 @@ "mode": "responses", "output_cost_per_token": 0.0006, "output_cost_per_token_batches": 0.0003, - "supported_endpoints": [ - "/v1/responses", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/responses", "/v1/batch"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": false, "supports_parallel_function_calling": true, @@ -17350,13 +15875,8 @@ "/v1/completions", "/v1/batch" ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_pdf_input": true, @@ -17382,13 +15902,8 @@ "/v1/completions", "/v1/batch" ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_pdf_input": true, @@ -17410,18 +15925,9 @@ "mode": "responses", "output_cost_per_token": 0.00004, "output_cost_per_token_batches": 0.00002, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -17443,18 +15949,9 @@ "mode": "responses", "output_cost_per_token": 0.00004, "output_cost_per_token_batches": 0.00002, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -17509,17 +16006,9 @@ "mode": "responses", "output_cost_per_token": 0.00008, "output_cost_per_token_batches": 0.00004, - "supported_endpoints": [ - "/v1/responses", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image" - ], - 
"supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/responses", "/v1/batch"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_pdf_input": true, @@ -17539,17 +16028,9 @@ "mode": "responses", "output_cost_per_token": 0.00008, "output_cost_per_token_batches": 0.00004, - "supported_endpoints": [ - "/v1/responses", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/responses", "/v1/batch"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_pdf_input": true, @@ -17614,18 +16095,9 @@ "mode": "responses", "output_cost_per_token": 0.000008, "output_cost_per_token_batches": 0.000004, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -17647,18 +16119,9 @@ "mode": "responses", "output_cost_per_token": 0.000008, "output_cost_per_token_batches": 0.000004, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -18984,13 +17447,8 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.00001, - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_reasoning": true, "supports_tool_choice": true }, @@ -19003,13 +17461,8 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.00001, - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_reasoning": true, "supports_tool_choice": true }, @@ -19022,13 +17475,8 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.00001, - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_reasoning": true, "supports_tool_choice": true }, @@ -19041,13 +17489,8 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.000002, - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_reasoning": true, "supports_tool_choice": true }, @@ -19060,13 +17503,8 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 4e-7, - "supported_modalities": [ - 
"text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_reasoning": true, "supports_tool_choice": true }, @@ -19901,18 +18339,14 @@ "mode": "image_generation", "output_cost_per_image": 0.022, "source": "https://www.recraft.ai/docs#pricing", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "recraft/recraftv3": { "litellm_provider": "recraft", "mode": "image_generation", "output_cost_per_image": 0.04, "source": "https://www.recraft.ai/docs#pricing", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "replicate/meta/llama-2-13b": { "input_cost_per_token": 1e-7, @@ -21240,17 +19674,13 @@ "input_cost_per_character": 0.000015, "litellm_provider": "openai", "mode": "audio_speech", - "supported_endpoints": [ - "/v1/audio/speech" - ] + "supported_endpoints": ["/v1/audio/speech"] }, "tts-1-hd": { "input_cost_per_character": 0.00003, "litellm_provider": "openai", "mode": "audio_speech", - "supported_endpoints": [ - "/v1/audio/speech" - ] + "supported_endpoints": ["/v1/audio/speech"] }, "us.amazon.nova-lite-v1:0": { "input_cost_per_token": 6e-8, @@ -21679,14 +20109,8 @@ "mode": "chat", "output_cost_per_token": 9.7e-7, "output_cost_per_token_batches": 4.85e-7, - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text", - "code" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "code"], "supports_function_calling": true, "supports_tool_choice": false }, @@ -21700,14 +20124,8 @@ "mode": "chat", "output_cost_per_token": 6.6e-7, "output_cost_per_token_batches": 3.3e-7, - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text", - "code" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "code"], "supports_function_calling": true, "supports_tool_choice": false }, @@ -23108,9 +21526,7 @@ "mode": "chat", "output_cost_per_token": 0.0000054, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", - "supported_regions": [ - "us-west2" - ], + "supported_regions": ["us-west2"], "supports_assistant_prefill": true, "supports_function_calling": true, "supports_prompt_caching": true, @@ -23291,14 +21707,8 @@ "mode": "chat", "output_cost_per_token": 0.00000115, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text", - "code" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "code"], "supports_function_calling": true, "supports_tool_choice": true }, @@ -23311,14 +21721,8 @@ "mode": "chat", "output_cost_per_token": 0.00000115, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text", - "code" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "code"], "supports_function_calling": true, "supports_tool_choice": true }, @@ -23331,14 +21735,8 @@ "mode": "chat", "output_cost_per_token": 7e-7, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text", - 
"code" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "code"], "supports_function_calling": true, "supports_tool_choice": true }, @@ -23351,14 +21749,8 @@ "mode": "chat", "output_cost_per_token": 7e-7, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text", - "code" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "code"], "supports_function_calling": true, "supports_tool_choice": true }, @@ -23544,9 +21936,7 @@ "litellm_provider": "vertex_ai", "mode": "ocr", "ocr_cost_per_page": 0.0005, - "supported_endpoints": [ - "/v1/ocr" - ], + "supported_endpoints": ["/v1/ocr"], "source": "https://cloud.google.com/generative-ai-app-builder/pricing" }, "vertex_ai/openai/gpt-oss-120b-maas": { @@ -23626,12 +22016,8 @@ "mode": "video_generation", "output_cost_per_second": 0.35, "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "video" - ] + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] }, "vertex_ai/veo-3.0-fast-generate-preview": { "litellm_provider": "vertex_ai-video-models", @@ -23640,12 +22026,8 @@ "mode": "video_generation", "output_cost_per_second": 0.4, "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "video" - ] + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] }, "vertex_ai/veo-3.0-generate-preview": { "litellm_provider": "vertex_ai-video-models", @@ -23654,12 +22036,8 @@ "mode": "video_generation", "output_cost_per_second": 0.75, "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "video" - ] + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] }, "vertex_ai/veo-3.1-generate-preview": { "litellm_provider": "vertex_ai-video-models", @@ -23668,12 +22046,8 @@ "mode": "video_generation", "output_cost_per_second": 0.4, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/veo", - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "video" - ] + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] }, "vertex_ai/veo-3.1-fast-generate-preview": { "litellm_provider": "vertex_ai-video-models", @@ -23682,12 +22056,8 @@ "mode": "video_generation", "output_cost_per_second": 0.15, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/veo", - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "video" - ] + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] }, "voyage/rerank-2": { "input_cost_per_query": 5e-8, @@ -24294,9 +22664,7 @@ "litellm_provider": "openai", "mode": "audio_transcription", "output_cost_per_second": 0.0001, - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "xai/grok-2": { "input_cost_per_token": 0.000002, @@ -24695,99 +23063,54 @@ "mode": "video_generation", "output_cost_per_video_per_second": 0.1, "source": "https://platform.openai.com/docs/api-reference/videos", - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "video" - ], - "supported_resolutions": [ - "720x1280", - "1280x720" - ] + 
"supported_modalities": ["text", "image"], + "supported_output_modalities": ["video"], + "supported_resolutions": ["720x1280", "1280x720"] }, "openai/sora-2-pro": { "litellm_provider": "openai", "mode": "video_generation", "output_cost_per_video_per_second": 0.3, "source": "https://platform.openai.com/docs/api-reference/videos", - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "video" - ], - "supported_resolutions": [ - "720x1280", - "1280x720" - ] + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["video"], + "supported_resolutions": ["720x1280", "1280x720"] }, "azure/sora-2": { "litellm_provider": "azure", "mode": "video_generation", "output_cost_per_video_per_second": 0.1, "source": "https://azure.microsoft.com/en-us/products/ai-services/video-generation", - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "video" - ], - "supported_resolutions": [ - "720x1280", - "1280x720" - ] + "supported_modalities": ["text"], + "supported_output_modalities": ["video"], + "supported_resolutions": ["720x1280", "1280x720"] }, "azure/sora-2-pro": { "litellm_provider": "azure", "mode": "video_generation", "output_cost_per_video_per_second": 0.3, "source": "https://azure.microsoft.com/en-us/products/ai-services/video-generation", - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "video" - ], - "supported_resolutions": [ - "720x1280", - "1280x720" - ] + "supported_modalities": ["text"], + "supported_output_modalities": ["video"], + "supported_resolutions": ["720x1280", "1280x720"] }, "azure/sora-2-pro-high-res": { "litellm_provider": "azure", "mode": "video_generation", "output_cost_per_video_per_second": 0.5, "source": "https://azure.microsoft.com/en-us/products/ai-services/video-generation", - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "video" - ], - "supported_resolutions": [ - "1024x1792", - "1792x1024" - ] + "supported_modalities": ["text"], + "supported_output_modalities": ["video"], + "supported_resolutions": ["1024x1792", "1792x1024"] }, "runwayml/gen4_turbo": { "litellm_provider": "runwayml", "mode": "video_generation", "output_cost_per_video_per_second": 0.05, "source": "https://docs.dev.runwayml.com/guides/pricing/", - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "video" - ], - "supported_resolutions": [ - "1280x720", - "720x1280" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["video"], + "supported_resolutions": ["1280x720", "720x1280"], "metadata": { "comment": "5 credits per second @ $0.01 per credit = $0.05 per second" } @@ -24797,17 +23120,9 @@ "mode": "video_generation", "output_cost_per_video_per_second": 0.15, "source": "https://docs.dev.runwayml.com/guides/pricing/", - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "video" - ], - "supported_resolutions": [ - "1280x720", - "720x1280" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["video"], + "supported_resolutions": ["1280x720", "720x1280"], "metadata": { "comment": "15 credits per second @ $0.01 per credit = $0.15 per second" } @@ -24817,17 +23132,9 @@ "mode": "video_generation", "output_cost_per_video_per_second": 0.05, "source": "https://docs.dev.runwayml.com/guides/pricing/", - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "video" - ], - "supported_resolutions": [ - "1280x720", - 
"720x1280" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["video"], + "supported_resolutions": ["1280x720", "720x1280"], "metadata": { "comment": "5 credits per second @ $0.01 per credit = $0.05 per second" } @@ -24838,17 +23145,9 @@ "input_cost_per_image": 0.05, "output_cost_per_image": 0.05, "source": "https://docs.dev.runwayml.com/guides/pricing/", - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "image" - ], - "supported_resolutions": [ - "1280x720", - "1920x1080" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["image"], + "supported_resolutions": ["1280x720", "1920x1080"], "metadata": { "comment": "5 credits per 720p image or 8 credits per 1080p image @ $0.01 per credit. Using 5 credits ($0.05) as base cost" } @@ -24859,17 +23158,9 @@ "input_cost_per_image": 0.02, "output_cost_per_image": 0.02, "source": "https://docs.dev.runwayml.com/guides/pricing/", - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "image" - ], - "supported_resolutions": [ - "1280x720", - "1920x1080" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["image"], + "supported_resolutions": ["1280x720", "1920x1080"], "metadata": { "comment": "2 credits per image (any resolution) @ $0.01 per credit = $0.02 per image" } @@ -24883,4 +23174,4 @@ "comment": "Estimated cost based on standard TTS pricing. RunwayML uses ElevenLabs models." } } -} \ No newline at end of file +} diff --git a/tests/ipcMain/sendMessage.test.ts b/tests/ipcMain/sendMessage.test.ts index 643d8e707..4eca7c577 100644 --- a/tests/ipcMain/sendMessage.test.ts +++ b/tests/ipcMain/sendMessage.test.ts @@ -1604,12 +1604,12 @@ describe.each(PROVIDER_CONFIGS)("%s:%s image support", (provider, model) => { KNOWN_MODELS.GPT_CODEX.providerModelId ); expect(result1.success).toBe(true); - + const collector1 = createEventCollector(env.sentEvents, workspaceId); await collector1.waitForEvent("stream-end", 30000); assertStreamSuccess(collector1); env.sentEvents.length = 0; // Clear events - + // Second message - should use previousResponseId from first const result2 = await sendMessageWithModel( env.mockIpcRenderer, @@ -1619,15 +1619,15 @@ describe.each(PROVIDER_CONFIGS)("%s:%s image support", (provider, model) => { KNOWN_MODELS.GPT_CODEX.providerModelId ); expect(result2.success).toBe(true); - + const collector2 = createEventCollector(env.sentEvents, workspaceId); await collector2.waitForEvent("stream-end", 30000); assertStreamSuccess(collector2); - + // Verify history contains both messages const history = await readChatHistory(env.tempDir, workspaceId); expect(history.length).toBeGreaterThanOrEqual(4); // 2 user + 2 assistant - + // Verify assistant messages have responseId const assistantMessages = history.filter((m) => m.role === "assistant"); expect(assistantMessages.length).toBeGreaterThanOrEqual(2); diff --git a/tests/models/knownModels.test.ts b/tests/models/knownModels.test.ts index 41b5b42cc..696216cc6 100644 --- a/tests/models/knownModels.test.ts +++ b/tests/models/knownModels.test.ts @@ -1,6 +1,6 @@ /** * Integration test for known models - verifies all models exist in models.json - * + * * This test does NOT go through IPC - it directly uses data from models.json * to verify that every providerModelId in KNOWN_MODELS exists. 
*/ @@ -12,30 +12,30 @@ import modelsJson from "@/utils/tokens/models.json"; describe("Known Models Integration", () => { test("all known models exist in models.json", () => { const missingModels: string[] = []; - + for (const [key, model] of Object.entries(KNOWN_MODELS)) { const modelId = model.providerModelId; - + // Check if model exists in models.json if (!(modelId in modelsJson)) { missingModels.push(`${key}: ${model.provider}:${modelId}`); } } - + // Report all missing models at once for easier debugging if (missingModels.length > 0) { throw new Error( `The following known models are missing from models.json:\n${missingModels.join("\n")}\n\n` + - `Run 'bun scripts/update_models.ts' to refresh models.json from LiteLLM.` + `Run 'bun scripts/update_models.ts' to refresh models.json from LiteLLM.` ); } }); - + test("all known models have required metadata", () => { for (const [key, model] of Object.entries(KNOWN_MODELS)) { const modelId = model.providerModelId; const modelData = modelsJson[modelId as keyof typeof modelsJson] as Record; - + expect(modelData).toBeDefined(); // Check that basic metadata fields exist (not all models have all fields) expect(typeof modelData.litellm_provider).toBe("string"); From 01364e97633248ded2843fbbdfd25eac12d7650f Mon Sep 17 00:00:00 2001 From: Ammar Date: Fri, 14 Nov 2025 02:43:24 +0000 Subject: [PATCH 9/9] =?UTF-8?q?=F0=9F=A4=96=20fix:=20ensure=20default=20mo?= =?UTF-8?q?del=20is=20first=20in=20LRU=20initialization?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The E2E test failure was caused by MODEL_ABBREVIATIONS being sorted alphabetically, which made gpt-5.1-codex the first model in the LRU instead of the default anthropic:claude-sonnet-4-5. Fix: Ensure defaultModel is always first in DEFAULT_MODELS array. _Generated with `mux`_ --- src/hooks/useModelLRU.ts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/hooks/useModelLRU.ts b/src/hooks/useModelLRU.ts index 9ec1d47fd..47947e516 100644 --- a/src/hooks/useModelLRU.ts +++ b/src/hooks/useModelLRU.ts @@ -7,7 +7,11 @@ const MAX_LRU_SIZE = 8; const LRU_KEY = "model-lru"; // Default models from abbreviations (for initial LRU population) -const DEFAULT_MODELS = Object.values(MODEL_ABBREVIATIONS); +// Ensure defaultModel is first, then fill with other abbreviations +const DEFAULT_MODELS = [ + defaultModel, + ...Object.values(MODEL_ABBREVIATIONS).filter((m) => m !== defaultModel), +].slice(0, MAX_LRU_SIZE); /** * Get the default model from LRU (non-hook version for use outside React)