From 0e2a6303d31ad45e0a36e94a23097822f557d9cc Mon Sep 17 00:00:00 2001 From: Ammar Date: Wed, 15 Oct 2025 13:32:49 -0500 Subject: [PATCH 1/3] =?UTF-8?q?=F0=9F=A4=96=20Add=20Haiku=204-5=20support?= =?UTF-8?q?=20and=20centralize=20default=20model=20logic?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add claude-haiku-4-5 to MODEL_ABBREVIATIONS with 'haiku' shortcut - Add pricing/config for Haiku 4-5 to models-extra.ts ($1/$5 per million tokens) - Reorder MODEL_ABBREVIATIONS to put sonnet first (becomes default for new chats) - Centralize default model logic: created getDefaultModelFromLRU() in useModelLRU.ts - Remove all imports of defaultModel except in models.ts and useModelLRU.ts - Update all code paths to use LRU for default model selection: - useSendMessageOptions hook - getSendOptionsFromStorage (non-hook) - useAIViewKeybinds - Debug scripts (costs, agentSessionCli, send-message) - Update models.json with latest model pricing data from LiteLLM This makes the system less prescriptive - the most recently used model becomes the default for new chats, creating a natural user-driven flow. --- scripts/update_models.ts | 4 +- src/debug/agentSessionCli.ts | 4 +- src/debug/costs.ts | 4 +- src/debug/send-message.ts | 4 +- src/hooks/useAIViewKeybinds.ts | 6 +- src/hooks/useModelLRU.ts | 14 +- src/hooks/useSendMessageOptions.ts | 12 +- src/utils/messages/sendOptions.ts | 6 +- src/utils/slashCommands/registry.ts | 4 +- src/utils/tokens/models-extra.ts | 16 + src/utils/tokens/models.json | 3902 +++++++++++++++++++++------ 11 files changed, 3161 insertions(+), 815 deletions(-) diff --git a/scripts/update_models.ts b/scripts/update_models.ts index c6e840dd07..d1de810fd0 100644 --- a/scripts/update_models.ts +++ b/scripts/update_models.ts @@ -2,12 +2,12 @@ /** * Downloads the latest model prices and context window data from LiteLLM - * and saves it to src/utils/models.json + * and saves it to src/utils/tokens/models.json */ const LITELLM_URL = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"; -const OUTPUT_PATH = "src/utils/models.json"; +const OUTPUT_PATH = "src/utils/tokens/models.json"; async function updateModels() { console.log(`Fetching model data from ${LITELLM_URL}...`); diff --git a/src/debug/agentSessionCli.ts b/src/debug/agentSessionCli.ts index e8a0f694fb..1fd786a570 100644 --- a/src/debug/agentSessionCli.ts +++ b/src/debug/agentSessionCli.ts @@ -22,7 +22,7 @@ import { type SendMessageOptions, type WorkspaceChatMessage, } from "@/types/ipc"; -import { defaultModel } from "@/utils/ai/models"; +import { getDefaultModelFromLRU } from "@/hooks/useModelLRU"; import { ensureProvidersConfig } from "@/utils/providers/ensureProvidersConfig"; import { modeToToolPolicy, PLAN_MODE_INSTRUCTION } from "@/utils/ui/modeUtils"; import { extractAssistantText, extractReasoning, extractToolCalls } from "@/debug/chatExtractors"; @@ -184,7 +184,7 @@ async function main(): Promise { throw new Error("Message must be provided via --message or stdin"); } - const model = values.model && values.model.trim().length > 0 ? values.model.trim() : defaultModel; + const model = values.model && values.model.trim().length > 0 ? values.model.trim() : getDefaultModelFromLRU(); const timeoutMs = parseTimeout(values.timeout); const thinkingLevel = parseThinkingLevel(values["thinking-level"]); const initialMode = parseMode(values.mode); diff --git a/src/debug/costs.ts b/src/debug/costs.ts index 62fcbca38f..967c0852de 100644 --- a/src/debug/costs.ts +++ b/src/debug/costs.ts @@ -3,7 +3,7 @@ import * as path from "path"; import { defaultConfig } from "@/config"; import type { CmuxMessage } from "@/types/message"; import { calculateTokenStats } from "@/utils/tokens/tokenStatsCalculator"; -import { defaultModel } from "@/utils/ai/models"; +import { getDefaultModelFromLRU } from "@/hooks/useModelLRU"; /** * Debug command to display cost/token statistics for a workspace @@ -35,7 +35,7 @@ export function costsCommand(workspaceId: string) { // Detect model from first assistant message const firstAssistantMessage = messages.find((msg) => msg.role === "assistant"); - const model = firstAssistantMessage?.metadata?.model ?? defaultModel; + const model = firstAssistantMessage?.metadata?.model ?? getDefaultModelFromLRU(); // Calculate stats using shared logic (now synchronous) const stats = calculateTokenStats(messages, model); diff --git a/src/debug/send-message.ts b/src/debug/send-message.ts index 420ab8f64f..270b61603f 100644 --- a/src/debug/send-message.ts +++ b/src/debug/send-message.ts @@ -3,7 +3,7 @@ import * as path from "path"; import { defaultConfig } from "@/config"; import type { CmuxMessage } from "@/types/message"; import type { SendMessageOptions } from "@/types/ipc"; -import { defaultModel } from "@/utils/ai/models"; +import { getDefaultModelFromLRU } from "@/hooks/useModelLRU"; /** * Debug command to send a message to a workspace, optionally editing an existing message @@ -103,7 +103,7 @@ export function sendMessageCommand( // Prepare options const options: SendMessageOptions = { - model: defaultModel, + model: getDefaultModelFromLRU(), }; if (editMessageId) { diff --git a/src/hooks/useAIViewKeybinds.ts b/src/hooks/useAIViewKeybinds.ts index ba1c16e2e8..5dce224d1b 100644 --- a/src/hooks/useAIViewKeybinds.ts +++ b/src/hooks/useAIViewKeybinds.ts @@ -6,7 +6,7 @@ import { updatePersistedState, readPersistedState } from "@/hooks/usePersistedSt import type { ThinkingLevel, ThinkingLevelOn } from "@/types/thinking"; import { DEFAULT_THINKING_LEVEL } from "@/types/thinking"; import { getThinkingPolicyForModel } from "@/utils/thinking/policy"; -import { defaultModel } from "@/utils/ai/models"; +import { getDefaultModelFromLRU } from "@/hooks/useModelLRU"; interface UseAIViewKeybindsParams { workspaceId: string; @@ -66,10 +66,10 @@ export function useAIViewKeybinds({ e.preventDefault(); // Get selected model from localStorage (what user sees in UI) - // Fall back to message history model, then to default model + // Fall back to message history model, then to most recent model from LRU // This matches the same logic as useSendMessageOptions const selectedModel = readPersistedState(getModelKey(workspaceId), null); - const modelToUse = selectedModel ?? currentModel ?? defaultModel; + const modelToUse = selectedModel ?? currentModel ?? getDefaultModelFromLRU(); // Storage key for remembering this model's last-used active thinking level const lastThinkingKey = getLastThinkingByModelKey(modelToUse); diff --git a/src/hooks/useModelLRU.ts b/src/hooks/useModelLRU.ts index 42eeea623c..0e2575713d 100644 --- a/src/hooks/useModelLRU.ts +++ b/src/hooks/useModelLRU.ts @@ -1,6 +1,7 @@ import { useCallback, useEffect } from "react"; -import { usePersistedState } from "./usePersistedState"; +import { usePersistedState, readPersistedState } from "./usePersistedState"; import { MODEL_ABBREVIATIONS } from "@/utils/slashCommands/registry"; +import { defaultModel } from "@/utils/ai/models"; const MAX_LRU_SIZE = 8; const LRU_KEY = "model-lru"; @@ -8,6 +9,17 @@ const LRU_KEY = "model-lru"; // Default models from abbreviations (for initial LRU population) const DEFAULT_MODELS = Object.values(MODEL_ABBREVIATIONS); +/** + * Get the default model from LRU (non-hook version for use outside React) + * This is the ONLY place that reads from LRU outside of the hook. + * + * @returns The most recently used model, or defaultModel if LRU is empty + */ +export function getDefaultModelFromLRU(): string { + const lru = readPersistedState(LRU_KEY, []); + return lru[0] ?? defaultModel; +} + /** * Hook to manage a Least Recently Used (LRU) cache of AI models. * Stores up to 8 recently used models in localStorage. diff --git a/src/hooks/useSendMessageOptions.ts b/src/hooks/useSendMessageOptions.ts index 2ae7feea48..fa54a075db 100644 --- a/src/hooks/useSendMessageOptions.ts +++ b/src/hooks/useSendMessageOptions.ts @@ -2,8 +2,8 @@ import { use1MContext } from "./use1MContext"; import { useThinkingLevel } from "./useThinkingLevel"; import { useMode } from "@/contexts/ModeContext"; import { usePersistedState } from "./usePersistedState"; +import { useModelLRU } from "./useModelLRU"; import { modeToToolPolicy, PLAN_MODE_INSTRUCTION } from "@/utils/ui/modeUtils"; -import { defaultModel } from "@/utils/ai/models"; import { getModelKey } from "@/constants/storage"; import type { SendMessageOptions } from "@/types/ipc"; import type { UIMode } from "@/types/mode"; @@ -19,13 +19,14 @@ function constructSendMessageOptions( mode: UIMode, thinkingLevel: ThinkingLevel, preferredModel: string | null | undefined, - use1M: boolean + use1M: boolean, + fallbackModel: string ): SendMessageOptions { const additionalSystemInstructions = mode === "plan" ? PLAN_MODE_INSTRUCTION : undefined; // Ensure model is always a valid string (defensive against corrupted localStorage) const model = - typeof preferredModel === "string" && preferredModel ? preferredModel : defaultModel; + typeof preferredModel === "string" && preferredModel ? preferredModel : fallbackModel; // Enforce thinking policy at the UI boundary as well (e.g., gpt-5-pro → high only) const uiThinking = enforceThinkingPolicy(model, thinkingLevel); @@ -58,13 +59,14 @@ export function useSendMessageOptions(workspaceId: string): SendMessageOptions { const [use1M] = use1MContext(); const [thinkingLevel] = useThinkingLevel(); const [mode] = useMode(); + const { recentModels } = useModelLRU(); const [preferredModel] = usePersistedState( getModelKey(workspaceId), - defaultModel, + recentModels[0], // Most recently used model (LRU is never empty) { listener: true } // Listen for changes from ModelSelector and other sources ); - return constructSendMessageOptions(mode, thinkingLevel, preferredModel, use1M); + return constructSendMessageOptions(mode, thinkingLevel, preferredModel, use1M, recentModels[0]); } /** diff --git a/src/utils/messages/sendOptions.ts b/src/utils/messages/sendOptions.ts index 8454070196..6fd6623b44 100644 --- a/src/utils/messages/sendOptions.ts +++ b/src/utils/messages/sendOptions.ts @@ -4,13 +4,13 @@ import { getModeKey, USE_1M_CONTEXT_KEY, } from "@/constants/storage"; -import { defaultModel } from "@/utils/ai/models"; import { modeToToolPolicy, PLAN_MODE_INSTRUCTION } from "@/utils/ui/modeUtils"; import { readPersistedState } from "@/hooks/usePersistedState"; import type { SendMessageOptions } from "@/types/ipc"; import type { UIMode } from "@/types/mode"; import type { ThinkingLevel } from "@/types/thinking"; import { enforceThinkingPolicy } from "@/utils/thinking/policy"; +import { getDefaultModelFromLRU } from "@/hooks/useModelLRU"; /** * Get send options from localStorage @@ -20,8 +20,8 @@ import { enforceThinkingPolicy } from "@/utils/thinking/policy"; * This ensures DRY - single source of truth for option extraction. */ export function getSendOptionsFromStorage(workspaceId: string): SendMessageOptions { - // Read model preference (workspace-specific) - const model = readPersistedState(getModelKey(workspaceId), defaultModel); + // Read model preference (workspace-specific), fallback to most recent from LRU + const model = readPersistedState(getModelKey(workspaceId), getDefaultModelFromLRU()); // Read thinking level (workspace-specific) const thinkingLevel = readPersistedState( diff --git a/src/utils/slashCommands/registry.ts b/src/utils/slashCommands/registry.ts index a2a95bfae2..063f46f860 100644 --- a/src/utils/slashCommands/registry.ts +++ b/src/utils/slashCommands/registry.ts @@ -11,9 +11,11 @@ import type { import minimist from "minimist"; // Model abbreviations for common models +// Order matters: first model becomes the default for new chats export const MODEL_ABBREVIATIONS: Record = { - opus: "anthropic:claude-opus-4-1", sonnet: "anthropic:claude-sonnet-4-5", + haiku: "anthropic:claude-haiku-4-5", + opus: "anthropic:claude-opus-4-1", "gpt-5": "openai:gpt-5", "gpt-5-pro": "openai:gpt-5-pro", codex: "openai:gpt-5-codex", diff --git a/src/utils/tokens/models-extra.ts b/src/utils/tokens/models-extra.ts index bf191768ff..cfa6431813 100644 --- a/src/utils/tokens/models-extra.ts +++ b/src/utils/tokens/models-extra.ts @@ -39,4 +39,20 @@ export const modelsExtra: Record = { knowledge_cutoff: "2024-09-30", supported_endpoints: ["/v1/responses"], }, + + // Claude Haiku 4.5 - Released October 15, 2025 + // $1/M input, $5/M output + "claude-haiku-4-5": { + max_input_tokens: 200000, + max_output_tokens: 8192, + input_cost_per_token: 0.000001, // $1 per million input tokens + output_cost_per_token: 0.000005, // $5 per million output tokens + cache_creation_input_token_cost: 0.00000125, // $1.25 per million tokens + cache_read_input_token_cost: 0.0000001, // $0.10 per million tokens + litellm_provider: "anthropic", + mode: "chat", + supports_function_calling: true, + supports_vision: true, + supports_response_schema: true, + }, }; diff --git a/src/utils/tokens/models.json b/src/utils/tokens/models.json index da8e10faac..02e94181eb 100644 --- a/src/utils/tokens/models.json +++ b/src/utils/tokens/models.json @@ -105,7 +105,9 @@ "mode": "image_generation", "output_cost_per_image": 0.021, "source": "https://docs.aimlapi.com/", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "aiml/dall-e-3": { "litellm_provider": "aiml", @@ -115,7 +117,9 @@ "mode": "image_generation", "output_cost_per_image": 0.042, "source": "https://docs.aimlapi.com/", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "aiml/flux-pro": { "litellm_provider": "aiml", @@ -125,19 +129,25 @@ "mode": "image_generation", "output_cost_per_image": 0.053, "source": "https://docs.aimlapi.com/", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "aiml/flux-pro/v1.1": { "litellm_provider": "aiml", "mode": "image_generation", "output_cost_per_image": 0.042, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "aiml/flux-pro/v1.1-ultra": { "litellm_provider": "aiml", "mode": "image_generation", "output_cost_per_image": 0.063, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "aiml/flux-realism": { "litellm_provider": "aiml", @@ -147,7 +157,9 @@ "mode": "image_generation", "output_cost_per_image": 0.037, "source": "https://docs.aimlapi.com/", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "aiml/flux/dev": { "litellm_provider": "aiml", @@ -157,7 +169,9 @@ "mode": "image_generation", "output_cost_per_image": 0.026, "source": "https://docs.aimlapi.com/", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "aiml/flux/kontext-max/text-to-image": { "litellm_provider": "aiml", @@ -167,7 +181,9 @@ "mode": "image_generation", "output_cost_per_image": 0.084, "source": "https://docs.aimlapi.com/", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "aiml/flux/kontext-pro/text-to-image": { "litellm_provider": "aiml", @@ -177,7 +193,9 @@ "mode": "image_generation", "output_cost_per_image": 0.042, "source": "https://docs.aimlapi.com/", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "aiml/flux/schnell": { "litellm_provider": "aiml", @@ -187,7 +205,9 @@ "mode": "image_generation", "output_cost_per_image": 0.003, "source": "https://docs.aimlapi.com/", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "amazon.nova-lite-v1:0": { "input_cost_per_token": 6e-8, @@ -846,6 +866,36 @@ "mode": "audio_transcription", "output_cost_per_second": 0 }, + "au.anthropic.claude-sonnet-4-5-20250929-v1:0": { + "cache_creation_input_token_cost": 0.000004125, + "cache_read_input_token_cost": 3.3e-7, + "input_cost_per_token": 0.0000033, + "input_cost_per_token_above_200k_tokens": 0.0000066, + "output_cost_per_token_above_200k_tokens": 0.00002475, + "cache_creation_input_token_cost_above_200k_tokens": 0.00000825, + "cache_read_input_token_cost_above_200k_tokens": 6.6e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 200000, + "mode": "chat", + "output_cost_per_token": 0.0000165, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, "azure/ada": { "input_cost_per_token": 1e-7, "litellm_provider": "azure", @@ -863,9 +913,16 @@ "max_tokens": 100000, "mode": "responses", "output_cost_per_token": 0.000006, - "supported_endpoints": ["/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -894,9 +951,16 @@ "max_tokens": 1024, "mode": "chat", "output_cost_per_token": 0.000012, - "supported_endpoints": ["/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": false, @@ -1003,8 +1067,14 @@ "mode": "chat", "output_cost_per_audio_token": 0.00008, "output_cost_per_token": 0.000022, - "supported_modalities": ["text", "audio"], - "supported_output_modalities": ["text", "audio"], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -1397,9 +1467,18 @@ "mode": "chat", "output_cost_per_token": 0.000008, "output_cost_per_token_batches": 0.000004, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1421,9 +1500,18 @@ "mode": "chat", "output_cost_per_token": 0.000008, "output_cost_per_token_batches": 0.000004, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1445,9 +1533,18 @@ "mode": "chat", "output_cost_per_token": 0.0000016, "output_cost_per_token_batches": 8e-7, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1469,9 +1566,18 @@ "mode": "chat", "output_cost_per_token": 0.0000016, "output_cost_per_token_batches": 8e-7, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1493,9 +1599,18 @@ "mode": "chat", "output_cost_per_token": 4e-7, "output_cost_per_token_batches": 2e-7, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1516,9 +1631,18 @@ "mode": "chat", "output_cost_per_token": 4e-7, "output_cost_per_token_batches": 2e-7, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1619,9 +1743,17 @@ "mode": "chat", "output_cost_per_audio_token": 0.00008, "output_cost_per_token": 0.00001, - "supported_endpoints": ["/v1/chat/completions"], - "supported_modalities": ["text", "audio"], - "supported_output_modalities": ["text", "audio"], + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1674,9 +1806,17 @@ "mode": "chat", "output_cost_per_audio_token": 0.00008, "output_cost_per_token": 0.00001, - "supported_endpoints": ["/v1/chat/completions"], - "supported_modalities": ["text", "audio"], - "supported_output_modalities": ["text", "audio"], + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1714,7 +1854,9 @@ "max_output_tokens": 2000, "mode": "audio_transcription", "output_cost_per_token": 0.000005, - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "azure/gpt-4o-mini-tts": { "input_cost_per_token": 0.0000025, @@ -1723,9 +1865,16 @@ "output_cost_per_audio_token": 0.000012, "output_cost_per_second": 0.00025, "output_cost_per_token": 0.00001, - "supported_endpoints": ["/v1/audio/speech"], - "supported_modalities": ["text", "audio"], - "supported_output_modalities": ["audio"] + "supported_endpoints": [ + "/v1/audio/speech" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "audio" + ] }, "azure/gpt-4o-realtime-preview-2024-10-01": { "cache_creation_input_audio_token_cost": 0.00002, @@ -1757,8 +1906,14 @@ "mode": "chat", "output_cost_per_audio_token": 0.00008, "output_cost_per_token": 0.00002, - "supported_modalities": ["text", "audio"], - "supported_output_modalities": ["text", "audio"], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -1774,7 +1929,9 @@ "max_output_tokens": 2000, "mode": "audio_transcription", "output_cost_per_token": 0.00001, - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "azure/gpt-5": { "cache_read_input_token_cost": 1.25e-7, @@ -1785,9 +1942,18 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.00001, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1808,9 +1974,18 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.00001, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1832,9 +2007,18 @@ "mode": "chat", "output_cost_per_token": 0.00001, "source": "https://azure.microsoft.com/en-us/blog/gpt-5-in-azure-ai-foundry-the-future-of-ai-apps-and-agents-starts-here/", - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1850,14 +2034,23 @@ "cache_read_input_token_cost": 1.25e-7, "input_cost_per_token": 0.00000125, "litellm_provider": "azure", - "max_input_tokens": 272000, - "max_output_tokens": 128000, - "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, "mode": "chat", "output_cost_per_token": 0.00001, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1878,9 +2071,16 @@ "max_tokens": 128000, "mode": "responses", "output_cost_per_token": 0.00001, - "supported_endpoints": ["/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1901,9 +2101,18 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.000002, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1924,9 +2133,18 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.000002, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1947,9 +2165,18 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 4e-7, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1970,9 +2197,18 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 4e-7, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1989,7 +2225,9 @@ "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/hd/1024-x-1024/dall-e-3": { "input_cost_per_pixel": 7.629e-8, @@ -2014,63 +2252,81 @@ "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/high/1024-x-1536/gpt-image-1": { "input_cost_per_pixel": 1.58945719e-7, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/high/1536-x-1024/gpt-image-1": { "input_cost_per_pixel": 1.58945719e-7, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/low/1024-x-1024/gpt-image-1": { "input_cost_per_pixel": 1.0490417e-8, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/low/1024-x-1536/gpt-image-1": { "input_cost_per_pixel": 1.0172526e-8, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/low/1536-x-1024/gpt-image-1": { "input_cost_per_pixel": 1.0172526e-8, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/medium/1024-x-1024/gpt-image-1": { "input_cost_per_pixel": 4.0054321e-8, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/medium/1024-x-1536/gpt-image-1": { "input_cost_per_pixel": 4.0054321e-8, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/medium/1536-x-1024/gpt-image-1": { "input_cost_per_pixel": 4.0054321e-8, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure/mistral-large-2402": { "input_cost_per_token": 0.000008, @@ -2192,9 +2448,18 @@ "max_tokens": 100000, "mode": "chat", "output_cost_per_token": 0.000008, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_prompt_caching": true, @@ -2212,9 +2477,18 @@ "max_tokens": 100000, "mode": "chat", "output_cost_per_token": 0.00004, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_prompt_caching": true, @@ -2232,9 +2506,18 @@ "max_tokens": 100000, "mode": "responses", "output_cost_per_token": 0.00004, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -2285,9 +2568,18 @@ "mode": "responses", "output_cost_per_token": 0.00008, "output_cost_per_token_batches": 0.00004, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_prompt_caching": false, @@ -2306,9 +2598,18 @@ "mode": "responses", "output_cost_per_token": 0.00008, "output_cost_per_token_batches": 0.00004, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_prompt_caching": false, @@ -2326,9 +2627,18 @@ "max_tokens": 100000, "mode": "chat", "output_cost_per_token": 0.0000044, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_prompt_caching": true, @@ -2509,8 +2819,14 @@ "mode": "chat", "output_cost_per_audio_token": 0.00008, "output_cost_per_token": 0.000022, - "supported_modalities": ["text", "audio"], - "supported_output_modalities": ["text", "audio"], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -2612,14 +2928,18 @@ "mode": "image_generation", "output_cost_per_image": 0.04, "source": "https://techcommunity.microsoft.com/blog/azure-ai-foundry-blog/black-forest-labs-flux-1-kontext-pro-and-flux1-1-pro-now-available-in-azure-ai-f/4434659", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure_ai/FLUX.1-Kontext-pro": { "litellm_provider": "azure_ai", "mode": "image_generation", "output_cost_per_image": 0.04, "source": "https://azuremarketplace.microsoft.com/pt-br/marketplace/apps/cohere.cohere-embed-4-offer?tab=PlansAndPrice", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "azure_ai/Llama-3.2-11B-Vision-Instruct": { "input_cost_per_token": 3.7e-7, @@ -2874,6 +3194,42 @@ "supports_function_calling": true, "supports_vision": true }, + "azure_ai/Phi-4-mini-reasoning": { + "input_cost_per_token": 8e-8, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 3.2e-7, + "source": "https://azure.microsoft.com/en-us/pricing/details/ai-foundry-models/microsoft/", + "supports_function_calling": true + }, + "azure_ai/Phi-4-reasoning": { + "input_cost_per_token": 1.25e-7, + "litellm_provider": "azure_ai", + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 5e-7, + "source": "https://azure.microsoft.com/en-us/pricing/details/ai-foundry-models/microsoft/", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_reasoning": true + }, + "azure_ai/MAI-DS-R1": { + "input_cost_per_token": 0.00000135, + "litellm_provider": "azure_ai", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0000054, + "source": "https://azure.microsoft.com/en-us/pricing/details/ai-foundry-models/microsoft/", + "supports_reasoning": true, + "supports_tool_choice": true + }, "azure_ai/cohere-rerank-v3-english": { "input_cost_per_query": 0.002, "input_cost_per_token": 0, @@ -2951,8 +3307,13 @@ "output_cost_per_token": 0, "output_vector_size": 3072, "source": "https://azuremarketplace.microsoft.com/pt-br/marketplace/apps/cohere.cohere-embed-4-offer?tab=PlansAndPrice", - "supported_endpoints": ["/v1/embeddings"], - "supported_modalities": ["text", "image"], + "supported_endpoints": [ + "/v1/embeddings" + ], + "supported_modalities": [ + "text", + "image" + ], "supports_embedding_image_input": true }, "azure_ai/global/grok-3": { @@ -3013,19 +3374,76 @@ "supports_tool_choice": true, "supports_web_search": true }, - "azure_ai/jais-30b-chat": { - "input_cost_per_token": 0.0032, + "azure_ai/grok-4": { + "input_cost_per_token": 0.0000055, "litellm_provider": "azure_ai", - "max_input_tokens": 8192, - "max_output_tokens": 8192, - "max_tokens": 8192, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 0.00971, - "source": "https://azure.microsoft.com/en-us/products/ai-services/ai-foundry/models/jais-30b-chat" - }, - "azure_ai/jamba-instruct": { - "input_cost_per_token": 5e-7, - "litellm_provider": "azure_ai", + "output_cost_per_token": 0.0000275, + "source": "https://azure.microsoft.com/en-us/blog/grok-4-is-now-available-in-azure-ai-foundry-unlock-frontier-intelligence-and-business-ready-capabilities/", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "azure_ai/grok-4-fast-non-reasoning": { + "input_cost_per_token": 4.3e-7, + "output_cost_per_token": 0.00000173, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "azure_ai/grok-4-fast-reasoning": { + "input_cost_per_token": 4.3e-7, + "output_cost_per_token": 0.00000173, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "source": "https://techcommunity.microsoft.com/blog/azure-ai-foundry-blog/announcing-the-grok-4-fast-models-from-xai-now-available-in-azure-ai-foundry/4456701", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "azure_ai/grok-code-fast-1": { + "input_cost_per_token": 0.0000035, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 0.0000175, + "source": "https://azure.microsoft.com/en-us/blog/grok-4-is-now-available-in-azure-ai-foundry-unlock-frontier-intelligence-and-business-ready-capabilities/", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "azure_ai/jais-30b-chat": { + "input_cost_per_token": 0.0032, + "litellm_provider": "azure_ai", + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.00971, + "source": "https://azure.microsoft.com/en-us/products/ai-services/ai-foundry/models/jais-30b-chat" + }, + "azure_ai/jamba-instruct": { + "input_cost_per_token": 5e-7, + "litellm_provider": "azure_ai", "max_input_tokens": 70000, "max_output_tokens": 4096, "max_tokens": 4096, @@ -4423,7 +4841,7 @@ "input_cost_per_token_above_200k_tokens": 0.000006, "litellm_provider": "anthropic", "max_input_tokens": 1000000, - "max_output_tokens": 1000000, + "max_output_tokens": 64000, "max_tokens": 1000000, "mode": "chat", "output_cost_per_token": 0.000015, @@ -4448,6 +4866,10 @@ "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, "litellm_provider": "anthropic", "max_input_tokens": 200000, "max_output_tokens": 64000, @@ -4474,6 +4896,10 @@ "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, "litellm_provider": "anthropic", "max_input_tokens": 200000, "max_output_tokens": 64000, @@ -4856,9 +5282,16 @@ "max_tokens": 100000, "mode": "responses", "output_cost_per_token": 0.000006, - "supported_endpoints": ["/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -4927,6 +5360,16 @@ "output_cost_per_token": 0, "supports_embedding_image_input": true }, + "cohere.embed-v4:0": { + "input_cost_per_token": 1.2e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 128000, + "max_tokens": 128000, + "mode": "embedding", + "output_cost_per_token": 0, + "output_vector_size": 1536, + "supports_embedding_image_input": true + }, "cohere.rerank-v3-5:0": { "input_cost_per_query": 0.002, "input_cost_per_token": 0, @@ -5043,9 +5486,16 @@ "max_tokens": 1024, "mode": "chat", "output_cost_per_token": 0.000012, - "supported_endpoints": ["/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": false, @@ -5065,7 +5515,9 @@ "mode": "chat", "output_cost_per_token": 0.0000017, "source": "https://api-docs.deepseek.com/quick_start/pricing", - "supported_endpoints": ["/v1/chat/completions"], + "supported_endpoints": [ + "/v1/chat/completions" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -5084,7 +5536,9 @@ "mode": "chat", "output_cost_per_token": 0.0000017, "source": "https://api-docs.deepseek.com/quick_start/pricing", - "supported_endpoints": ["/v1/chat/completions"], + "supported_endpoints": [ + "/v1/chat/completions" + ], "supports_function_calling": false, "supports_native_streaming": true, "supports_parallel_function_calling": false, @@ -5121,12 +5575,18 @@ { "input_cost_per_token": 5e-8, "output_cost_per_token": 4e-7, - "range": [0, 256000] + "range": [ + 0, + 256000 + ] }, { "input_cost_per_token": 2.5e-7, "output_cost_per_token": 0.000002, - "range": [256000, 1000000] + "range": [ + 256000, + 1000000 + ] } ] }, @@ -5144,12 +5604,18 @@ { "input_cost_per_token": 5e-8, "output_cost_per_token": 4e-7, - "range": [0, 256000] + "range": [ + 0, + 256000 + ] }, { "input_cost_per_token": 2.5e-7, "output_cost_per_token": 0.000002, - "range": [256000, 1000000] + "range": [ + 256000, + 1000000 + ] } ] }, @@ -5235,13 +5701,19 @@ "input_cost_per_token": 4e-7, "output_cost_per_reasoning_token": 0.000004, "output_cost_per_token": 0.0000012, - "range": [0, 256000] + "range": [ + 0, + 256000 + ] }, { "input_cost_per_token": 0.0000012, "output_cost_per_reasoning_token": 0.000012, "output_cost_per_token": 0.0000036, - "range": [256000, 1000000] + "range": [ + 256000, + 1000000 + ] } ] }, @@ -5260,13 +5732,19 @@ "input_cost_per_token": 4e-7, "output_cost_per_reasoning_token": 0.000004, "output_cost_per_token": 0.0000012, - "range": [0, 256000] + "range": [ + 0, + 256000 + ] }, { "input_cost_per_token": 0.0000012, "output_cost_per_reasoning_token": 0.000012, "output_cost_per_token": 0.0000036, - "range": [256000, 1000000] + "range": [ + 256000, + 1000000 + ] } ] }, @@ -5285,13 +5763,19 @@ "input_cost_per_token": 4e-7, "output_cost_per_reasoning_token": 0.000004, "output_cost_per_token": 0.0000012, - "range": [0, 256000] + "range": [ + 0, + 256000 + ] }, { "input_cost_per_token": 0.0000012, "output_cost_per_reasoning_token": 0.000012, "output_cost_per_token": 0.0000036, - "range": [256000, 1000000] + "range": [ + 256000, + 1000000 + ] } ] }, @@ -5376,25 +5860,37 @@ "cache_read_input_token_cost": 8e-8, "input_cost_per_token": 3e-7, "output_cost_per_token": 0.0000015, - "range": [0, 32000] + "range": [ + 0, + 32000 + ] }, { "cache_read_input_token_cost": 1.2e-7, "input_cost_per_token": 5e-7, "output_cost_per_token": 0.0000025, - "range": [32000, 128000] + "range": [ + 32000, + 128000 + ] }, { "cache_read_input_token_cost": 2e-7, "input_cost_per_token": 8e-7, "output_cost_per_token": 0.000004, - "range": [128000, 256000] + "range": [ + 128000, + 256000 + ] }, { "cache_read_input_token_cost": 4e-7, "input_cost_per_token": 0.0000016, "output_cost_per_token": 0.0000096, - "range": [256000, 1000000] + "range": [ + 256000, + 1000000 + ] } ] }, @@ -5412,22 +5908,34 @@ { "input_cost_per_token": 3e-7, "output_cost_per_token": 0.0000015, - "range": [0, 32000] + "range": [ + 0, + 32000 + ] }, { "input_cost_per_token": 5e-7, "output_cost_per_token": 0.0000025, - "range": [32000, 128000] + "range": [ + 32000, + 128000 + ] }, { "input_cost_per_token": 8e-7, "output_cost_per_token": 0.000004, - "range": [128000, 256000] + "range": [ + 128000, + 256000 + ] }, { "input_cost_per_token": 0.0000016, "output_cost_per_token": 0.0000096, - "range": [256000, 1000000] + "range": [ + 256000, + 1000000 + ] } ] }, @@ -5446,25 +5954,37 @@ "cache_read_input_token_cost": 1e-7, "input_cost_per_token": 0.000001, "output_cost_per_token": 0.000005, - "range": [0, 32000] + "range": [ + 0, + 32000 + ] }, { "cache_read_input_token_cost": 1.8e-7, "input_cost_per_token": 0.0000018, "output_cost_per_token": 0.000009, - "range": [32000, 128000] + "range": [ + 32000, + 128000 + ] }, { "cache_read_input_token_cost": 3e-7, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000015, - "range": [128000, 256000] + "range": [ + 128000, + 256000 + ] }, { "cache_read_input_token_cost": 6e-7, "input_cost_per_token": 0.000006, "output_cost_per_token": 0.00006, - "range": [256000, 1000000] + "range": [ + 256000, + 1000000 + ] } ] }, @@ -5482,22 +6002,34 @@ { "input_cost_per_token": 0.000001, "output_cost_per_token": 0.000005, - "range": [0, 32000] + "range": [ + 0, + 32000 + ] }, { "input_cost_per_token": 0.0000018, "output_cost_per_token": 0.000009, - "range": [32000, 128000] + "range": [ + 32000, + 128000 + ] }, { "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000015, - "range": [128000, 256000] + "range": [ + 128000, + 256000 + ] }, { "input_cost_per_token": 0.000006, "output_cost_per_token": 0.00006, - "range": [256000, 1000000] + "range": [ + 256000, + 1000000 + ] } ] }, @@ -5515,17 +6047,26 @@ { "input_cost_per_token": 0.0000012, "output_cost_per_token": 0.000006, - "range": [0, 32000] + "range": [ + 0, + 32000 + ] }, { "input_cost_per_token": 0.0000024, "output_cost_per_token": 0.000012, - "range": [32000, 128000] + "range": [ + 32000, + 128000 + ] }, { "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000015, - "range": [128000, 252000] + "range": [ + 128000, + 252000 + ] } ] }, @@ -5738,7 +6279,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/base-conversationalai": { "input_cost_per_second": 0.00020833, @@ -5750,7 +6293,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/base-finance": { "input_cost_per_second": 0.00020833, @@ -5762,7 +6307,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/base-general": { "input_cost_per_second": 0.00020833, @@ -5774,7 +6321,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/base-meeting": { "input_cost_per_second": 0.00020833, @@ -5786,7 +6335,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/base-phonecall": { "input_cost_per_second": 0.00020833, @@ -5798,7 +6349,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/base-video": { "input_cost_per_second": 0.00020833, @@ -5810,7 +6363,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/base-voicemail": { "input_cost_per_second": 0.00020833, @@ -5822,7 +6377,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/enhanced": { "input_cost_per_second": 0.00024167, @@ -5834,7 +6391,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/enhanced-finance": { "input_cost_per_second": 0.00024167, @@ -5846,7 +6405,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/enhanced-general": { "input_cost_per_second": 0.00024167, @@ -5858,7 +6419,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/enhanced-meeting": { "input_cost_per_second": 0.00024167, @@ -5870,7 +6433,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/enhanced-phonecall": { "input_cost_per_second": 0.00024167, @@ -5882,7 +6447,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova": { "input_cost_per_second": 0.00007167, @@ -5894,7 +6461,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-2": { "input_cost_per_second": 0.00007167, @@ -5906,7 +6475,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-2-atc": { "input_cost_per_second": 0.00007167, @@ -5918,7 +6489,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-2-automotive": { "input_cost_per_second": 0.00007167, @@ -5930,7 +6503,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-2-conversationalai": { "input_cost_per_second": 0.00007167, @@ -5942,7 +6517,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-2-drivethru": { "input_cost_per_second": 0.00007167, @@ -5954,7 +6531,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-2-finance": { "input_cost_per_second": 0.00007167, @@ -5966,7 +6545,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-2-general": { "input_cost_per_second": 0.00007167, @@ -5978,7 +6559,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-2-meeting": { "input_cost_per_second": 0.00007167, @@ -5990,7 +6573,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-2-phonecall": { "input_cost_per_second": 0.00007167, @@ -6002,7 +6587,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-2-video": { "input_cost_per_second": 0.00007167, @@ -6014,7 +6601,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-2-voicemail": { "input_cost_per_second": 0.00007167, @@ -6026,7 +6615,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-3": { "input_cost_per_second": 0.00007167, @@ -6038,7 +6629,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-3-general": { "input_cost_per_second": 0.00007167, @@ -6050,7 +6643,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-3-medical": { "input_cost_per_second": 0.00008667, @@ -6062,7 +6657,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-general": { "input_cost_per_second": 0.00007167, @@ -6074,7 +6671,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/nova-phonecall": { "input_cost_per_second": 0.00007167, @@ -6086,7 +6685,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/whisper": { "input_cost_per_second": 0.0001, @@ -6097,7 +6698,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/whisper-base": { "input_cost_per_second": 0.0001, @@ -6108,7 +6711,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/whisper-large": { "input_cost_per_second": 0.0001, @@ -6119,7 +6724,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/whisper-medium": { "input_cost_per_second": 0.0001, @@ -6130,7 +6737,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/whisper-small": { "input_cost_per_second": 0.0001, @@ -6141,7 +6750,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepgram/whisper-tiny": { "input_cost_per_second": 0.0001, @@ -6152,632 +6763,684 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "deepinfra/Gryphe/MythoMax-L2-13b": { - "input_cost_per_token": 7.2e-8, - "litellm_provider": "deepinfra", + "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "max_tokens": 4096, + "input_cost_per_token": 8e-8, + "output_cost_per_token": 9e-8, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 7.2e-8, "supports_tool_choice": true }, "deepinfra/NousResearch/Hermes-3-Llama-3.1-405B": { - "input_cost_per_token": 7e-7, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000001, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 8e-7, "supports_tool_choice": true }, "deepinfra/NousResearch/Hermes-3-Llama-3.1-70B": { - "input_cost_per_token": 1e-7, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, + "input_cost_per_token": 3e-7, + "output_cost_per_token": 3e-7, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 2.8e-7, "supports_tool_choice": false }, "deepinfra/Qwen/QwQ-32B": { - "input_cost_per_token": 1.5e-7, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, - "mode": "chat", + "input_cost_per_token": 1.5e-7, "output_cost_per_token": 4e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/Qwen/Qwen2.5-72B-Instruct": { - "input_cost_per_token": 1.2e-7, - "litellm_provider": "deepinfra", + "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, - "max_tokens": 32768, - "mode": "chat", + "input_cost_per_token": 1.2e-7, "output_cost_per_token": 3.9e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/Qwen/Qwen2.5-7B-Instruct": { - "input_cost_per_token": 4e-8, - "litellm_provider": "deepinfra", + "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, - "max_tokens": 32768, - "mode": "chat", + "input_cost_per_token": 4e-8, "output_cost_per_token": 1e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": false }, "deepinfra/Qwen/Qwen2.5-VL-32B-Instruct": { - "input_cost_per_token": 2e-7, - "litellm_provider": "deepinfra", + "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "max_tokens": 128000, - "mode": "chat", + "input_cost_per_token": 2e-7, "output_cost_per_token": 6e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/Qwen/Qwen3-14B": { - "input_cost_per_token": 6e-8, - "litellm_provider": "deepinfra", + "max_tokens": 40960, "max_input_tokens": 40960, "max_output_tokens": 40960, - "max_tokens": 40960, - "mode": "chat", + "input_cost_per_token": 6e-8, "output_cost_per_token": 2.4e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/Qwen/Qwen3-235B-A22B": { - "input_cost_per_token": 1.3e-7, - "litellm_provider": "deepinfra", + "max_tokens": 40960, "max_input_tokens": 40960, "max_output_tokens": 40960, - "max_tokens": 40960, + "input_cost_per_token": 1.8e-7, + "output_cost_per_token": 5.4e-7, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 6e-7, "supports_tool_choice": true }, "deepinfra/Qwen/Qwen3-235B-A22B-Instruct-2507": { - "input_cost_per_token": 1.3e-7, - "litellm_provider": "deepinfra", + "max_tokens": 262144, "max_input_tokens": 262144, "max_output_tokens": 262144, - "max_tokens": 262144, - "mode": "chat", + "input_cost_per_token": 9e-8, "output_cost_per_token": 6e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/Qwen/Qwen3-235B-A22B-Thinking-2507": { - "input_cost_per_token": 1.3e-7, - "litellm_provider": "deepinfra", + "max_tokens": 262144, "max_input_tokens": 262144, "max_output_tokens": 262144, - "max_tokens": 262144, + "input_cost_per_token": 3e-7, + "output_cost_per_token": 0.0000029, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 6e-7, "supports_tool_choice": true }, "deepinfra/Qwen/Qwen3-30B-A3B": { - "input_cost_per_token": 8e-8, - "litellm_provider": "deepinfra", + "max_tokens": 40960, "max_input_tokens": 40960, "max_output_tokens": 40960, - "max_tokens": 40960, - "mode": "chat", + "input_cost_per_token": 8e-8, "output_cost_per_token": 2.9e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/Qwen/Qwen3-32B": { - "input_cost_per_token": 1e-7, - "litellm_provider": "deepinfra", + "max_tokens": 40960, "max_input_tokens": 40960, "max_output_tokens": 40960, - "max_tokens": 40960, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 2.8e-7, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 3e-7, "supports_tool_choice": true }, "deepinfra/Qwen/Qwen3-Coder-480B-A35B-Instruct": { - "input_cost_per_token": 4e-7, - "litellm_provider": "deepinfra", + "max_tokens": 262144, "max_input_tokens": 262144, "max_output_tokens": 262144, - "max_tokens": 262144, - "mode": "chat", + "input_cost_per_token": 4e-7, "output_cost_per_token": 0.0000016, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo": { - "cache_read_input_token_cost": 2.4e-7, - "input_cost_per_token": 3e-7, + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 2.9e-7, + "output_cost_per_token": 0.0000012, "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/Qwen/Qwen3-Next-80B-A3B-Instruct": { + "max_tokens": 262144, "max_input_tokens": 262144, "max_output_tokens": 262144, + "input_cost_per_token": 1.4e-7, + "output_cost_per_token": 0.0000014, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/Qwen/Qwen3-Next-80B-A3B-Thinking": { "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 1.4e-7, + "output_cost_per_token": 0.0000014, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 0.0000012, "supports_tool_choice": true }, "deepinfra/Sao10K/L3-8B-Lunaris-v1-Turbo": { - "input_cost_per_token": 2e-8, - "litellm_provider": "deepinfra", + "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "max_tokens": 8192, - "mode": "chat", + "input_cost_per_token": 4e-8, "output_cost_per_token": 5e-8, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": false }, "deepinfra/Sao10K/L3.1-70B-Euryale-v2.2": { - "input_cost_per_token": 6.5e-7, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, - "mode": "chat", + "input_cost_per_token": 6.5e-7, "output_cost_per_token": 7.5e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": false }, "deepinfra/Sao10K/L3.3-70B-Euryale-v2.3": { - "input_cost_per_token": 6.5e-7, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, - "mode": "chat", + "input_cost_per_token": 6.5e-7, "output_cost_per_token": 7.5e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": false }, "deepinfra/allenai/olmOCR-7B-0725-FP8": { - "input_cost_per_token": 2.7e-7, - "litellm_provider": "deepinfra", + "max_tokens": 16384, "max_input_tokens": 16384, "max_output_tokens": 16384, - "max_tokens": 16384, - "mode": "chat", + "input_cost_per_token": 2.7e-7, "output_cost_per_token": 0.0000015, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": false }, "deepinfra/anthropic/claude-3-7-sonnet-latest": { - "cache_read_input_token_cost": 3.3e-7, - "input_cost_per_token": 0.0000033, - "litellm_provider": "deepinfra", + "max_tokens": 200000, "max_input_tokens": 200000, "max_output_tokens": 200000, - "max_tokens": 200000, - "mode": "chat", + "input_cost_per_token": 0.0000033, "output_cost_per_token": 0.0000165, + "cache_read_input_token_cost": 3.3e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/anthropic/claude-4-opus": { - "input_cost_per_token": 0.0000165, - "litellm_provider": "deepinfra", + "max_tokens": 200000, "max_input_tokens": 200000, "max_output_tokens": 200000, - "max_tokens": 200000, - "mode": "chat", + "input_cost_per_token": 0.0000165, "output_cost_per_token": 0.0000825, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/anthropic/claude-4-sonnet": { - "input_cost_per_token": 0.0000033, - "litellm_provider": "deepinfra", + "max_tokens": 200000, "max_input_tokens": 200000, "max_output_tokens": 200000, - "max_tokens": 200000, - "mode": "chat", + "input_cost_per_token": 0.0000033, "output_cost_per_token": 0.0000165, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/deepseek-ai/DeepSeek-R1": { - "input_cost_per_token": 7e-7, - "litellm_provider": "deepinfra", + "max_tokens": 163840, "max_input_tokens": 163840, "max_output_tokens": 163840, - "max_tokens": 163840, - "mode": "chat", + "input_cost_per_token": 7e-7, "output_cost_per_token": 0.0000024, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/deepseek-ai/DeepSeek-R1-0528": { - "cache_read_input_token_cost": 4e-7, - "input_cost_per_token": 5e-7, - "litellm_provider": "deepinfra", + "max_tokens": 163840, "max_input_tokens": 163840, "max_output_tokens": 163840, - "max_tokens": 163840, - "mode": "chat", + "input_cost_per_token": 5e-7, "output_cost_per_token": 0.00000215, + "cache_read_input_token_cost": 4e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/deepseek-ai/DeepSeek-R1-0528-Turbo": { - "input_cost_per_token": 0.000001, - "litellm_provider": "deepinfra", + "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, - "max_tokens": 32768, - "mode": "chat", + "input_cost_per_token": 0.000001, "output_cost_per_token": 0.000003, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/deepseek-ai/DeepSeek-R1-Distill-Llama-70B": { - "input_cost_per_token": 1e-7, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 6e-7, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 4e-7, "supports_tool_choice": false }, "deepinfra/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B": { - "input_cost_per_token": 7.5e-8, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, + "input_cost_per_token": 2.7e-7, + "output_cost_per_token": 2.7e-7, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 1.5e-7, "supports_tool_choice": true }, "deepinfra/deepseek-ai/DeepSeek-R1-Turbo": { - "input_cost_per_token": 0.000001, - "litellm_provider": "deepinfra", + "max_tokens": 40960, "max_input_tokens": 40960, "max_output_tokens": 40960, - "max_tokens": 40960, - "mode": "chat", + "input_cost_per_token": 0.000001, "output_cost_per_token": 0.000003, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/deepseek-ai/DeepSeek-V3": { - "input_cost_per_token": 3.8e-7, - "litellm_provider": "deepinfra", + "max_tokens": 163840, "max_input_tokens": 163840, "max_output_tokens": 163840, - "max_tokens": 163840, - "mode": "chat", + "input_cost_per_token": 3.8e-7, "output_cost_per_token": 8.9e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/deepseek-ai/DeepSeek-V3-0324": { - "cache_read_input_token_cost": 2.24e-7, - "input_cost_per_token": 2.8e-7, - "litellm_provider": "deepinfra", + "max_tokens": 163840, "max_input_tokens": 163840, "max_output_tokens": 163840, - "max_tokens": 163840, - "mode": "chat", + "input_cost_per_token": 2.5e-7, "output_cost_per_token": 8.8e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/deepseek-ai/DeepSeek-V3.1": { - "cache_read_input_token_cost": 2.16e-7, + "max_tokens": 163840, + "max_input_tokens": 163840, + "max_output_tokens": 163840, "input_cost_per_token": 2.7e-7, + "output_cost_per_token": 0.000001, + "cache_read_input_token_cost": 2.16e-7, "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_reasoning": true + }, + "deepinfra/deepseek-ai/DeepSeek-V3.1-Terminus": { + "max_tokens": 163840, "max_input_tokens": 163840, "max_output_tokens": 163840, - "max_tokens": 163840, - "mode": "chat", + "input_cost_per_token": 2.7e-7, "output_cost_per_token": 0.000001, - "supports_reasoning": true, + "cache_read_input_token_cost": 2.16e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/google/gemini-2.0-flash-001": { - "input_cost_per_token": 1e-7, - "litellm_provider": "deepinfra", + "max_tokens": 1000000, "max_input_tokens": 1000000, "max_output_tokens": 1000000, - "max_tokens": 1000000, - "mode": "chat", + "input_cost_per_token": 1e-7, "output_cost_per_token": 4e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/google/gemini-2.5-flash": { - "input_cost_per_token": 2.1e-7, - "litellm_provider": "deepinfra", + "max_tokens": 1000000, "max_input_tokens": 1000000, "max_output_tokens": 1000000, - "max_tokens": 1000000, + "input_cost_per_token": 3e-7, + "output_cost_per_token": 0.0000025, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 0.00000175, "supports_tool_choice": true }, "deepinfra/google/gemini-2.5-pro": { - "input_cost_per_token": 8.75e-7, - "litellm_provider": "deepinfra", + "max_tokens": 1000000, "max_input_tokens": 1000000, "max_output_tokens": 1000000, - "max_tokens": 1000000, + "input_cost_per_token": 0.00000125, + "output_cost_per_token": 0.00001, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 0.000007, "supports_tool_choice": true }, "deepinfra/google/gemma-3-12b-it": { - "input_cost_per_token": 5e-8, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, - "mode": "chat", + "input_cost_per_token": 5e-8, "output_cost_per_token": 1e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/google/gemma-3-27b-it": { - "input_cost_per_token": 9e-8, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, + "input_cost_per_token": 9e-8, + "output_cost_per_token": 1.6e-7, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 1.7e-7, "supports_tool_choice": true }, "deepinfra/google/gemma-3-4b-it": { - "input_cost_per_token": 4e-8, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, - "mode": "chat", + "input_cost_per_token": 4e-8, "output_cost_per_token": 8e-8, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/meta-llama/Llama-3.2-11B-Vision-Instruct": { - "input_cost_per_token": 4.9e-8, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, - "mode": "chat", + "input_cost_per_token": 4.9e-8, "output_cost_per_token": 4.9e-8, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": false }, "deepinfra/meta-llama/Llama-3.2-3B-Instruct": { - "input_cost_per_token": 1.2e-8, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, + "input_cost_per_token": 2e-8, + "output_cost_per_token": 2e-8, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 2.4e-8, "supports_tool_choice": true }, "deepinfra/meta-llama/Llama-3.3-70B-Instruct": { - "input_cost_per_token": 2.3e-7, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, - "mode": "chat", + "input_cost_per_token": 2.3e-7, "output_cost_per_token": 4e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/meta-llama/Llama-3.3-70B-Instruct-Turbo": { - "input_cost_per_token": 3.8e-8, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, + "input_cost_per_token": 1.3e-7, + "output_cost_per_token": 3.9e-7, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 1.2e-7, "supports_tool_choice": true }, "deepinfra/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8": { - "input_cost_per_token": 1.5e-7, - "litellm_provider": "deepinfra", + "max_tokens": 1048576, "max_input_tokens": 1048576, "max_output_tokens": 1048576, - "max_tokens": 1048576, - "mode": "chat", + "input_cost_per_token": 1.5e-7, "output_cost_per_token": 6e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/meta-llama/Llama-4-Scout-17B-16E-Instruct": { - "input_cost_per_token": 8e-8, - "litellm_provider": "deepinfra", + "max_tokens": 327680, "max_input_tokens": 327680, "max_output_tokens": 327680, - "max_tokens": 327680, - "mode": "chat", + "input_cost_per_token": 8e-8, "output_cost_per_token": 3e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/meta-llama/Llama-Guard-3-8B": { - "input_cost_per_token": 5.5e-8, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, - "mode": "chat", + "input_cost_per_token": 5.5e-8, "output_cost_per_token": 5.5e-8, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": false }, "deepinfra/meta-llama/Llama-Guard-4-12B": { - "input_cost_per_token": 1.8e-7, - "litellm_provider": "deepinfra", + "max_tokens": 163840, "max_input_tokens": 163840, "max_output_tokens": 163840, - "max_tokens": 163840, - "mode": "chat", + "input_cost_per_token": 1.8e-7, "output_cost_per_token": 1.8e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": false }, "deepinfra/meta-llama/Meta-Llama-3-8B-Instruct": { - "input_cost_per_token": 3e-8, - "litellm_provider": "deepinfra", + "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "max_tokens": 8192, - "mode": "chat", + "input_cost_per_token": 3e-8, "output_cost_per_token": 6e-8, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/meta-llama/Meta-Llama-3.1-70B-Instruct": { - "input_cost_per_token": 2.3e-7, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, - "mode": "chat", + "input_cost_per_token": 4e-7, "output_cost_per_token": 4e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": { - "input_cost_per_token": 1e-7, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, - "mode": "chat", + "input_cost_per_token": 1e-7, "output_cost_per_token": 2.8e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/meta-llama/Meta-Llama-3.1-8B-Instruct": { - "input_cost_per_token": 3e-8, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, - "mode": "chat", + "input_cost_per_token": 3e-8, "output_cost_per_token": 5e-8, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": { - "input_cost_per_token": 1.5e-8, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, + "input_cost_per_token": 2e-8, + "output_cost_per_token": 3e-8, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 2e-8, "supports_tool_choice": true }, "deepinfra/microsoft/WizardLM-2-8x22B": { - "input_cost_per_token": 4.8e-7, - "litellm_provider": "deepinfra", + "max_tokens": 65536, "max_input_tokens": 65536, "max_output_tokens": 65536, - "max_tokens": 65536, - "mode": "chat", + "input_cost_per_token": 4.8e-7, "output_cost_per_token": 4.8e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": false }, "deepinfra/microsoft/phi-4": { - "input_cost_per_token": 7e-8, - "litellm_provider": "deepinfra", + "max_tokens": 16384, "max_input_tokens": 16384, "max_output_tokens": 16384, - "max_tokens": 16384, - "mode": "chat", + "input_cost_per_token": 7e-8, "output_cost_per_token": 1.4e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/mistralai/Mistral-Nemo-Instruct-2407": { - "input_cost_per_token": 2e-8, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, - "mode": "chat", + "input_cost_per_token": 2e-8, "output_cost_per_token": 4e-8, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/mistralai/Mistral-Small-24B-Instruct-2501": { - "input_cost_per_token": 5e-8, - "litellm_provider": "deepinfra", + "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, - "max_tokens": 32768, - "mode": "chat", + "input_cost_per_token": 5e-8, "output_cost_per_token": 8e-8, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/mistralai/Mistral-Small-3.2-24B-Instruct-2506": { - "input_cost_per_token": 5e-8, - "litellm_provider": "deepinfra", + "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "max_tokens": 128000, + "input_cost_per_token": 7.5e-8, + "output_cost_per_token": 2e-7, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 1e-7, "supports_tool_choice": true }, "deepinfra/mistralai/Mixtral-8x7B-Instruct-v0.1": { - "input_cost_per_token": 8e-8, - "litellm_provider": "deepinfra", + "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, - "max_tokens": 32768, + "input_cost_per_token": 4e-7, + "output_cost_per_token": 4e-7, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 2.4e-7, "supports_tool_choice": true }, "deepinfra/moonshotai/Kimi-K2-Instruct": { - "input_cost_per_token": 5e-7, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, + "input_cost_per_token": 5e-7, + "output_cost_per_token": 0.000002, + "litellm_provider": "deepinfra", "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/moonshotai/Kimi-K2-Instruct-0905": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 5e-7, "output_cost_per_token": 0.000002, + "cache_read_input_token_cost": 4e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/nvidia/Llama-3.1-Nemotron-70B-Instruct": { - "input_cost_per_token": 1.2e-7, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, + "input_cost_per_token": 6e-7, + "output_cost_per_token": 6e-7, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 3e-7, "supports_tool_choice": true }, - "deepinfra/openai/gpt-oss-120b": { - "input_cost_per_token": 9e-8, - "litellm_provider": "deepinfra", + "deepinfra/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5": { + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 4e-7, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 4.5e-7, "supports_tool_choice": true }, - "deepinfra/openai/gpt-oss-20b": { + "deepinfra/nvidia/NVIDIA-Nemotron-Nano-9B-v2": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, "input_cost_per_token": 4e-8, + "output_cost_per_token": 1.6e-7, "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/openai/gpt-oss-120b": { + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, + "input_cost_per_token": 5e-8, + "output_cost_per_token": 4.5e-7, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 1.6e-7, "supports_tool_choice": true }, - "deepinfra/zai-org/GLM-4.5": { - "input_cost_per_token": 5.5e-7, - "litellm_provider": "deepinfra", + "deepinfra/openai/gpt-oss-20b": { + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, + "input_cost_per_token": 4e-8, + "output_cost_per_token": 1.5e-7, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 0.000002, "supports_tool_choice": true }, - "deepinfra/zai-org/GLM-4.5-Air": { - "input_cost_per_token": 2e-7, - "litellm_provider": "deepinfra", + "deepinfra/zai-org/GLM-4.5": { + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, + "input_cost_per_token": 4e-7, + "output_cost_per_token": 0.0000016, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 0.0000011, "supports_tool_choice": true }, "deepseek/deepseek-chat": { @@ -6948,7 +7611,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://elevenlabs.io/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "elevenlabs/scribe_v1_experimental": { "input_cost_per_second": 0.0000611, @@ -6961,7 +7626,9 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://elevenlabs.io/pricing", - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "embed-english-light-v2.0": { "input_cost_per_token": 1e-7, @@ -7246,6 +7913,36 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 159 }, + "eu.anthropic.claude-sonnet-4-5-20250929-v1:0": { + "cache_creation_input_token_cost": 0.000004125, + "cache_read_input_token_cost": 3.3e-7, + "input_cost_per_token": 0.0000033, + "input_cost_per_token_above_200k_tokens": 0.0000066, + "output_cost_per_token_above_200k_tokens": 0.00002475, + "cache_creation_input_token_cost_above_200k_tokens": 0.00000825, + "cache_read_input_token_cost_above_200k_tokens": 6.6e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 200000, + "mode": "chat", + "output_cost_per_token": 0.0000165, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, "eu.meta.llama3-2-1b-instruct-v1:0": { "input_cost_per_token": 1.3e-7, "litellm_provider": "bedrock", @@ -8340,8 +9037,16 @@ "mode": "chat", "output_cost_per_token": 4e-7, "source": "https://ai.google.dev/pricing#2_0flash", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "image"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -8372,8 +9077,16 @@ "mode": "chat", "output_cost_per_token": 6e-7, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "image"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_audio_output": true, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -8412,8 +9125,16 @@ "output_cost_per_token": 6e-7, "output_cost_per_token_above_128k_tokens": 0, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "image"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_audio_output": true, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -8440,8 +9161,15 @@ "mode": "chat", "output_cost_per_token": 3e-7, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": true, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -8469,8 +9197,15 @@ "mode": "chat", "output_cost_per_token": 3e-7, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": true, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -8502,9 +9237,20 @@ "output_cost_per_token": 0.000002, "rpm": 10, "source": "https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/gemini#gemini-2-0-flash-live-preview-04-09", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "audio"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "audio" + ], "supports_audio_output": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -8535,8 +9281,16 @@ "mode": "chat", "output_cost_per_token": 4e-7, "source": "https://ai.google.dev/pricing#2_0flash", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "image"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -8576,8 +9330,16 @@ "output_cost_per_token": 0, "output_cost_per_token_above_128k_tokens": 0, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "image"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_audio_output": true, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -8616,8 +9378,16 @@ "output_cost_per_token": 0, "output_cost_per_token_above_128k_tokens": 0, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "image"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_audio_output": false, "supports_function_calling": false, "supports_parallel_function_calling": true, @@ -8646,9 +9416,19 @@ "output_cost_per_token": 0.00001, "output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_input": true, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -8679,9 +9459,20 @@ "output_cost_per_reasoning_token": 0.0000025, "output_cost_per_token": 0.0000025, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -8715,9 +9506,21 @@ "output_cost_per_token": 0.00003, "rpm": 100000, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "image"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -8749,9 +9552,20 @@ "output_cost_per_reasoning_token": 4e-7, "output_cost_per_token": 4e-7, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -8783,9 +9597,20 @@ "output_cost_per_reasoning_token": 4e-7, "output_cost_per_token": 4e-7, "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -8817,9 +9642,20 @@ "output_cost_per_reasoning_token": 0.0000025, "output_cost_per_token": 0.0000025, "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -8851,9 +9687,20 @@ "output_cost_per_reasoning_token": 4e-7, "output_cost_per_token": 4e-7, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -8885,9 +9732,20 @@ "output_cost_per_reasoning_token": 0.0000035, "output_cost_per_token": 6e-7, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -8918,9 +9776,20 @@ "output_cost_per_reasoning_token": 0.0000025, "output_cost_per_token": 0.0000025, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -8952,9 +9821,19 @@ "output_cost_per_token": 0.00001, "output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_input": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -8985,9 +9864,19 @@ "output_cost_per_token": 0.00001, "output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_input": true, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9019,9 +9908,20 @@ "output_cost_per_token": 0.00001, "output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9053,10 +9953,23 @@ "output_cost_per_token": 0.00001, "output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], - "supported_regions": ["global"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supported_regions": [ + "global" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9088,9 +10001,20 @@ "output_cost_per_token": 0.00001, "output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9122,8 +10046,12 @@ "output_cost_per_token": 0.00001, "output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", - "supported_modalities": ["text"], - "supported_output_modalities": ["audio"], + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "audio" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9556,8 +10484,16 @@ "output_cost_per_token": 4e-7, "rpm": 10000, "source": "https://ai.google.dev/pricing#2_0flash", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "image"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -9588,8 +10524,16 @@ "output_cost_per_token": 4e-7, "rpm": 10000, "source": "https://ai.google.dev/pricing#2_0flash", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "image"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_prompt_caching": true, @@ -9629,8 +10573,16 @@ "output_cost_per_token_above_128k_tokens": 0, "rpm": 10, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "image"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_audio_output": true, "supports_function_calling": true, "supports_prompt_caching": true, @@ -9658,8 +10610,15 @@ "output_cost_per_token": 3e-7, "rpm": 4000, "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.0-flash-lite", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": true, "supports_function_calling": true, "supports_prompt_caching": true, @@ -9688,8 +10647,15 @@ "output_cost_per_token": 3e-7, "rpm": 60000, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash-lite", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_prompt_caching": true, @@ -9721,9 +10687,20 @@ "output_cost_per_token": 0.0000015, "rpm": 10, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2-0-flash-live-001", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "audio"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "audio" + ], "supports_audio_output": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -9755,8 +10732,16 @@ "output_cost_per_token": 4e-7, "rpm": 10000, "source": "https://ai.google.dev/pricing#2_0flash", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "image"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -9797,8 +10782,16 @@ "output_cost_per_token_above_128k_tokens": 0, "rpm": 10, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "image"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_audio_output": true, "supports_function_calling": true, "supports_prompt_caching": true, @@ -9838,8 +10831,16 @@ "output_cost_per_token_above_128k_tokens": 0, "rpm": 10, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "image"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_audio_output": true, "supports_function_calling": true, "supports_prompt_caching": true, @@ -9910,9 +10911,20 @@ "output_cost_per_token": 0.0000025, "rpm": 100000, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9947,9 +10959,21 @@ "output_cost_per_token": 0.00003, "rpm": 100000, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text", "image"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9982,9 +11006,20 @@ "output_cost_per_token": 4e-7, "rpm": 15, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-lite", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10018,9 +11053,20 @@ "output_cost_per_token": 4e-7, "rpm": 15, "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10054,9 +11100,20 @@ "output_cost_per_token": 0.0000025, "rpm": 15, "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10090,9 +11147,20 @@ "output_cost_per_token": 0.0000025, "rpm": 15, "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10126,9 +11194,20 @@ "output_cost_per_token": 4e-7, "rpm": 15, "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10162,9 +11241,20 @@ "output_cost_per_token": 4e-7, "rpm": 15, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-lite", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10198,9 +11288,19 @@ "output_cost_per_token": 6e-7, "rpm": 10, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_pdf_input": true, @@ -10232,9 +11332,19 @@ "output_cost_per_token": 0.0000025, "rpm": 10, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_pdf_input": true, @@ -10267,9 +11377,16 @@ "output_cost_per_token": 6e-7, "rpm": 10, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], - "supported_modalities": ["text"], - "supported_output_modalities": ["audio"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions" + ], + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "audio" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_prompt_caching": true, @@ -10300,9 +11417,19 @@ "output_cost_per_token_above_200k_tokens": 0.000015, "rpm": 2000, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_input": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -10335,9 +11462,19 @@ "output_cost_per_token_above_200k_tokens": 0, "rpm": 5, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_input": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -10370,8 +11507,15 @@ "output_cost_per_token_above_200k_tokens": 0.000015, "rpm": 10000, "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_pdf_input": true, @@ -10403,8 +11547,15 @@ "output_cost_per_token_above_200k_tokens": 0.000015, "rpm": 10000, "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_pdf_input": true, @@ -10437,8 +11588,15 @@ "output_cost_per_token_above_200k_tokens": 0.000015, "rpm": 10000, "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", - "supported_modalities": ["text", "image", "audio", "video"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_pdf_input": true, @@ -10471,8 +11629,12 @@ "output_cost_per_token_above_200k_tokens": 0.000015, "rpm": 10000, "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", - "supported_modalities": ["text"], - "supported_output_modalities": ["audio"], + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "audio" + ], "supports_audio_output": false, "supports_function_calling": true, "supports_prompt_caching": true, @@ -10688,39 +11850,111 @@ "supports_audio_output": false, "supports_function_calling": true, "supports_response_schema": true, - "supports_system_messages": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "gemini/veo-2.0-generate-001": { + "litellm_provider": "gemini", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + "output_cost_per_second": 0.35, + "source": "https://ai.google.dev/gemini-api/docs/video", + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] + }, + "gemini/veo-3.0-fast-generate-preview": { + "litellm_provider": "gemini", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + "output_cost_per_second": 0.4, + "source": "https://ai.google.dev/gemini-api/docs/video", + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] + }, + "gemini/veo-3.0-generate-preview": { + "litellm_provider": "gemini", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + "output_cost_per_second": 0.75, + "source": "https://ai.google.dev/gemini-api/docs/video", + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] + }, + "global.anthropic.claude-sonnet-4-5-20250929-v1:0": { + "cache_creation_input_token_cost": 0.00000375, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 200000, + "mode": "chat", + "output_cost_per_token": 0.000015, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, + "global.anthropic.claude-sonnet-4-20250514-v1:0": { + "cache_creation_input_token_cost": 0.00000375, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000015, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true - }, - "gemini/veo-2.0-generate-001": { - "litellm_provider": "gemini", - "max_input_tokens": 1024, - "max_tokens": 1024, - "mode": "video_generation", - "output_cost_per_second": 0.35, - "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": ["text"], - "supported_output_modalities": ["video"] - }, - "gemini/veo-3.0-fast-generate-preview": { - "litellm_provider": "gemini", - "max_input_tokens": 1024, - "max_tokens": 1024, - "mode": "video_generation", - "output_cost_per_second": 0.4, - "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": ["text"], - "supported_output_modalities": ["video"] - }, - "gemini/veo-3.0-generate-preview": { - "litellm_provider": "gemini", - "max_input_tokens": 1024, - "max_tokens": 1024, - "mode": "video_generation", - "output_cost_per_second": 0.75, - "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": ["text"], - "supported_output_modalities": ["video"] + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 }, "gpt-3.5-turbo": { "input_cost_per_token": 5e-7, @@ -11024,9 +12258,18 @@ "output_cost_per_token": 0.000008, "output_cost_per_token_batches": 0.000004, "output_cost_per_token_priority": 0.000014, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -11048,9 +12291,18 @@ "mode": "chat", "output_cost_per_token": 0.000008, "output_cost_per_token_batches": 0.000004, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -11075,9 +12327,18 @@ "output_cost_per_token": 0.0000016, "output_cost_per_token_batches": 8e-7, "output_cost_per_token_priority": 0.0000028, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -11099,9 +12360,18 @@ "mode": "chat", "output_cost_per_token": 0.0000016, "output_cost_per_token_batches": 8e-7, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -11126,9 +12396,18 @@ "output_cost_per_token": 4e-7, "output_cost_per_token_batches": 2e-7, "output_cost_per_token_priority": 8e-7, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -11150,9 +12429,18 @@ "mode": "chat", "output_cost_per_token": 4e-7, "output_cost_per_token_batches": 2e-7, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -11529,7 +12817,9 @@ "max_output_tokens": 2000, "mode": "audio_transcription", "output_cost_per_token": 0.000005, - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "gpt-4o-mini-tts": { "input_cost_per_token": 0.0000025, @@ -11538,9 +12828,16 @@ "output_cost_per_audio_token": 0.000012, "output_cost_per_second": 0.00025, "output_cost_per_token": 0.00001, - "supported_endpoints": ["/v1/audio/speech"], - "supported_modalities": ["text", "audio"], - "supported_output_modalities": ["audio"] + "supported_endpoints": [ + "/v1/audio/speech" + ], + "supported_modalities": [ + "text", + "audio" + ], + "supported_output_modalities": [ + "audio" + ] }, "gpt-4o-realtime-preview": { "cache_read_input_token_cost": 0.0000025, @@ -11669,7 +12966,9 @@ "max_output_tokens": 2000, "mode": "audio_transcription", "output_cost_per_token": 0.00001, - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "gpt-5": { "cache_read_input_token_cost": 1.25e-7, @@ -11686,9 +12985,18 @@ "output_cost_per_token": 0.00001, "output_cost_per_token_flex": 0.000005, "output_cost_per_token_priority": 0.00002, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -11700,6 +13008,72 @@ "supports_tool_choice": true, "supports_vision": true }, + "gpt-5-pro": { + "input_cost_per_token": 0.000015, + "input_cost_per_token_batches": 0.0000075, + "litellm_provider": "openai", + "max_input_tokens": 400000, + "max_output_tokens": 272000, + "max_tokens": 272000, + "mode": "responses", + "output_cost_per_token": 0.00012, + "output_cost_per_token_batches": 0.00006, + "supported_endpoints": [ + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": false, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "gpt-5-pro-2025-10-06": { + "input_cost_per_token": 0.000015, + "input_cost_per_token_batches": 0.0000075, + "litellm_provider": "openai", + "max_input_tokens": 400000, + "max_output_tokens": 272000, + "max_tokens": 272000, + "mode": "responses", + "output_cost_per_token": 0.00012, + "output_cost_per_token_batches": 0.00006, + "supported_endpoints": [ + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": false, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, "gpt-5-2025-08-07": { "cache_read_input_token_cost": 1.25e-7, "cache_read_input_token_cost_flex": 6.25e-8, @@ -11715,9 +13089,18 @@ "output_cost_per_token": 0.00001, "output_cost_per_token_flex": 0.000005, "output_cost_per_token_priority": 0.00002, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -11739,9 +13122,18 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.00001, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": false, "supports_native_streaming": true, "supports_parallel_function_calling": false, @@ -11757,14 +13149,23 @@ "cache_read_input_token_cost": 1.25e-7, "input_cost_per_token": 0.00000125, "litellm_provider": "openai", - "max_input_tokens": 272000, - "max_output_tokens": 128000, - "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, "mode": "chat", "output_cost_per_token": 0.00001, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": false, "supports_native_streaming": true, "supports_parallel_function_calling": false, @@ -11785,9 +13186,16 @@ "max_tokens": 128000, "mode": "responses", "output_cost_per_token": 0.00001, - "supported_endpoints": ["/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": false, "supports_parallel_function_calling": true, @@ -11814,9 +13222,18 @@ "output_cost_per_token": 0.000002, "output_cost_per_token_flex": 0.000001, "output_cost_per_token_priority": 0.0000036, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -11843,9 +13260,18 @@ "output_cost_per_token": 0.000002, "output_cost_per_token_flex": 0.000001, "output_cost_per_token_priority": 0.0000036, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -11870,9 +13296,18 @@ "mode": "chat", "output_cost_per_token": 4e-7, "output_cost_per_token_flex": 2e-7, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -11896,9 +13331,18 @@ "mode": "chat", "output_cost_per_token": 4e-7, "output_cost_per_token_flex": 2e-7, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -11915,7 +13359,22 @@ "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "gpt-image-1-mini": { + "cache_read_input_image_token_cost": 2.5e-7, + "cache_read_input_token_cost": 2e-7, + "input_cost_per_image_token": 0.0000025, + "input_cost_per_token": 0.000002, + "litellm_provider": "openai", + "mode": "chat", + "output_cost_per_image_token": 0.000008, + "supported_endpoints": [ + "/v1/images/generations", + "/v1/images/edits" + ] }, "gpt-realtime": { "cache_creation_input_audio_token_cost": 4e-7, @@ -11930,9 +13389,49 @@ "mode": "chat", "output_cost_per_audio_token": 0.000064, "output_cost_per_token": 0.000016, - "supported_endpoints": ["/v1/realtime"], - "supported_modalities": ["text", "image", "audio"], - "supported_output_modalities": ["text", "audio"], + "supported_endpoints": [ + "/v1/realtime" + ], + "supported_modalities": [ + "text", + "image", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, + "gpt-realtime-mini": { + "cache_creation_input_audio_token_cost": 3e-7, + "cache_read_input_audio_token_cost": 3e-7, + "input_cost_per_audio_token": 0.00001, + "input_cost_per_token": 6e-7, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 0.00002, + "output_cost_per_token": 0.0000024, + "supported_endpoints": [ + "/v1/realtime" + ], + "supported_modalities": [ + "text", + "image", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -11953,9 +13452,18 @@ "mode": "chat", "output_cost_per_audio_token": 0.000064, "output_cost_per_token": 0.000016, - "supported_endpoints": ["/v1/realtime"], - "supported_modalities": ["text", "image", "audio"], - "supported_output_modalities": ["text", "audio"], + "supported_endpoints": [ + "/v1/realtime" + ], + "supported_modalities": [ + "text", + "image", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -11967,8 +13475,12 @@ "litellm_provider": "gradient_ai", "max_tokens": 2048, "mode": "chat", - "supported_endpoints": ["/v1/chat/completions"], - "supported_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], "supports_tool_choice": false }, "gradient_ai/anthropic-claude-3-opus": { @@ -11977,8 +13489,12 @@ "max_tokens": 1024, "mode": "chat", "output_cost_per_token": 0.000075, - "supported_endpoints": ["/v1/chat/completions"], - "supported_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], "supports_tool_choice": false }, "gradient_ai/anthropic-claude-3.5-haiku": { @@ -11987,8 +13503,12 @@ "max_tokens": 1024, "mode": "chat", "output_cost_per_token": 0.000004, - "supported_endpoints": ["/v1/chat/completions"], - "supported_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], "supports_tool_choice": false }, "gradient_ai/anthropic-claude-3.5-sonnet": { @@ -11997,8 +13517,12 @@ "max_tokens": 1024, "mode": "chat", "output_cost_per_token": 0.000015, - "supported_endpoints": ["/v1/chat/completions"], - "supported_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], "supports_tool_choice": false }, "gradient_ai/anthropic-claude-3.7-sonnet": { @@ -12007,8 +13531,12 @@ "max_tokens": 1024, "mode": "chat", "output_cost_per_token": 0.000015, - "supported_endpoints": ["/v1/chat/completions"], - "supported_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], "supports_tool_choice": false }, "gradient_ai/deepseek-r1-distill-llama-70b": { @@ -12017,8 +13545,12 @@ "max_tokens": 8000, "mode": "chat", "output_cost_per_token": 9.9e-7, - "supported_endpoints": ["/v1/chat/completions"], - "supported_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], "supports_tool_choice": false }, "gradient_ai/llama3-8b-instruct": { @@ -12027,8 +13559,12 @@ "max_tokens": 512, "mode": "chat", "output_cost_per_token": 2e-7, - "supported_endpoints": ["/v1/chat/completions"], - "supported_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], "supports_tool_choice": false }, "gradient_ai/llama3.3-70b-instruct": { @@ -12037,8 +13573,12 @@ "max_tokens": 2048, "mode": "chat", "output_cost_per_token": 6.5e-7, - "supported_endpoints": ["/v1/chat/completions"], - "supported_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], "supports_tool_choice": false }, "gradient_ai/mistral-nemo-instruct-2407": { @@ -12047,24 +13587,36 @@ "max_tokens": 512, "mode": "chat", "output_cost_per_token": 3e-7, - "supported_endpoints": ["/v1/chat/completions"], - "supported_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], "supports_tool_choice": false }, "gradient_ai/openai-gpt-4o": { "litellm_provider": "gradient_ai", "max_tokens": 16384, "mode": "chat", - "supported_endpoints": ["/v1/chat/completions"], - "supported_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], "supports_tool_choice": false }, "gradient_ai/openai-gpt-4o-mini": { "litellm_provider": "gradient_ai", "max_tokens": 16384, "mode": "chat", - "supported_endpoints": ["/v1/chat/completions"], - "supported_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], "supports_tool_choice": false }, "gradient_ai/openai-o3": { @@ -12073,8 +13625,12 @@ "max_tokens": 100000, "mode": "chat", "output_cost_per_token": 0.000008, - "supported_endpoints": ["/v1/chat/completions"], - "supported_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], "supports_tool_choice": false }, "gradient_ai/openai-o3-mini": { @@ -12083,8 +13639,12 @@ "max_tokens": 100000, "mode": "chat", "output_cost_per_token": 0.0000044, - "supported_endpoints": ["/v1/chat/completions"], - "supported_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], "supports_tool_choice": false }, "lemonade/Qwen3-Coder-30B-A3B-Instruct-GGUF": { @@ -12388,6 +13948,19 @@ "supports_response_schema": true, "supports_tool_choice": true }, + "groq/moonshotai/kimi-k2-instruct-0905": { + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000003, + "cache_read_input_token_cost": 5e-7, + "litellm_provider": "groq", + "max_input_tokens": 262144, + "max_output_tokens": 16384, + "max_tokens": 278528, + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, "groq/openai/gpt-oss-120b": { "input_cost_per_token": 1.5e-7, "litellm_provider": "groq", @@ -12506,21 +14079,27 @@ "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "high/1024-x-1536/gpt-image-1": { "input_cost_per_pixel": 1.58945719e-7, "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "high/1536-x-1024/gpt-image-1": { "input_cost_per_pixel": 1.58945719e-7, "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "hyperbolic/NousResearch/Hermes-3-Llama-3.1-70B": { "input_cost_per_token": 1.2e-7, @@ -12857,6 +14436,36 @@ "mode": "rerank", "output_cost_per_token": 1.8e-8 }, + "jp.anthropic.claude-sonnet-4-5-20250929-v1:0": { + "cache_creation_input_token_cost": 0.000004125, + "cache_read_input_token_cost": 3.3e-7, + "input_cost_per_token": 0.0000033, + "input_cost_per_token_above_200k_tokens": 0.0000066, + "output_cost_per_token_above_200k_tokens": 0.00002475, + "cache_creation_input_token_cost_above_200k_tokens": 0.00000825, + "cache_read_input_token_cost_above_200k_tokens": 6.6e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 200000, + "mode": "chat", + "output_cost_per_token": 0.0000165, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, "lambda_ai/deepseek-llama3.3-70b": { "input_cost_per_token": 2e-7, "litellm_provider": "lambda_ai", @@ -13127,21 +14736,27 @@ "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "low/1024-x-1536/gpt-image-1": { "input_cost_per_pixel": 1.0172526e-8, "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "low/1536-x-1024/gpt-image-1": { "input_cost_per_pixel": 1.0172526e-8, "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "luminous-base": { "input_cost_per_token": 0.00003, @@ -13204,21 +14819,75 @@ "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "medium/1024-x-1536/gpt-image-1": { "input_cost_per_pixel": 4.0054321e-8, "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "medium/1536-x-1024/gpt-image-1": { "input_cost_per_pixel": 4.0054321e-8, "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "low/1024-x-1024/gpt-image-1-mini": { + "input_cost_per_image": 0.005, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "low/1024-x-1536/gpt-image-1-mini": { + "input_cost_per_image": 0.006, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "low/1536-x-1024/gpt-image-1-mini": { + "input_cost_per_image": 0.006, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "medium/1024-x-1024/gpt-image-1-mini": { + "input_cost_per_image": 0.011, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "medium/1024-x-1536/gpt-image-1-mini": { + "input_cost_per_image": 0.015, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "medium/1536-x-1024/gpt-image-1-mini": { + "input_cost_per_image": 0.015, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations" + ] }, "medlm-large": { "input_cost_per_character": 0.000005, @@ -13378,8 +15047,14 @@ "mode": "chat", "output_cost_per_token": 9.7e-7, "output_cost_per_token_batches": 4.85e-7, - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text", "code"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "code" + ], "supports_function_calling": true, "supports_tool_choice": false }, @@ -13393,8 +15068,14 @@ "mode": "chat", "output_cost_per_token": 6.6e-7, "output_cost_per_token_batches": 3.3e-7, - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text", "code"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "code" + ], "supports_function_calling": true, "supports_tool_choice": false }, @@ -13405,8 +15086,12 @@ "max_tokens": 128000, "mode": "chat", "source": "https://llama.developer.meta.com/docs/models", - "supported_modalities": ["text"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_tool_choice": true }, @@ -13417,8 +15102,12 @@ "max_tokens": 128000, "mode": "chat", "source": "https://llama.developer.meta.com/docs/models", - "supported_modalities": ["text"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_tool_choice": true }, @@ -13429,8 +15118,13 @@ "max_tokens": 128000, "mode": "chat", "source": "https://llama.developer.meta.com/docs/models", - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_tool_choice": true }, @@ -13441,8 +15135,13 @@ "max_tokens": 128000, "mode": "chat", "source": "https://llama.developer.meta.com/docs/models", - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_tool_choice": true }, @@ -14146,8 +15845,14 @@ "output_cost_per_token": 0, "output_vector_size": 768, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models", - "supported_endpoints": ["/v1/embeddings"], - "supported_modalities": ["text", "image", "video"] + "supported_endpoints": [ + "/v1/embeddings" + ], + "supported_modalities": [ + "text", + "image", + "video" + ] }, "multimodalembedding@001": { "input_cost_per_character": 2e-7, @@ -14163,8 +15868,14 @@ "output_cost_per_token": 0, "output_vector_size": 768, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models", - "supported_endpoints": ["/v1/embeddings"], - "supported_modalities": ["text", "image", "video"] + "supported_endpoints": [ + "/v1/embeddings" + ], + "supported_modalities": [ + "text", + "image", + "video" + ] }, "nscale/Qwen/QwQ-32B": { "input_cost_per_token": 1.8e-7, @@ -14200,7 +15911,9 @@ "mode": "image_generation", "output_cost_per_pixel": 0, "source": "https://docs.nscale.com/docs/inference/serverless-models/current#image-models", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "nscale/deepseek-ai/DeepSeek-R1-Distill-Llama-70B": { "input_cost_per_token": 3.75e-7, @@ -14305,7 +16018,9 @@ "mode": "image_generation", "output_cost_per_pixel": 0, "source": "https://docs.nscale.com/docs/inference/serverless-models/current#image-models", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "o1": { "cache_read_input_token_cost": 0.0000075, @@ -14410,9 +16125,17 @@ "mode": "responses", "output_cost_per_token": 0.0006, "output_cost_per_token_batches": 0.0003, - "supported_endpoints": ["/v1/responses", "/v1/batch"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/responses", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": false, "supports_parallel_function_calling": true, @@ -14434,9 +16157,17 @@ "mode": "responses", "output_cost_per_token": 0.0006, "output_cost_per_token_batches": 0.0003, - "supported_endpoints": ["/v1/responses", "/v1/batch"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/responses", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": false, "supports_parallel_function_calling": true, @@ -14469,8 +16200,13 @@ "/v1/completions", "/v1/batch" ], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_pdf_input": true, @@ -14495,8 +16231,13 @@ "/v1/completions", "/v1/batch" ], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_pdf_input": true, @@ -14517,9 +16258,18 @@ "mode": "responses", "output_cost_per_token": 0.00004, "output_cost_per_token_batches": 0.00002, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -14541,9 +16291,18 @@ "mode": "responses", "output_cost_per_token": 0.00004, "output_cost_per_token_batches": 0.00002, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -14598,9 +16357,17 @@ "mode": "responses", "output_cost_per_token": 0.00008, "output_cost_per_token_batches": 0.00004, - "supported_endpoints": ["/v1/responses", "/v1/batch"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/responses", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_pdf_input": true, @@ -14620,9 +16387,17 @@ "mode": "responses", "output_cost_per_token": 0.00008, "output_cost_per_token_batches": 0.00004, - "supported_endpoints": ["/v1/responses", "/v1/batch"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/responses", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_pdf_input": true, @@ -14685,9 +16460,18 @@ "mode": "responses", "output_cost_per_token": 0.000008, "output_cost_per_token_batches": 0.000004, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -14709,9 +16493,18 @@ "mode": "responses", "output_cost_per_token": 0.000008, "output_cost_per_token_batches": 0.000004, - "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -14842,6 +16635,42 @@ "supports_function_calling": true, "supports_response_schema": false }, + "oci/cohere.command-latest": { + "input_cost_per_token": 0.00000156, + "litellm_provider": "oci", + "max_input_tokens": 128000, + "max_output_tokens": 4000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.00000156, + "source": "https://www.oracle.com/cloud/ai/generative-ai/pricing/", + "supports_function_calling": true, + "supports_response_schema": false + }, + "oci/cohere.command-a-03-2025": { + "input_cost_per_token": 0.00000156, + "litellm_provider": "oci", + "max_input_tokens": 256000, + "max_output_tokens": 4000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 0.00000156, + "source": "https://www.oracle.com/cloud/ai/generative-ai/pricing/", + "supports_function_calling": true, + "supports_response_schema": false + }, + "oci/cohere.command-plus-latest": { + "input_cost_per_token": 0.00000156, + "litellm_provider": "oci", + "max_input_tokens": 128000, + "max_output_tokens": 4000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.00000156, + "source": "https://www.oracle.com/cloud/ai/generative-ai/pricing/", + "supports_function_calling": true, + "supports_response_schema": false + }, "ollama/codegeex4": { "input_cost_per_token": 0, "litellm_provider": "ollama", @@ -15375,6 +17204,25 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 159 }, + "openrouter/anthropic/claude-sonnet-4.5": { + "input_cost_per_image": 0.0048, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "litellm_provider": "openrouter", + "max_input_tokens": 1000000, + "max_output_tokens": 1000000, + "max_tokens": 1000000, + "mode": "chat", + "output_cost_per_token": 0.000015, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, "openrouter/bytedance/ui-tars-1.5-7b": { "input_cost_per_token": 1e-7, "litellm_provider": "openrouter", @@ -15918,8 +17766,13 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.00001, - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_reasoning": true, "supports_tool_choice": true }, @@ -15932,8 +17785,13 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.00001, - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_reasoning": true, "supports_tool_choice": true }, @@ -15946,8 +17804,13 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.00001, - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_reasoning": true, "supports_tool_choice": true }, @@ -15960,8 +17823,13 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.000002, - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_reasoning": true, "supports_tool_choice": true }, @@ -15974,8 +17842,13 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 4e-7, - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_reasoning": true, "supports_tool_choice": true }, @@ -16772,14 +18645,18 @@ "mode": "image_generation", "output_cost_per_image": 0.022, "source": "https://www.recraft.ai/docs#pricing", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "recraft/recraftv3": { "litellm_provider": "recraft", "mode": "image_generation", "output_cost_per_image": 0.04, "source": "https://www.recraft.ai/docs#pricing", - "supported_endpoints": ["/v1/images/generations"] + "supported_endpoints": [ + "/v1/images/generations" + ] }, "replicate/meta/llama-2-13b": { "input_cost_per_token": 1e-7, @@ -16966,6 +18843,20 @@ "mode": "rerank", "output_cost_per_token": 0 }, + "nvidia_nim/nvidia/nv-rerankqa-mistral-4b-v3": { + "input_cost_per_query": 0, + "input_cost_per_token": 0, + "litellm_provider": "nvidia_nim", + "mode": "rerank", + "output_cost_per_token": 0 + }, + "nvidia_nim/nvidia/llama-3_2-nv-rerankqa-1b-v2": { + "input_cost_per_query": 0, + "input_cost_per_token": 0, + "litellm_provider": "nvidia_nim", + "mode": "rerank", + "output_cost_per_token": 0 + }, "sagemaker/meta-textgeneration-llama-2-13b": { "input_cost_per_token": 0, "litellm_provider": "sagemaker", @@ -17236,7 +19127,13 @@ "search_context_size_low": 0, "search_context_size_medium": 0 }, - "supported_regions": ["global", "us-west-2", "eu-west-1", "ap-southeast-1", "ap-northeast-1"], + "supported_regions": [ + "global", + "us-west-2", + "eu-west-1", + "ap-southeast-1", + "ap-northeast-1" + ], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -17804,6 +19701,22 @@ "mode": "embedding", "output_cost_per_token": 0 }, + "together_ai/baai/bge-base-en-v1.5": { + "input_cost_per_token": 8e-9, + "litellm_provider": "together_ai", + "max_input_tokens": 512, + "mode": "embedding", + "output_cost_per_token": 0, + "output_vector_size": 768 + }, + "together_ai/BAAI/bge-base-en-v1.5": { + "input_cost_per_token": 8e-9, + "litellm_provider": "together_ai", + "max_input_tokens": 512, + "mode": "embedding", + "output_cost_per_token": 0, + "output_vector_size": 768 + }, "together-ai-up-to-4b": { "input_cost_per_token": 1e-7, "litellm_provider": "together_ai", @@ -18064,17 +19977,54 @@ "supports_parallel_function_calling": true, "supports_tool_choice": true }, + "together_ai/moonshotai/Kimi-K2-Instruct-0905": { + "input_cost_per_token": 0.000001, + "litellm_provider": "together_ai", + "max_input_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.000003, + "source": "https://www.together.ai/models/kimi-k2-0905", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true + }, + "together_ai/Qwen/Qwen3-Next-80B-A3B-Instruct": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "together_ai", + "max_input_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.0000015, + "source": "https://www.together.ai/models/qwen3-next-80b-a3b-instruct", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true + }, + "together_ai/Qwen/Qwen3-Next-80B-A3B-Thinking": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "together_ai", + "max_input_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.0000015, + "source": "https://www.together.ai/models/qwen3-next-80b-a3b-thinking", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true + }, "tts-1": { "input_cost_per_character": 0.000015, "litellm_provider": "openai", "mode": "audio_speech", - "supported_endpoints": ["/v1/audio/speech"] + "supported_endpoints": [ + "/v1/audio/speech" + ] }, "tts-1-hd": { "input_cost_per_character": 0.00003, "litellm_provider": "openai", "mode": "audio_speech", - "supported_endpoints": ["/v1/audio/speech"] + "supported_endpoints": [ + "/v1/audio/speech" + ] }, "us.amazon.nova-lite-v1:0": { "input_cost_per_token": 6e-8, @@ -18268,15 +20218,19 @@ "tool_use_system_prompt_tokens": 159 }, "us.anthropic.claude-sonnet-4-5-20250929-v1:0": { - "cache_creation_input_token_cost": 0.00000375, - "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 0.000003, + "cache_creation_input_token_cost": 0.000004125, + "cache_read_input_token_cost": 3.3e-7, + "input_cost_per_token": 0.0000033, + "input_cost_per_token_above_200k_tokens": 0.0000066, + "output_cost_per_token_above_200k_tokens": 0.00002475, + "cache_creation_input_token_cost_above_200k_tokens": 0.00000825, + "cache_read_input_token_cost_above_200k_tokens": 6.6e-7, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 200000, "mode": "chat", - "output_cost_per_token": 0.000015, + "output_cost_per_token": 0.0000165, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -18461,8 +20415,14 @@ "mode": "chat", "output_cost_per_token": 9.7e-7, "output_cost_per_token_batches": 4.85e-7, - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text", "code"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "code" + ], "supports_function_calling": true, "supports_tool_choice": false }, @@ -18476,8 +20436,14 @@ "mode": "chat", "output_cost_per_token": 6.6e-7, "output_cost_per_token_batches": 3.3e-7, - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text", "code"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "code" + ], "supports_function_calling": true, "supports_tool_choice": false }, @@ -19625,6 +21591,10 @@ "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, "input_cost_per_token_batches": 0.0000015, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 200000, @@ -19647,6 +21617,10 @@ "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, "input_cost_per_token_batches": 0.0000015, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 200000, @@ -19793,7 +21767,9 @@ "mode": "chat", "output_cost_per_token": 0.0000054, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", - "supported_regions": ["us-west2"], + "supported_regions": [ + "us-west2" + ], "supports_assistant_prefill": true, "supports_function_calling": true, "supports_prompt_caching": true, @@ -19974,8 +21950,14 @@ "mode": "chat", "output_cost_per_token": 0.00000115, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text", "code"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "code" + ], "supports_function_calling": true, "supports_tool_choice": true }, @@ -19988,8 +21970,14 @@ "mode": "chat", "output_cost_per_token": 0.00000115, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text", "code"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "code" + ], "supports_function_calling": true, "supports_tool_choice": true }, @@ -20002,8 +21990,14 @@ "mode": "chat", "output_cost_per_token": 7e-7, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text", "code"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "code" + ], "supports_function_calling": true, "supports_tool_choice": true }, @@ -20016,8 +22010,14 @@ "mode": "chat", "output_cost_per_token": 7e-7, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", - "supported_modalities": ["text", "image"], - "supported_output_modalities": ["text", "code"], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text", + "code" + ], "supports_function_calling": true, "supports_tool_choice": true }, @@ -20220,8 +22220,12 @@ "mode": "video_generation", "output_cost_per_second": 0.35, "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": ["text"], - "supported_output_modalities": ["video"] + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] }, "vertex_ai/veo-3.0-fast-generate-preview": { "litellm_provider": "vertex_ai-video-models", @@ -20230,8 +22234,12 @@ "mode": "video_generation", "output_cost_per_second": 0.4, "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": ["text"], - "supported_output_modalities": ["video"] + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] }, "vertex_ai/veo-3.0-generate-preview": { "litellm_provider": "vertex_ai-video-models", @@ -20240,8 +22248,12 @@ "mode": "video_generation", "output_cost_per_second": 0.75, "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": ["text"], - "supported_output_modalities": ["video"] + "supported_modalities": [ + "text" + ], + "supported_output_modalities": [ + "video" + ] }, "voyage/rerank-2": { "input_cost_per_query": 5e-8, @@ -20531,12 +22543,314 @@ "supports_tool_choice": true, "supports_vision": false }, + "watsonx/bigscience/mt0-xxl-13b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0005, + "output_cost_per_token": 0.002, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/core42/jais-13b-chat": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0005, + "output_cost_per_token": 0.002, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/google/flan-t5-xl-3b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0001, + "output_cost_per_token": 0.00025, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/ibm/granite-13b-chat-v2": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0005, + "output_cost_per_token": 0.002, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/ibm/granite-13b-instruct-v2": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0005, + "output_cost_per_token": 0.002, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/ibm/granite-3-3-8b-instruct": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00025, + "output_cost_per_token": 0.001, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false + }, + "watsonx/ibm/granite-4-h-small": { + "max_tokens": 20480, + "max_input_tokens": 20480, + "max_output_tokens": 20480, + "input_cost_per_token": 0.000625, + "output_cost_per_token": 0.0025, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false + }, + "watsonx/ibm/granite-guardian-3-2-2b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00015, + "output_cost_per_token": 0.0006, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/ibm/granite-guardian-3-3-8b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00025, + "output_cost_per_token": 0.001, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/ibm/granite-ttm-1024-96-r2": { + "max_tokens": 512, + "max_input_tokens": 512, + "max_output_tokens": 512, + "input_cost_per_token": 0.000625, + "output_cost_per_token": 0.000625, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/ibm/granite-ttm-1536-96-r2": { + "max_tokens": 512, + "max_input_tokens": 512, + "max_output_tokens": 512, + "input_cost_per_token": 0.000625, + "output_cost_per_token": 0.000625, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/ibm/granite-ttm-512-96-r2": { + "max_tokens": 512, + "max_input_tokens": 512, + "max_output_tokens": 512, + "input_cost_per_token": 0.000625, + "output_cost_per_token": 0.000625, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/ibm/granite-vision-3-2-2b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00015, + "output_cost_per_token": 0.0006, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": true + }, + "watsonx/meta-llama/llama-3-2-11b-vision-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.00025, + "output_cost_per_token": 0.001, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true + }, + "watsonx/meta-llama/llama-3-2-1b-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.0001, + "output_cost_per_token": 0.0002, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false + }, + "watsonx/meta-llama/llama-3-2-3b-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.00015, + "output_cost_per_token": 0.0006, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false + }, + "watsonx/meta-llama/llama-3-2-90b-vision-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.002, + "output_cost_per_token": 0.008, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true + }, + "watsonx/meta-llama/llama-3-3-70b-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.002, + "output_cost_per_token": 0.006, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false + }, + "watsonx/meta-llama/llama-4-maverick-17b": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.0005, + "output_cost_per_token": 0.002, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false + }, + "watsonx/meta-llama/llama-guard-3-11b-vision": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.00025, + "output_cost_per_token": 0.001, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": true + }, + "watsonx/mistralai/mistral-medium-2505": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.00225, + "output_cost_per_token": 0.00675, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false + }, + "watsonx/mistralai/mistral-small-2503": { + "max_tokens": 32000, + "max_input_tokens": 32000, + "max_output_tokens": 32000, + "input_cost_per_token": 0.0002, + "output_cost_per_token": 0.0006, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false + }, + "watsonx/mistralai/pixtral-12b-2409": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.00015, + "output_cost_per_token": 0.00015, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": true + }, + "watsonx/openai/gpt-oss-120b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.004, + "output_cost_per_token": 0.016, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/sdaia/allam-1-13b-instruct": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0005, + "output_cost_per_token": 0.002, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, "whisper-1": { "input_cost_per_second": 0.0001, "litellm_provider": "openai", "mode": "audio_transcription", "output_cost_per_second": 0.0001, - "supported_endpoints": ["/v1/audio/transcriptions"] + "supported_endpoints": [ + "/v1/audio/transcriptions" + ] }, "xai/grok-2": { "input_cost_per_token": 0.000002, @@ -20916,4 +23230,4 @@ "supports_vision": true, "supports_web_search": true } -} +} \ No newline at end of file From 02f1b71d4fcfcd36fa4af43ed6b358b4d2053836 Mon Sep 17 00:00:00 2001 From: Ammar Date: Wed, 15 Oct 2025 13:37:41 -0500 Subject: [PATCH 2/3] =?UTF-8?q?=F0=9F=A4=96=20Fix=20LRU=20not=20updating?= =?UTF-8?q?=20when=20model=20is=20selected?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a user selects a model via ModelSelector, it now updates both: 1. The workspace-specific model storage (model:) 2. The global LRU cache (most recently used models) This ensures that newly selected models become the default for new workspaces, creating a natural user-driven flow where the most recently used model is suggested first. --- src/components/ChatInput.tsx | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/components/ChatInput.tsx b/src/components/ChatInput.tsx index b3a0005154..565dd429ee 100644 --- a/src/components/ChatInput.tsx +++ b/src/components/ChatInput.tsx @@ -350,7 +350,7 @@ export const ChatInput: React.FC = ({ const inputRef = useRef(null); const modelSelectorRef = useRef(null); const [mode, setMode] = useMode(); - const { recentModels } = useModelLRU(); + const { recentModels, addModel } = useModelLRU(); const commandListId = useId(); // Get current send message options from shared hook (must be at component top level) @@ -359,8 +359,11 @@ export const ChatInput: React.FC = ({ const preferredModel = sendMessageOptions.model; // Setter for model - updates localStorage directly so useSendMessageOptions picks it up const setPreferredModel = useCallback( - (model: string) => updatePersistedState(getModelKey(workspaceId), model), - [workspaceId] + (model: string) => { + addModel(model); // Update LRU + updatePersistedState(getModelKey(workspaceId), model); // Update workspace-specific + }, + [workspaceId, addModel] ); const focusMessageInput = useCallback(() => { From 445803a31d806eb9598776e113db9816a0a297cf Mon Sep 17 00:00:00 2001 From: Ammar Date: Wed, 15 Oct 2025 13:42:02 -0500 Subject: [PATCH 3/3] =?UTF-8?q?=F0=9F=A4=96=20Fix=20empty=20LRU=20on=20fir?= =?UTF-8?q?st=20render?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Initialize LRU with defaults immediately instead of waiting for useEffect. This prevents crashes when components access recentModels[0] before the useEffect has run to seed the LRU with default models. Changes: - usePersistedState now initializes with DEFAULT_MODELS instead of [] - useEffect simplified to only handle merging new defaults - getDefaultModelFromLRU uses same default initialization - LRU is now never empty, even on first render Fixes crash reported by Codex where ModelSelector calls .trim() on undefined. --- src/debug/agentSessionCli.ts | 3 +- src/hooks/useModelLRU.ts | 19 +- src/utils/tokens/models.json | 2466 +++++++--------------------------- 3 files changed, 521 insertions(+), 1967 deletions(-) diff --git a/src/debug/agentSessionCli.ts b/src/debug/agentSessionCli.ts index 1fd786a570..4efe6f76f8 100644 --- a/src/debug/agentSessionCli.ts +++ b/src/debug/agentSessionCli.ts @@ -184,7 +184,8 @@ async function main(): Promise { throw new Error("Message must be provided via --message or stdin"); } - const model = values.model && values.model.trim().length > 0 ? values.model.trim() : getDefaultModelFromLRU(); + const model = + values.model && values.model.trim().length > 0 ? values.model.trim() : getDefaultModelFromLRU(); const timeoutMs = parseTimeout(values.timeout); const thinkingLevel = parseThinkingLevel(values["thinking-level"]); const initialMode = parseMode(values.mode); diff --git a/src/hooks/useModelLRU.ts b/src/hooks/useModelLRU.ts index 0e2575713d..9ec1d47fd4 100644 --- a/src/hooks/useModelLRU.ts +++ b/src/hooks/useModelLRU.ts @@ -12,11 +12,11 @@ const DEFAULT_MODELS = Object.values(MODEL_ABBREVIATIONS); /** * Get the default model from LRU (non-hook version for use outside React) * This is the ONLY place that reads from LRU outside of the hook. - * + * * @returns The most recently used model, or defaultModel if LRU is empty */ export function getDefaultModelFromLRU(): string { - const lru = readPersistedState(LRU_KEY, []); + const lru = readPersistedState(LRU_KEY, DEFAULT_MODELS.slice(0, MAX_LRU_SIZE)); return lru[0] ?? defaultModel; } @@ -26,25 +26,20 @@ export function getDefaultModelFromLRU(): string { * Initializes with default abbreviated models if empty. */ export function useModelLRU() { - const [recentModels, setRecentModels] = usePersistedState(LRU_KEY, []); + const [recentModels, setRecentModels] = usePersistedState( + LRU_KEY, + DEFAULT_MODELS.slice(0, MAX_LRU_SIZE) + ); - // Ensure default models are always present in the LRU (only once on mount) + // Merge any new defaults from MODEL_ABBREVIATIONS (only once on mount) useEffect(() => { setRecentModels((prev) => { - // If empty, just use defaults - if (prev.length === 0) { - return DEFAULT_MODELS.slice(0, MAX_LRU_SIZE); - } - - // If we have some models, merge with defaults (keeping existing order, adding missing defaults at end) const merged = [...prev]; for (const defaultModel of DEFAULT_MODELS) { if (!merged.includes(defaultModel)) { merged.push(defaultModel); } } - - // Limit to MAX_LRU_SIZE return merged.slice(0, MAX_LRU_SIZE); }); // eslint-disable-next-line react-hooks/exhaustive-deps diff --git a/src/utils/tokens/models.json b/src/utils/tokens/models.json index 02e94181eb..ae6f03b523 100644 --- a/src/utils/tokens/models.json +++ b/src/utils/tokens/models.json @@ -105,9 +105,7 @@ "mode": "image_generation", "output_cost_per_image": 0.021, "source": "https://docs.aimlapi.com/", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "aiml/dall-e-3": { "litellm_provider": "aiml", @@ -117,9 +115,7 @@ "mode": "image_generation", "output_cost_per_image": 0.042, "source": "https://docs.aimlapi.com/", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "aiml/flux-pro": { "litellm_provider": "aiml", @@ -129,25 +125,19 @@ "mode": "image_generation", "output_cost_per_image": 0.053, "source": "https://docs.aimlapi.com/", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "aiml/flux-pro/v1.1": { "litellm_provider": "aiml", "mode": "image_generation", "output_cost_per_image": 0.042, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "aiml/flux-pro/v1.1-ultra": { "litellm_provider": "aiml", "mode": "image_generation", "output_cost_per_image": 0.063, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "aiml/flux-realism": { "litellm_provider": "aiml", @@ -157,9 +147,7 @@ "mode": "image_generation", "output_cost_per_image": 0.037, "source": "https://docs.aimlapi.com/", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "aiml/flux/dev": { "litellm_provider": "aiml", @@ -169,9 +157,7 @@ "mode": "image_generation", "output_cost_per_image": 0.026, "source": "https://docs.aimlapi.com/", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "aiml/flux/kontext-max/text-to-image": { "litellm_provider": "aiml", @@ -181,9 +167,7 @@ "mode": "image_generation", "output_cost_per_image": 0.084, "source": "https://docs.aimlapi.com/", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "aiml/flux/kontext-pro/text-to-image": { "litellm_provider": "aiml", @@ -193,9 +177,7 @@ "mode": "image_generation", "output_cost_per_image": 0.042, "source": "https://docs.aimlapi.com/", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "aiml/flux/schnell": { "litellm_provider": "aiml", @@ -205,9 +187,7 @@ "mode": "image_generation", "output_cost_per_image": 0.003, "source": "https://docs.aimlapi.com/", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "amazon.nova-lite-v1:0": { "input_cost_per_token": 6e-8, @@ -913,16 +893,9 @@ "max_tokens": 100000, "mode": "responses", "output_cost_per_token": 0.000006, - "supported_endpoints": [ - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -951,16 +924,9 @@ "max_tokens": 1024, "mode": "chat", "output_cost_per_token": 0.000012, - "supported_endpoints": [ - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": false, @@ -1067,14 +1033,8 @@ "mode": "chat", "output_cost_per_audio_token": 0.00008, "output_cost_per_token": 0.000022, - "supported_modalities": [ - "text", - "audio" - ], - "supported_output_modalities": [ - "text", - "audio" - ], + "supported_modalities": ["text", "audio"], + "supported_output_modalities": ["text", "audio"], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -1467,18 +1427,9 @@ "mode": "chat", "output_cost_per_token": 0.000008, "output_cost_per_token_batches": 0.000004, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1500,18 +1451,9 @@ "mode": "chat", "output_cost_per_token": 0.000008, "output_cost_per_token_batches": 0.000004, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1533,18 +1475,9 @@ "mode": "chat", "output_cost_per_token": 0.0000016, "output_cost_per_token_batches": 8e-7, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1566,18 +1499,9 @@ "mode": "chat", "output_cost_per_token": 0.0000016, "output_cost_per_token_batches": 8e-7, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1599,18 +1523,9 @@ "mode": "chat", "output_cost_per_token": 4e-7, "output_cost_per_token_batches": 2e-7, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1631,18 +1546,9 @@ "mode": "chat", "output_cost_per_token": 4e-7, "output_cost_per_token_batches": 2e-7, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1743,17 +1649,9 @@ "mode": "chat", "output_cost_per_audio_token": 0.00008, "output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/chat/completions" - ], - "supported_modalities": [ - "text", - "audio" - ], - "supported_output_modalities": [ - "text", - "audio" - ], + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text", "audio"], + "supported_output_modalities": ["text", "audio"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1806,17 +1704,9 @@ "mode": "chat", "output_cost_per_audio_token": 0.00008, "output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/chat/completions" - ], - "supported_modalities": [ - "text", - "audio" - ], - "supported_output_modalities": [ - "text", - "audio" - ], + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text", "audio"], + "supported_output_modalities": ["text", "audio"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1854,9 +1744,7 @@ "max_output_tokens": 2000, "mode": "audio_transcription", "output_cost_per_token": 0.000005, - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "azure/gpt-4o-mini-tts": { "input_cost_per_token": 0.0000025, @@ -1865,16 +1753,9 @@ "output_cost_per_audio_token": 0.000012, "output_cost_per_second": 0.00025, "output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/audio/speech" - ], - "supported_modalities": [ - "text", - "audio" - ], - "supported_output_modalities": [ - "audio" - ] + "supported_endpoints": ["/v1/audio/speech"], + "supported_modalities": ["text", "audio"], + "supported_output_modalities": ["audio"] }, "azure/gpt-4o-realtime-preview-2024-10-01": { "cache_creation_input_audio_token_cost": 0.00002, @@ -1906,14 +1787,8 @@ "mode": "chat", "output_cost_per_audio_token": 0.00008, "output_cost_per_token": 0.00002, - "supported_modalities": [ - "text", - "audio" - ], - "supported_output_modalities": [ - "text", - "audio" - ], + "supported_modalities": ["text", "audio"], + "supported_output_modalities": ["text", "audio"], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -1929,9 +1804,7 @@ "max_output_tokens": 2000, "mode": "audio_transcription", "output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "azure/gpt-5": { "cache_read_input_token_cost": 1.25e-7, @@ -1942,18 +1815,9 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -1974,18 +1838,9 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2007,18 +1862,9 @@ "mode": "chat", "output_cost_per_token": 0.00001, "source": "https://azure.microsoft.com/en-us/blog/gpt-5-in-azure-ai-foundry-the-future-of-ai-apps-and-agents-starts-here/", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2039,18 +1885,9 @@ "max_tokens": 16384, "mode": "chat", "output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2071,16 +1908,9 @@ "max_tokens": 128000, "mode": "responses", "output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2101,18 +1931,9 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.000002, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2133,18 +1954,9 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.000002, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2165,18 +1977,9 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 4e-7, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2197,18 +2000,9 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 4e-7, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -2225,9 +2019,7 @@ "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/hd/1024-x-1024/dall-e-3": { "input_cost_per_pixel": 7.629e-8, @@ -2252,81 +2044,63 @@ "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/high/1024-x-1536/gpt-image-1": { "input_cost_per_pixel": 1.58945719e-7, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/high/1536-x-1024/gpt-image-1": { "input_cost_per_pixel": 1.58945719e-7, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/low/1024-x-1024/gpt-image-1": { "input_cost_per_pixel": 1.0490417e-8, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/low/1024-x-1536/gpt-image-1": { "input_cost_per_pixel": 1.0172526e-8, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/low/1536-x-1024/gpt-image-1": { "input_cost_per_pixel": 1.0172526e-8, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/medium/1024-x-1024/gpt-image-1": { "input_cost_per_pixel": 4.0054321e-8, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/medium/1024-x-1536/gpt-image-1": { "input_cost_per_pixel": 4.0054321e-8, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/medium/1536-x-1024/gpt-image-1": { "input_cost_per_pixel": 4.0054321e-8, "litellm_provider": "azure", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure/mistral-large-2402": { "input_cost_per_token": 0.000008, @@ -2448,18 +2222,9 @@ "max_tokens": 100000, "mode": "chat", "output_cost_per_token": 0.000008, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_prompt_caching": true, @@ -2477,18 +2242,9 @@ "max_tokens": 100000, "mode": "chat", "output_cost_per_token": 0.00004, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_prompt_caching": true, @@ -2506,18 +2262,9 @@ "max_tokens": 100000, "mode": "responses", "output_cost_per_token": 0.00004, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -2568,18 +2315,9 @@ "mode": "responses", "output_cost_per_token": 0.00008, "output_cost_per_token_batches": 0.00004, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_prompt_caching": false, @@ -2598,18 +2336,9 @@ "mode": "responses", "output_cost_per_token": 0.00008, "output_cost_per_token_batches": 0.00004, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_prompt_caching": false, @@ -2627,18 +2356,9 @@ "max_tokens": 100000, "mode": "chat", "output_cost_per_token": 0.0000044, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_prompt_caching": true, @@ -2819,14 +2539,8 @@ "mode": "chat", "output_cost_per_audio_token": 0.00008, "output_cost_per_token": 0.000022, - "supported_modalities": [ - "text", - "audio" - ], - "supported_output_modalities": [ - "text", - "audio" - ], + "supported_modalities": ["text", "audio"], + "supported_output_modalities": ["text", "audio"], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -2928,18 +2642,14 @@ "mode": "image_generation", "output_cost_per_image": 0.04, "source": "https://techcommunity.microsoft.com/blog/azure-ai-foundry-blog/black-forest-labs-flux-1-kontext-pro-and-flux1-1-pro-now-available-in-azure-ai-f/4434659", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure_ai/FLUX.1-Kontext-pro": { "litellm_provider": "azure_ai", "mode": "image_generation", "output_cost_per_image": 0.04, "source": "https://azuremarketplace.microsoft.com/pt-br/marketplace/apps/cohere.cohere-embed-4-offer?tab=PlansAndPrice", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "azure_ai/Llama-3.2-11B-Vision-Instruct": { "input_cost_per_token": 3.7e-7, @@ -3307,13 +3017,8 @@ "output_cost_per_token": 0, "output_vector_size": 3072, "source": "https://azuremarketplace.microsoft.com/pt-br/marketplace/apps/cohere.cohere-embed-4-offer?tab=PlansAndPrice", - "supported_endpoints": [ - "/v1/embeddings" - ], - "supported_modalities": [ - "text", - "image" - ], + "supported_endpoints": ["/v1/embeddings"], + "supported_modalities": ["text", "image"], "supports_embedding_image_input": true }, "azure_ai/global/grok-3": { @@ -5282,16 +4987,9 @@ "max_tokens": 100000, "mode": "responses", "output_cost_per_token": 0.000006, - "supported_endpoints": [ - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -5486,16 +5184,9 @@ "max_tokens": 1024, "mode": "chat", "output_cost_per_token": 0.000012, - "supported_endpoints": [ - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_prompt_caching": false, @@ -5515,9 +5206,7 @@ "mode": "chat", "output_cost_per_token": 0.0000017, "source": "https://api-docs.deepseek.com/quick_start/pricing", - "supported_endpoints": [ - "/v1/chat/completions" - ], + "supported_endpoints": ["/v1/chat/completions"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -5536,9 +5225,7 @@ "mode": "chat", "output_cost_per_token": 0.0000017, "source": "https://api-docs.deepseek.com/quick_start/pricing", - "supported_endpoints": [ - "/v1/chat/completions" - ], + "supported_endpoints": ["/v1/chat/completions"], "supports_function_calling": false, "supports_native_streaming": true, "supports_parallel_function_calling": false, @@ -5575,18 +5262,12 @@ { "input_cost_per_token": 5e-8, "output_cost_per_token": 4e-7, - "range": [ - 0, - 256000 - ] + "range": [0, 256000] }, { "input_cost_per_token": 2.5e-7, "output_cost_per_token": 0.000002, - "range": [ - 256000, - 1000000 - ] + "range": [256000, 1000000] } ] }, @@ -5604,18 +5285,12 @@ { "input_cost_per_token": 5e-8, "output_cost_per_token": 4e-7, - "range": [ - 0, - 256000 - ] + "range": [0, 256000] }, { "input_cost_per_token": 2.5e-7, "output_cost_per_token": 0.000002, - "range": [ - 256000, - 1000000 - ] + "range": [256000, 1000000] } ] }, @@ -5701,19 +5376,13 @@ "input_cost_per_token": 4e-7, "output_cost_per_reasoning_token": 0.000004, "output_cost_per_token": 0.0000012, - "range": [ - 0, - 256000 - ] + "range": [0, 256000] }, { "input_cost_per_token": 0.0000012, "output_cost_per_reasoning_token": 0.000012, "output_cost_per_token": 0.0000036, - "range": [ - 256000, - 1000000 - ] + "range": [256000, 1000000] } ] }, @@ -5732,19 +5401,13 @@ "input_cost_per_token": 4e-7, "output_cost_per_reasoning_token": 0.000004, "output_cost_per_token": 0.0000012, - "range": [ - 0, - 256000 - ] + "range": [0, 256000] }, { "input_cost_per_token": 0.0000012, "output_cost_per_reasoning_token": 0.000012, "output_cost_per_token": 0.0000036, - "range": [ - 256000, - 1000000 - ] + "range": [256000, 1000000] } ] }, @@ -5763,19 +5426,13 @@ "input_cost_per_token": 4e-7, "output_cost_per_reasoning_token": 0.000004, "output_cost_per_token": 0.0000012, - "range": [ - 0, - 256000 - ] + "range": [0, 256000] }, { "input_cost_per_token": 0.0000012, "output_cost_per_reasoning_token": 0.000012, "output_cost_per_token": 0.0000036, - "range": [ - 256000, - 1000000 - ] + "range": [256000, 1000000] } ] }, @@ -5860,37 +5517,25 @@ "cache_read_input_token_cost": 8e-8, "input_cost_per_token": 3e-7, "output_cost_per_token": 0.0000015, - "range": [ - 0, - 32000 - ] + "range": [0, 32000] }, { "cache_read_input_token_cost": 1.2e-7, "input_cost_per_token": 5e-7, "output_cost_per_token": 0.0000025, - "range": [ - 32000, - 128000 - ] + "range": [32000, 128000] }, { "cache_read_input_token_cost": 2e-7, "input_cost_per_token": 8e-7, "output_cost_per_token": 0.000004, - "range": [ - 128000, - 256000 - ] + "range": [128000, 256000] }, { "cache_read_input_token_cost": 4e-7, "input_cost_per_token": 0.0000016, "output_cost_per_token": 0.0000096, - "range": [ - 256000, - 1000000 - ] + "range": [256000, 1000000] } ] }, @@ -5908,34 +5553,22 @@ { "input_cost_per_token": 3e-7, "output_cost_per_token": 0.0000015, - "range": [ - 0, - 32000 - ] + "range": [0, 32000] }, { "input_cost_per_token": 5e-7, "output_cost_per_token": 0.0000025, - "range": [ - 32000, - 128000 - ] + "range": [32000, 128000] }, { "input_cost_per_token": 8e-7, "output_cost_per_token": 0.000004, - "range": [ - 128000, - 256000 - ] + "range": [128000, 256000] }, { "input_cost_per_token": 0.0000016, "output_cost_per_token": 0.0000096, - "range": [ - 256000, - 1000000 - ] + "range": [256000, 1000000] } ] }, @@ -5954,37 +5587,25 @@ "cache_read_input_token_cost": 1e-7, "input_cost_per_token": 0.000001, "output_cost_per_token": 0.000005, - "range": [ - 0, - 32000 - ] + "range": [0, 32000] }, { "cache_read_input_token_cost": 1.8e-7, "input_cost_per_token": 0.0000018, "output_cost_per_token": 0.000009, - "range": [ - 32000, - 128000 - ] + "range": [32000, 128000] }, { "cache_read_input_token_cost": 3e-7, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000015, - "range": [ - 128000, - 256000 - ] + "range": [128000, 256000] }, { "cache_read_input_token_cost": 6e-7, "input_cost_per_token": 0.000006, "output_cost_per_token": 0.00006, - "range": [ - 256000, - 1000000 - ] + "range": [256000, 1000000] } ] }, @@ -6002,34 +5623,22 @@ { "input_cost_per_token": 0.000001, "output_cost_per_token": 0.000005, - "range": [ - 0, - 32000 - ] + "range": [0, 32000] }, { "input_cost_per_token": 0.0000018, "output_cost_per_token": 0.000009, - "range": [ - 32000, - 128000 - ] + "range": [32000, 128000] }, { "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000015, - "range": [ - 128000, - 256000 - ] + "range": [128000, 256000] }, { "input_cost_per_token": 0.000006, "output_cost_per_token": 0.00006, - "range": [ - 256000, - 1000000 - ] + "range": [256000, 1000000] } ] }, @@ -6047,26 +5656,17 @@ { "input_cost_per_token": 0.0000012, "output_cost_per_token": 0.000006, - "range": [ - 0, - 32000 - ] + "range": [0, 32000] }, { "input_cost_per_token": 0.0000024, "output_cost_per_token": 0.000012, - "range": [ - 32000, - 128000 - ] + "range": [32000, 128000] }, { "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000015, - "range": [ - 128000, - 252000 - ] + "range": [128000, 252000] } ] }, @@ -6279,9 +5879,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/base-conversationalai": { "input_cost_per_second": 0.00020833, @@ -6293,9 +5891,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/base-finance": { "input_cost_per_second": 0.00020833, @@ -6307,9 +5903,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/base-general": { "input_cost_per_second": 0.00020833, @@ -6321,9 +5915,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/base-meeting": { "input_cost_per_second": 0.00020833, @@ -6335,9 +5927,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/base-phonecall": { "input_cost_per_second": 0.00020833, @@ -6349,9 +5939,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/base-video": { "input_cost_per_second": 0.00020833, @@ -6363,9 +5951,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/base-voicemail": { "input_cost_per_second": 0.00020833, @@ -6377,9 +5963,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/enhanced": { "input_cost_per_second": 0.00024167, @@ -6391,9 +5975,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/enhanced-finance": { "input_cost_per_second": 0.00024167, @@ -6405,9 +5987,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/enhanced-general": { "input_cost_per_second": 0.00024167, @@ -6419,9 +5999,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/enhanced-meeting": { "input_cost_per_second": 0.00024167, @@ -6433,9 +6011,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/enhanced-phonecall": { "input_cost_per_second": 0.00024167, @@ -6447,9 +6023,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova": { "input_cost_per_second": 0.00007167, @@ -6461,9 +6035,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-2": { "input_cost_per_second": 0.00007167, @@ -6475,9 +6047,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-2-atc": { "input_cost_per_second": 0.00007167, @@ -6489,9 +6059,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-2-automotive": { "input_cost_per_second": 0.00007167, @@ -6503,9 +6071,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-2-conversationalai": { "input_cost_per_second": 0.00007167, @@ -6517,9 +6083,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-2-drivethru": { "input_cost_per_second": 0.00007167, @@ -6531,9 +6095,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-2-finance": { "input_cost_per_second": 0.00007167, @@ -6545,9 +6107,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-2-general": { "input_cost_per_second": 0.00007167, @@ -6559,9 +6119,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-2-meeting": { "input_cost_per_second": 0.00007167, @@ -6573,9 +6131,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-2-phonecall": { "input_cost_per_second": 0.00007167, @@ -6587,9 +6143,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-2-video": { "input_cost_per_second": 0.00007167, @@ -6601,9 +6155,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-2-voicemail": { "input_cost_per_second": 0.00007167, @@ -6615,9 +6167,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-3": { "input_cost_per_second": 0.00007167, @@ -6629,9 +6179,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-3-general": { "input_cost_per_second": 0.00007167, @@ -6643,9 +6191,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-3-medical": { "input_cost_per_second": 0.00008667, @@ -6657,9 +6203,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-general": { "input_cost_per_second": 0.00007167, @@ -6671,9 +6215,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/nova-phonecall": { "input_cost_per_second": 0.00007167, @@ -6685,9 +6227,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/whisper": { "input_cost_per_second": 0.0001, @@ -6698,9 +6238,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/whisper-base": { "input_cost_per_second": 0.0001, @@ -6711,9 +6249,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/whisper-large": { "input_cost_per_second": 0.0001, @@ -6724,9 +6260,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/whisper-medium": { "input_cost_per_second": 0.0001, @@ -6737,9 +6271,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/whisper-small": { "input_cost_per_second": 0.0001, @@ -6750,9 +6282,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepgram/whisper-tiny": { "input_cost_per_second": 0.0001, @@ -6763,9 +6293,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://deepgram.com/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepinfra/Gryphe/MythoMax-L2-13b": { "max_tokens": 4096, @@ -7611,9 +7139,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://elevenlabs.io/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "elevenlabs/scribe_v1_experimental": { "input_cost_per_second": 0.0000611, @@ -7626,9 +7152,7 @@ "mode": "audio_transcription", "output_cost_per_second": 0, "source": "https://elevenlabs.io/pricing", - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "embed-english-light-v2.0": { "input_cost_per_token": 1e-7, @@ -9037,16 +8561,8 @@ "mode": "chat", "output_cost_per_token": 4e-7, "source": "https://ai.google.dev/pricing#2_0flash", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -9077,16 +8593,8 @@ "mode": "chat", "output_cost_per_token": 6e-7, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_audio_output": true, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9125,16 +8633,8 @@ "output_cost_per_token": 6e-7, "output_cost_per_token_above_128k_tokens": 0, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_audio_output": true, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9161,15 +8661,8 @@ "mode": "chat", "output_cost_per_token": 3e-7, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": true, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9197,15 +8690,8 @@ "mode": "chat", "output_cost_per_token": 3e-7, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": true, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9237,20 +8723,9 @@ "output_cost_per_token": 0.000002, "rpm": 10, "source": "https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/gemini#gemini-2-0-flash-live-preview-04-09", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "audio" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "audio"], "supports_audio_output": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -9281,16 +8756,8 @@ "mode": "chat", "output_cost_per_token": 4e-7, "source": "https://ai.google.dev/pricing#2_0flash", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -9330,16 +8797,8 @@ "output_cost_per_token": 0, "output_cost_per_token_above_128k_tokens": 0, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_audio_output": true, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9378,16 +8837,8 @@ "output_cost_per_token": 0, "output_cost_per_token_above_128k_tokens": 0, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_audio_output": false, "supports_function_calling": false, "supports_parallel_function_calling": true, @@ -9416,19 +8867,9 @@ "output_cost_per_token": 0.00001, "output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_input": true, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9459,20 +8900,9 @@ "output_cost_per_reasoning_token": 0.0000025, "output_cost_per_token": 0.0000025, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9506,21 +8936,9 @@ "output_cost_per_token": 0.00003, "rpm": 100000, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9552,20 +8970,9 @@ "output_cost_per_reasoning_token": 4e-7, "output_cost_per_token": 4e-7, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9597,20 +9004,9 @@ "output_cost_per_reasoning_token": 4e-7, "output_cost_per_token": 4e-7, "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9642,20 +9038,9 @@ "output_cost_per_reasoning_token": 0.0000025, "output_cost_per_token": 0.0000025, "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9687,20 +9072,9 @@ "output_cost_per_reasoning_token": 4e-7, "output_cost_per_token": 4e-7, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9732,20 +9106,9 @@ "output_cost_per_reasoning_token": 0.0000035, "output_cost_per_token": 6e-7, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9776,20 +9139,9 @@ "output_cost_per_reasoning_token": 0.0000025, "output_cost_per_token": 0.0000025, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9821,19 +9173,9 @@ "output_cost_per_token": 0.00001, "output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_input": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -9864,19 +9206,9 @@ "output_cost_per_token": 0.00001, "output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_input": true, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9908,20 +9240,9 @@ "output_cost_per_token": 0.00001, "output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -9953,23 +9274,10 @@ "output_cost_per_token": 0.00001, "output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], - "supported_regions": [ - "global" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], + "supported_regions": ["global"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10001,20 +9309,9 @@ "output_cost_per_token": 0.00001, "output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10046,12 +9343,8 @@ "output_cost_per_token": 0.00001, "output_cost_per_token_above_200k_tokens": 0.000015, "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "audio" - ], + "supported_modalities": ["text"], + "supported_output_modalities": ["audio"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10484,16 +9777,8 @@ "output_cost_per_token": 4e-7, "rpm": 10000, "source": "https://ai.google.dev/pricing#2_0flash", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -10524,16 +9809,8 @@ "output_cost_per_token": 4e-7, "rpm": 10000, "source": "https://ai.google.dev/pricing#2_0flash", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_audio_output": false, "supports_function_calling": true, "supports_prompt_caching": true, @@ -10573,16 +9850,8 @@ "output_cost_per_token_above_128k_tokens": 0, "rpm": 10, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_audio_output": true, "supports_function_calling": true, "supports_prompt_caching": true, @@ -10610,15 +9879,8 @@ "output_cost_per_token": 3e-7, "rpm": 4000, "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.0-flash-lite", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": true, "supports_function_calling": true, "supports_prompt_caching": true, @@ -10647,15 +9909,8 @@ "output_cost_per_token": 3e-7, "rpm": 60000, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash-lite", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_prompt_caching": true, @@ -10687,20 +9942,9 @@ "output_cost_per_token": 0.0000015, "rpm": 10, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2-0-flash-live-001", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "audio" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "audio"], "supports_audio_output": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -10732,16 +9976,8 @@ "output_cost_per_token": 4e-7, "rpm": 10000, "source": "https://ai.google.dev/pricing#2_0flash", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -10782,16 +10018,8 @@ "output_cost_per_token_above_128k_tokens": 0, "rpm": 10, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_audio_output": true, "supports_function_calling": true, "supports_prompt_caching": true, @@ -10831,16 +10059,8 @@ "output_cost_per_token_above_128k_tokens": 0, "rpm": 10, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_audio_output": true, "supports_function_calling": true, "supports_prompt_caching": true, @@ -10911,20 +10131,9 @@ "output_cost_per_token": 0.0000025, "rpm": 100000, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -10959,21 +10168,9 @@ "output_cost_per_token": 0.00003, "rpm": 100000, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text", - "image" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text", "image"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -11006,20 +10203,9 @@ "output_cost_per_token": 4e-7, "rpm": 15, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-lite", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -11053,20 +10239,9 @@ "output_cost_per_token": 4e-7, "rpm": 15, "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -11100,20 +10275,9 @@ "output_cost_per_token": 0.0000025, "rpm": 15, "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -11147,20 +10311,9 @@ "output_cost_per_token": 0.0000025, "rpm": 15, "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -11194,20 +10347,9 @@ "output_cost_per_token": 4e-7, "rpm": 15, "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -11241,20 +10383,9 @@ "output_cost_per_token": 4e-7, "rpm": 15, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-lite", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -11288,19 +10419,9 @@ "output_cost_per_token": 6e-7, "rpm": 10, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_pdf_input": true, @@ -11332,19 +10453,9 @@ "output_cost_per_token": 0.0000025, "rpm": 10, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_pdf_input": true, @@ -11377,16 +10488,9 @@ "output_cost_per_token": 6e-7, "rpm": 10, "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions" - ], - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "audio" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text"], + "supported_output_modalities": ["audio"], "supports_audio_output": false, "supports_function_calling": true, "supports_prompt_caching": true, @@ -11417,19 +10521,9 @@ "output_cost_per_token_above_200k_tokens": 0.000015, "rpm": 2000, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_input": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -11462,19 +10556,9 @@ "output_cost_per_token_above_200k_tokens": 0, "rpm": 5, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/completions"], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_input": true, "supports_function_calling": true, "supports_pdf_input": true, @@ -11507,15 +10591,8 @@ "output_cost_per_token_above_200k_tokens": 0.000015, "rpm": 10000, "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_pdf_input": true, @@ -11547,15 +10624,8 @@ "output_cost_per_token_above_200k_tokens": 0.000015, "rpm": 10000, "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_pdf_input": true, @@ -11588,15 +10658,8 @@ "output_cost_per_token_above_200k_tokens": 0.000015, "rpm": 10000, "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text", "image", "audio", "video"], + "supported_output_modalities": ["text"], "supports_audio_output": false, "supports_function_calling": true, "supports_pdf_input": true, @@ -11629,12 +10692,8 @@ "output_cost_per_token_above_200k_tokens": 0.000015, "rpm": 10000, "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "audio" - ], + "supported_modalities": ["text"], + "supported_output_modalities": ["audio"], "supports_audio_output": false, "supports_function_calling": true, "supports_prompt_caching": true, @@ -11861,12 +10920,8 @@ "mode": "video_generation", "output_cost_per_second": 0.35, "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "video" - ] + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] }, "gemini/veo-3.0-fast-generate-preview": { "litellm_provider": "gemini", @@ -11875,12 +10930,8 @@ "mode": "video_generation", "output_cost_per_second": 0.4, "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "video" - ] + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] }, "gemini/veo-3.0-generate-preview": { "litellm_provider": "gemini", @@ -11889,12 +10940,8 @@ "mode": "video_generation", "output_cost_per_second": 0.75, "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "video" - ] + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] }, "global.anthropic.claude-sonnet-4-5-20250929-v1:0": { "cache_creation_input_token_cost": 0.00000375, @@ -12258,18 +11305,9 @@ "output_cost_per_token": 0.000008, "output_cost_per_token_batches": 0.000004, "output_cost_per_token_priority": 0.000014, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -12291,18 +11329,9 @@ "mode": "chat", "output_cost_per_token": 0.000008, "output_cost_per_token_batches": 0.000004, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -12327,18 +11356,9 @@ "output_cost_per_token": 0.0000016, "output_cost_per_token_batches": 8e-7, "output_cost_per_token_priority": 0.0000028, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -12360,18 +11380,9 @@ "mode": "chat", "output_cost_per_token": 0.0000016, "output_cost_per_token_batches": 8e-7, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -12396,18 +11407,9 @@ "output_cost_per_token": 4e-7, "output_cost_per_token_batches": 2e-7, "output_cost_per_token_priority": 8e-7, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -12428,19 +11430,10 @@ "max_tokens": 32768, "mode": "chat", "output_cost_per_token": 4e-7, - "output_cost_per_token_batches": 2e-7, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "output_cost_per_token_batches": 2e-7, + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -12817,9 +11810,7 @@ "max_output_tokens": 2000, "mode": "audio_transcription", "output_cost_per_token": 0.000005, - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "gpt-4o-mini-tts": { "input_cost_per_token": 0.0000025, @@ -12828,16 +11819,9 @@ "output_cost_per_audio_token": 0.000012, "output_cost_per_second": 0.00025, "output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/audio/speech" - ], - "supported_modalities": [ - "text", - "audio" - ], - "supported_output_modalities": [ - "audio" - ] + "supported_endpoints": ["/v1/audio/speech"], + "supported_modalities": ["text", "audio"], + "supported_output_modalities": ["audio"] }, "gpt-4o-realtime-preview": { "cache_read_input_token_cost": 0.0000025, @@ -12966,9 +11950,7 @@ "max_output_tokens": 2000, "mode": "audio_transcription", "output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "gpt-5": { "cache_read_input_token_cost": 1.25e-7, @@ -12985,18 +11967,9 @@ "output_cost_per_token": 0.00001, "output_cost_per_token_flex": 0.000005, "output_cost_per_token_priority": 0.00002, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -13018,17 +11991,9 @@ "mode": "responses", "output_cost_per_token": 0.00012, "output_cost_per_token_batches": 0.00006, - "supported_endpoints": [ - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": false, "supports_parallel_function_calling": true, @@ -13051,17 +12016,9 @@ "mode": "responses", "output_cost_per_token": 0.00012, "output_cost_per_token_batches": 0.00006, - "supported_endpoints": [ - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": false, "supports_parallel_function_calling": true, @@ -13089,18 +12046,9 @@ "output_cost_per_token": 0.00001, "output_cost_per_token_flex": 0.000005, "output_cost_per_token_priority": 0.00002, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -13122,18 +12070,9 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": false, "supports_native_streaming": true, "supports_parallel_function_calling": false, @@ -13154,18 +12093,9 @@ "max_tokens": 16384, "mode": "chat", "output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": false, "supports_native_streaming": true, "supports_parallel_function_calling": false, @@ -13186,16 +12116,9 @@ "max_tokens": 128000, "mode": "responses", "output_cost_per_token": 0.00001, - "supported_endpoints": [ - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": false, "supports_parallel_function_calling": true, @@ -13222,18 +12145,9 @@ "output_cost_per_token": 0.000002, "output_cost_per_token_flex": 0.000001, "output_cost_per_token_priority": 0.0000036, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -13260,18 +12174,9 @@ "output_cost_per_token": 0.000002, "output_cost_per_token_flex": 0.000001, "output_cost_per_token_priority": 0.0000036, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -13296,18 +12201,9 @@ "mode": "chat", "output_cost_per_token": 4e-7, "output_cost_per_token_flex": 2e-7, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -13331,18 +12227,9 @@ "mode": "chat", "output_cost_per_token": 4e-7, "output_cost_per_token_flex": 2e-7, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -13359,9 +12246,7 @@ "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "gpt-image-1-mini": { "cache_read_input_image_token_cost": 2.5e-7, @@ -13371,10 +12256,7 @@ "litellm_provider": "openai", "mode": "chat", "output_cost_per_image_token": 0.000008, - "supported_endpoints": [ - "/v1/images/generations", - "/v1/images/edits" - ] + "supported_endpoints": ["/v1/images/generations", "/v1/images/edits"] }, "gpt-realtime": { "cache_creation_input_audio_token_cost": 4e-7, @@ -13389,18 +12271,9 @@ "mode": "chat", "output_cost_per_audio_token": 0.000064, "output_cost_per_token": 0.000016, - "supported_endpoints": [ - "/v1/realtime" - ], - "supported_modalities": [ - "text", - "image", - "audio" - ], - "supported_output_modalities": [ - "text", - "audio" - ], + "supported_endpoints": ["/v1/realtime"], + "supported_modalities": ["text", "image", "audio"], + "supported_output_modalities": ["text", "audio"], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -13420,18 +12293,9 @@ "mode": "chat", "output_cost_per_audio_token": 0.00002, "output_cost_per_token": 0.0000024, - "supported_endpoints": [ - "/v1/realtime" - ], - "supported_modalities": [ - "text", - "image", - "audio" - ], - "supported_output_modalities": [ - "text", - "audio" - ], + "supported_endpoints": ["/v1/realtime"], + "supported_modalities": ["text", "image", "audio"], + "supported_output_modalities": ["text", "audio"], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -13452,18 +12316,9 @@ "mode": "chat", "output_cost_per_audio_token": 0.000064, "output_cost_per_token": 0.000016, - "supported_endpoints": [ - "/v1/realtime" - ], - "supported_modalities": [ - "text", - "image", - "audio" - ], - "supported_output_modalities": [ - "text", - "audio" - ], + "supported_endpoints": ["/v1/realtime"], + "supported_modalities": ["text", "image", "audio"], + "supported_output_modalities": ["text", "audio"], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -13475,12 +12330,8 @@ "litellm_provider": "gradient_ai", "max_tokens": 2048, "mode": "chat", - "supported_endpoints": [ - "/v1/chat/completions" - ], - "supported_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], "supports_tool_choice": false }, "gradient_ai/anthropic-claude-3-opus": { @@ -13489,12 +12340,8 @@ "max_tokens": 1024, "mode": "chat", "output_cost_per_token": 0.000075, - "supported_endpoints": [ - "/v1/chat/completions" - ], - "supported_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], "supports_tool_choice": false }, "gradient_ai/anthropic-claude-3.5-haiku": { @@ -13503,12 +12350,8 @@ "max_tokens": 1024, "mode": "chat", "output_cost_per_token": 0.000004, - "supported_endpoints": [ - "/v1/chat/completions" - ], - "supported_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], "supports_tool_choice": false }, "gradient_ai/anthropic-claude-3.5-sonnet": { @@ -13517,12 +12360,8 @@ "max_tokens": 1024, "mode": "chat", "output_cost_per_token": 0.000015, - "supported_endpoints": [ - "/v1/chat/completions" - ], - "supported_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], "supports_tool_choice": false }, "gradient_ai/anthropic-claude-3.7-sonnet": { @@ -13531,12 +12370,8 @@ "max_tokens": 1024, "mode": "chat", "output_cost_per_token": 0.000015, - "supported_endpoints": [ - "/v1/chat/completions" - ], - "supported_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], "supports_tool_choice": false }, "gradient_ai/deepseek-r1-distill-llama-70b": { @@ -13545,12 +12380,8 @@ "max_tokens": 8000, "mode": "chat", "output_cost_per_token": 9.9e-7, - "supported_endpoints": [ - "/v1/chat/completions" - ], - "supported_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], "supports_tool_choice": false }, "gradient_ai/llama3-8b-instruct": { @@ -13559,12 +12390,8 @@ "max_tokens": 512, "mode": "chat", "output_cost_per_token": 2e-7, - "supported_endpoints": [ - "/v1/chat/completions" - ], - "supported_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], "supports_tool_choice": false }, "gradient_ai/llama3.3-70b-instruct": { @@ -13573,12 +12400,8 @@ "max_tokens": 2048, "mode": "chat", "output_cost_per_token": 6.5e-7, - "supported_endpoints": [ - "/v1/chat/completions" - ], - "supported_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], "supports_tool_choice": false }, "gradient_ai/mistral-nemo-instruct-2407": { @@ -13587,36 +12410,24 @@ "max_tokens": 512, "mode": "chat", "output_cost_per_token": 3e-7, - "supported_endpoints": [ - "/v1/chat/completions" - ], - "supported_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], "supports_tool_choice": false }, "gradient_ai/openai-gpt-4o": { "litellm_provider": "gradient_ai", "max_tokens": 16384, "mode": "chat", - "supported_endpoints": [ - "/v1/chat/completions" - ], - "supported_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], "supports_tool_choice": false }, "gradient_ai/openai-gpt-4o-mini": { "litellm_provider": "gradient_ai", "max_tokens": 16384, "mode": "chat", - "supported_endpoints": [ - "/v1/chat/completions" - ], - "supported_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], "supports_tool_choice": false }, "gradient_ai/openai-o3": { @@ -13625,12 +12436,8 @@ "max_tokens": 100000, "mode": "chat", "output_cost_per_token": 0.000008, - "supported_endpoints": [ - "/v1/chat/completions" - ], - "supported_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], "supports_tool_choice": false }, "gradient_ai/openai-o3-mini": { @@ -13639,12 +12446,8 @@ "max_tokens": 100000, "mode": "chat", "output_cost_per_token": 0.0000044, - "supported_endpoints": [ - "/v1/chat/completions" - ], - "supported_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions"], + "supported_modalities": ["text"], "supports_tool_choice": false }, "lemonade/Qwen3-Coder-30B-A3B-Instruct-GGUF": { @@ -14079,27 +12882,21 @@ "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "high/1024-x-1536/gpt-image-1": { "input_cost_per_pixel": 1.58945719e-7, "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "high/1536-x-1024/gpt-image-1": { "input_cost_per_pixel": 1.58945719e-7, "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "hyperbolic/NousResearch/Hermes-3-Llama-3.1-70B": { "input_cost_per_token": 1.2e-7, @@ -14736,27 +13533,21 @@ "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "low/1024-x-1536/gpt-image-1": { "input_cost_per_pixel": 1.0172526e-8, "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "low/1536-x-1024/gpt-image-1": { "input_cost_per_pixel": 1.0172526e-8, "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "luminous-base": { "input_cost_per_token": 0.00003, @@ -14819,75 +13610,57 @@ "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "medium/1024-x-1536/gpt-image-1": { "input_cost_per_pixel": 4.0054321e-8, "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "medium/1536-x-1024/gpt-image-1": { "input_cost_per_pixel": 4.0054321e-8, "litellm_provider": "openai", "mode": "image_generation", "output_cost_per_pixel": 0, - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "low/1024-x-1024/gpt-image-1-mini": { "input_cost_per_image": 0.005, "litellm_provider": "openai", "mode": "image_generation", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "low/1024-x-1536/gpt-image-1-mini": { "input_cost_per_image": 0.006, "litellm_provider": "openai", "mode": "image_generation", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "low/1536-x-1024/gpt-image-1-mini": { "input_cost_per_image": 0.006, "litellm_provider": "openai", "mode": "image_generation", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "medium/1024-x-1024/gpt-image-1-mini": { "input_cost_per_image": 0.011, "litellm_provider": "openai", "mode": "image_generation", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "medium/1024-x-1536/gpt-image-1-mini": { "input_cost_per_image": 0.015, "litellm_provider": "openai", "mode": "image_generation", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "medium/1536-x-1024/gpt-image-1-mini": { "input_cost_per_image": 0.015, "litellm_provider": "openai", "mode": "image_generation", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "medlm-large": { "input_cost_per_character": 0.000005, @@ -15047,14 +13820,8 @@ "mode": "chat", "output_cost_per_token": 9.7e-7, "output_cost_per_token_batches": 4.85e-7, - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text", - "code" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "code"], "supports_function_calling": true, "supports_tool_choice": false }, @@ -15068,14 +13835,8 @@ "mode": "chat", "output_cost_per_token": 6.6e-7, "output_cost_per_token_batches": 3.3e-7, - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text", - "code" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "code"], "supports_function_calling": true, "supports_tool_choice": false }, @@ -15086,12 +13847,8 @@ "max_tokens": 128000, "mode": "chat", "source": "https://llama.developer.meta.com/docs/models", - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_tool_choice": true }, @@ -15102,12 +13859,8 @@ "max_tokens": 128000, "mode": "chat", "source": "https://llama.developer.meta.com/docs/models", - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_tool_choice": true }, @@ -15118,13 +13871,8 @@ "max_tokens": 128000, "mode": "chat", "source": "https://llama.developer.meta.com/docs/models", - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_tool_choice": true }, @@ -15135,13 +13883,8 @@ "max_tokens": 128000, "mode": "chat", "source": "https://llama.developer.meta.com/docs/models", - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_tool_choice": true }, @@ -15845,14 +14588,8 @@ "output_cost_per_token": 0, "output_vector_size": 768, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models", - "supported_endpoints": [ - "/v1/embeddings" - ], - "supported_modalities": [ - "text", - "image", - "video" - ] + "supported_endpoints": ["/v1/embeddings"], + "supported_modalities": ["text", "image", "video"] }, "multimodalembedding@001": { "input_cost_per_character": 2e-7, @@ -15868,14 +14605,8 @@ "output_cost_per_token": 0, "output_vector_size": 768, "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models", - "supported_endpoints": [ - "/v1/embeddings" - ], - "supported_modalities": [ - "text", - "image", - "video" - ] + "supported_endpoints": ["/v1/embeddings"], + "supported_modalities": ["text", "image", "video"] }, "nscale/Qwen/QwQ-32B": { "input_cost_per_token": 1.8e-7, @@ -15911,9 +14642,7 @@ "mode": "image_generation", "output_cost_per_pixel": 0, "source": "https://docs.nscale.com/docs/inference/serverless-models/current#image-models", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "nscale/deepseek-ai/DeepSeek-R1-Distill-Llama-70B": { "input_cost_per_token": 3.75e-7, @@ -16018,9 +14747,7 @@ "mode": "image_generation", "output_cost_per_pixel": 0, "source": "https://docs.nscale.com/docs/inference/serverless-models/current#image-models", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "o1": { "cache_read_input_token_cost": 0.0000075, @@ -16125,17 +14852,9 @@ "mode": "responses", "output_cost_per_token": 0.0006, "output_cost_per_token_batches": 0.0003, - "supported_endpoints": [ - "/v1/responses", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/responses", "/v1/batch"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": false, "supports_parallel_function_calling": true, @@ -16157,17 +14876,9 @@ "mode": "responses", "output_cost_per_token": 0.0006, "output_cost_per_token_batches": 0.0003, - "supported_endpoints": [ - "/v1/responses", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/responses", "/v1/batch"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": false, "supports_parallel_function_calling": true, @@ -16200,13 +14911,8 @@ "/v1/completions", "/v1/batch" ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_pdf_input": true, @@ -16231,13 +14937,8 @@ "/v1/completions", "/v1/batch" ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_pdf_input": true, @@ -16258,18 +14959,9 @@ "mode": "responses", "output_cost_per_token": 0.00004, "output_cost_per_token_batches": 0.00002, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -16291,18 +14983,9 @@ "mode": "responses", "output_cost_per_token": 0.00004, "output_cost_per_token_batches": 0.00002, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -16357,17 +15040,9 @@ "mode": "responses", "output_cost_per_token": 0.00008, "output_cost_per_token_batches": 0.00004, - "supported_endpoints": [ - "/v1/responses", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/responses", "/v1/batch"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_pdf_input": true, @@ -16387,17 +15062,9 @@ "mode": "responses", "output_cost_per_token": 0.00008, "output_cost_per_token_batches": 0.00004, - "supported_endpoints": [ - "/v1/responses", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/responses", "/v1/batch"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_parallel_function_calling": false, "supports_pdf_input": true, @@ -16460,18 +15127,9 @@ "mode": "responses", "output_cost_per_token": 0.000008, "output_cost_per_token_batches": 0.000004, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -16493,18 +15151,9 @@ "mode": "responses", "output_cost_per_token": 0.000008, "output_cost_per_token_batches": 0.000004, - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/batch", - "/v1/responses" - ], - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_function_calling": true, "supports_native_streaming": true, "supports_parallel_function_calling": true, @@ -17766,13 +16415,8 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.00001, - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_reasoning": true, "supports_tool_choice": true }, @@ -17785,13 +16429,8 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.00001, - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_reasoning": true, "supports_tool_choice": true }, @@ -17804,13 +16443,8 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.00001, - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_reasoning": true, "supports_tool_choice": true }, @@ -17823,13 +16457,8 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.000002, - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_reasoning": true, "supports_tool_choice": true }, @@ -17842,13 +16471,8 @@ "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 4e-7, - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], "supports_reasoning": true, "supports_tool_choice": true }, @@ -18645,18 +17269,14 @@ "mode": "image_generation", "output_cost_per_image": 0.022, "source": "https://www.recraft.ai/docs#pricing", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "recraft/recraftv3": { "litellm_provider": "recraft", "mode": "image_generation", "output_cost_per_image": 0.04, "source": "https://www.recraft.ai/docs#pricing", - "supported_endpoints": [ - "/v1/images/generations" - ] + "supported_endpoints": ["/v1/images/generations"] }, "replicate/meta/llama-2-13b": { "input_cost_per_token": 1e-7, @@ -19127,13 +17747,7 @@ "search_context_size_low": 0, "search_context_size_medium": 0 }, - "supported_regions": [ - "global", - "us-west-2", - "eu-west-1", - "ap-southeast-1", - "ap-northeast-1" - ], + "supported_regions": ["global", "us-west-2", "eu-west-1", "ap-southeast-1", "ap-northeast-1"], "supports_audio_input": true, "supports_audio_output": true, "supports_function_calling": true, @@ -20014,17 +18628,13 @@ "input_cost_per_character": 0.000015, "litellm_provider": "openai", "mode": "audio_speech", - "supported_endpoints": [ - "/v1/audio/speech" - ] + "supported_endpoints": ["/v1/audio/speech"] }, "tts-1-hd": { "input_cost_per_character": 0.00003, "litellm_provider": "openai", "mode": "audio_speech", - "supported_endpoints": [ - "/v1/audio/speech" - ] + "supported_endpoints": ["/v1/audio/speech"] }, "us.amazon.nova-lite-v1:0": { "input_cost_per_token": 6e-8, @@ -20415,14 +19025,8 @@ "mode": "chat", "output_cost_per_token": 9.7e-7, "output_cost_per_token_batches": 4.85e-7, - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text", - "code" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "code"], "supports_function_calling": true, "supports_tool_choice": false }, @@ -20436,14 +19040,8 @@ "mode": "chat", "output_cost_per_token": 6.6e-7, "output_cost_per_token_batches": 3.3e-7, - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text", - "code" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "code"], "supports_function_calling": true, "supports_tool_choice": false }, @@ -21767,9 +20365,7 @@ "mode": "chat", "output_cost_per_token": 0.0000054, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", - "supported_regions": [ - "us-west2" - ], + "supported_regions": ["us-west2"], "supports_assistant_prefill": true, "supports_function_calling": true, "supports_prompt_caching": true, @@ -21950,14 +20546,8 @@ "mode": "chat", "output_cost_per_token": 0.00000115, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text", - "code" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "code"], "supports_function_calling": true, "supports_tool_choice": true }, @@ -21970,14 +20560,8 @@ "mode": "chat", "output_cost_per_token": 0.00000115, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text", - "code" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "code"], "supports_function_calling": true, "supports_tool_choice": true }, @@ -21990,14 +20574,8 @@ "mode": "chat", "output_cost_per_token": 7e-7, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text", - "code" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "code"], "supports_function_calling": true, "supports_tool_choice": true }, @@ -22010,14 +20588,8 @@ "mode": "chat", "output_cost_per_token": 7e-7, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", - "supported_modalities": [ - "text", - "image" - ], - "supported_output_modalities": [ - "text", - "code" - ], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text", "code"], "supports_function_calling": true, "supports_tool_choice": true }, @@ -22220,12 +20792,8 @@ "mode": "video_generation", "output_cost_per_second": 0.35, "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "video" - ] + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] }, "vertex_ai/veo-3.0-fast-generate-preview": { "litellm_provider": "vertex_ai-video-models", @@ -22234,12 +20802,8 @@ "mode": "video_generation", "output_cost_per_second": 0.4, "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "video" - ] + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] }, "vertex_ai/veo-3.0-generate-preview": { "litellm_provider": "vertex_ai-video-models", @@ -22248,12 +20812,8 @@ "mode": "video_generation", "output_cost_per_second": 0.75, "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": [ - "text" - ], - "supported_output_modalities": [ - "video" - ] + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] }, "voyage/rerank-2": { "input_cost_per_query": 5e-8, @@ -22848,9 +21408,7 @@ "litellm_provider": "openai", "mode": "audio_transcription", "output_cost_per_second": 0.0001, - "supported_endpoints": [ - "/v1/audio/transcriptions" - ] + "supported_endpoints": ["/v1/audio/transcriptions"] }, "xai/grok-2": { "input_cost_per_token": 0.000002, @@ -23230,4 +21788,4 @@ "supports_vision": true, "supports_web_search": true } -} \ No newline at end of file +}