diff --git a/scripts/update_models.ts b/scripts/update_models.ts
index c6e840dd07..d1de810fd0 100644
--- a/scripts/update_models.ts
+++ b/scripts/update_models.ts
@@ -2,12 +2,12 @@

 /**
  * Downloads the latest model prices and context window data from LiteLLM
- * and saves it to src/utils/models.json
+ * and saves it to src/utils/tokens/models.json
  */

 const LITELLM_URL =
   "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json";
-const OUTPUT_PATH = "src/utils/models.json";
+const OUTPUT_PATH = "src/utils/tokens/models.json";

 async function updateModels() {
   console.log(`Fetching model data from ${LITELLM_URL}...`);
diff --git a/src/components/ChatInput.tsx b/src/components/ChatInput.tsx
index b3a0005154..565dd429ee 100644
--- a/src/components/ChatInput.tsx
+++ b/src/components/ChatInput.tsx
@@ -350,7 +350,7 @@ export const ChatInput: React.FC = ({
   const inputRef = useRef(null);
   const modelSelectorRef = useRef(null);
   const [mode, setMode] = useMode();
-  const { recentModels } = useModelLRU();
+  const { recentModels, addModel } = useModelLRU();
   const commandListId = useId();

   // Get current send message options from shared hook (must be at component top level)
@@ -359,8 +359,11 @@
   const preferredModel = sendMessageOptions.model;
   // Setter for model - updates localStorage directly so useSendMessageOptions picks it up
   const setPreferredModel = useCallback(
-    (model: string) => updatePersistedState(getModelKey(workspaceId), model),
-    [workspaceId]
+    (model: string) => {
+      addModel(model); // Update LRU
+      updatePersistedState(getModelKey(workspaceId), model); // Update workspace-specific preference
+    },
+    [workspaceId, addModel]
   );

   const focusMessageInput = useCallback(() => {
diff --git a/src/debug/agentSessionCli.ts b/src/debug/agentSessionCli.ts
index e8a0f694fb..4efe6f76f8 100644
--- a/src/debug/agentSessionCli.ts
+++ b/src/debug/agentSessionCli.ts
@@ -22,7 +22,7 @@ import {
   type SendMessageOptions,
   type WorkspaceChatMessage,
 } from "@/types/ipc";
-import { defaultModel } from "@/utils/ai/models";
+import { getDefaultModelFromLRU } from "@/hooks/useModelLRU";
 import { ensureProvidersConfig } from "@/utils/providers/ensureProvidersConfig";
 import { modeToToolPolicy, PLAN_MODE_INSTRUCTION } from "@/utils/ui/modeUtils";
 import { extractAssistantText, extractReasoning, extractToolCalls } from "@/debug/chatExtractors";
@@ -184,7 +184,8 @@ async function main(): Promise<void> {
     throw new Error("Message must be provided via --message or stdin");
   }

-  const model = values.model && values.model.trim().length > 0 ? values.model.trim() : defaultModel;
+  const model =
+    values.model && values.model.trim().length > 0 ? values.model.trim() : getDefaultModelFromLRU();
   const timeoutMs = parseTimeout(values.timeout);
   const thinkingLevel = parseThinkingLevel(values["thinking-level"]);
   const initialMode = parseMode(values.mode);
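A note on the ChatInput change above: picking a model is now a write-through to two stores, the global LRU (so the pick influences future defaults) and the per-workspace key (so this workspace keeps its explicit choice). A minimal sketch of that shape, assuming simple localStorage-backed helpers; `getModelKey`'s key format here is illustrative, not the repo's actual constant:

```ts
// Sketch only: stand-ins for the repo's persisted-state helpers (assumed shapes).
const getModelKey = (workspaceId: string): string => `model:${workspaceId}`; // hypothetical key format

function updatePersistedState(key: string, value: string): void {
  localStorage.setItem(key, JSON.stringify(value));
}

// One user action, two stores: LRU front + workspace-specific preference.
function setPreferredModel(
  workspaceId: string,
  model: string,
  addModel: (m: string) => void // provided by useModelLRU in the real code
): void {
  addModel(model);
  updatePersistedState(getModelKey(workspaceId), model);
}
```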
diff --git a/src/debug/costs.ts b/src/debug/costs.ts
index 62fcbca38f..967c0852de 100644
--- a/src/debug/costs.ts
+++ b/src/debug/costs.ts
@@ -3,7 +3,7 @@ import * as path from "path";
 import { defaultConfig } from "@/config";
 import type { CmuxMessage } from "@/types/message";
 import { calculateTokenStats } from "@/utils/tokens/tokenStatsCalculator";
-import { defaultModel } from "@/utils/ai/models";
+import { getDefaultModelFromLRU } from "@/hooks/useModelLRU";

 /**
  * Debug command to display cost/token statistics for a workspace
@@ -35,7 +35,7 @@ export function costsCommand(workspaceId: string) {
   // Detect model from first assistant message
   const firstAssistantMessage = messages.find((msg) => msg.role === "assistant");
-  const model = firstAssistantMessage?.metadata?.model ?? defaultModel;
+  const model = firstAssistantMessage?.metadata?.model ?? getDefaultModelFromLRU();

   // Calculate stats using shared logic (now synchronous)
   const stats = calculateTokenStats(messages, model);
diff --git a/src/debug/send-message.ts b/src/debug/send-message.ts
index 420ab8f64f..270b61603f 100644
--- a/src/debug/send-message.ts
+++ b/src/debug/send-message.ts
@@ -3,7 +3,7 @@ import * as path from "path";
 import { defaultConfig } from "@/config";
 import type { CmuxMessage } from "@/types/message";
 import type { SendMessageOptions } from "@/types/ipc";
-import { defaultModel } from "@/utils/ai/models";
+import { getDefaultModelFromLRU } from "@/hooks/useModelLRU";

 /**
  * Debug command to send a message to a workspace, optionally editing an existing message
@@ -103,7 +103,7 @@ export function sendMessageCommand(

   // Prepare options
   const options: SendMessageOptions = {
-    model: defaultModel,
+    model: getDefaultModelFromLRU(),
   };

   if (editMessageId) {
diff --git a/src/hooks/useAIViewKeybinds.ts b/src/hooks/useAIViewKeybinds.ts
index ba1c16e2e8..5dce224d1b 100644
--- a/src/hooks/useAIViewKeybinds.ts
+++ b/src/hooks/useAIViewKeybinds.ts
@@ -6,7 +6,7 @@ import { updatePersistedState, readPersistedState } from "@/hooks/usePersistedState";
 import type { ThinkingLevel, ThinkingLevelOn } from "@/types/thinking";
 import { DEFAULT_THINKING_LEVEL } from "@/types/thinking";
 import { getThinkingPolicyForModel } from "@/utils/thinking/policy";
-import { defaultModel } from "@/utils/ai/models";
+import { getDefaultModelFromLRU } from "@/hooks/useModelLRU";

 interface UseAIViewKeybindsParams {
   workspaceId: string;
@@ -66,10 +66,10 @@ export function useAIViewKeybinds({
         e.preventDefault();

         // Get selected model from localStorage (what user sees in UI)
-        // Fall back to message history model, then to default model
+        // Fall back to message history model, then to the most recent model from the LRU
         // This matches the same logic as useSendMessageOptions
         const selectedModel = readPersistedState(getModelKey(workspaceId), null);
-        const modelToUse = selectedModel ?? currentModel ?? defaultModel;
+        const modelToUse = selectedModel ?? currentModel ?? getDefaultModelFromLRU();

         // Storage key for remembering this model's last-used active thinking level
         const lastThinkingKey = getLastThinkingByModelKey(modelToUse);
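Worth calling out the precedence that this keybind handler and `useSendMessageOptions` now share when resolving the effective model: explicit workspace selection first, then the model from message history, then the head of the LRU. The same chain as a pure function (names here are illustrative, not the repo's API):

```ts
// The ?? chain from useAIViewKeybinds, factored out for clarity.
function resolveEffectiveModel(
  selectedModel: string | null, // workspace-specific pick from localStorage
  historyModel: string | null, // model recorded on recent assistant messages
  lruHead: string // most recently used model; never empty once defaults are merged
): string {
  return selectedModel ?? historyModel ?? lruHead;
}

// resolveEffectiveModel(null, "openai:gpt-5", "anthropic:claude-sonnet-4-5") === "openai:gpt-5"
```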
diff --git a/src/hooks/useModelLRU.ts b/src/hooks/useModelLRU.ts
index 42eeea623c..9ec1d47fd4 100644
--- a/src/hooks/useModelLRU.ts
+++ b/src/hooks/useModelLRU.ts
@@ -1,6 +1,7 @@
 import { useCallback, useEffect } from "react";
-import { usePersistedState } from "./usePersistedState";
+import { usePersistedState, readPersistedState } from "./usePersistedState";
 import { MODEL_ABBREVIATIONS } from "@/utils/slashCommands/registry";
+import { defaultModel } from "@/utils/ai/models";

 const MAX_LRU_SIZE = 8;
 const LRU_KEY = "model-lru";
@@ -8,31 +9,37 @@

 // Default models from abbreviations (for initial LRU population)
 const DEFAULT_MODELS = Object.values(MODEL_ABBREVIATIONS);

+/**
+ * Get the default model from the LRU (non-hook version for use outside React).
+ * This is the ONLY place that reads from the LRU outside of the hook.
+ *
+ * @returns The most recently used model, or defaultModel if the LRU is empty
+ */
+export function getDefaultModelFromLRU(): string {
+  const lru = readPersistedState(LRU_KEY, DEFAULT_MODELS.slice(0, MAX_LRU_SIZE));
+  return lru[0] ?? defaultModel;
+}
+
 /**
  * Hook to manage a Least Recently Used (LRU) cache of AI models.
  * Stores up to 8 recently used models in localStorage.
  * Initializes with default abbreviated models if empty.
  */
 export function useModelLRU() {
-  const [recentModels, setRecentModels] = usePersistedState<string[]>(LRU_KEY, []);
+  const [recentModels, setRecentModels] = usePersistedState<string[]>(
+    LRU_KEY,
+    DEFAULT_MODELS.slice(0, MAX_LRU_SIZE)
+  );

-  // Ensure default models are always present in the LRU (only once on mount)
+  // Merge any new defaults from MODEL_ABBREVIATIONS (only once on mount)
   useEffect(() => {
     setRecentModels((prev) => {
-      // If empty, just use defaults
-      if (prev.length === 0) {
-        return DEFAULT_MODELS.slice(0, MAX_LRU_SIZE);
-      }
-
-      // If we have some models, merge with defaults (keeping existing order, adding missing defaults at end)
       const merged = [...prev];
       for (const defaultModel of DEFAULT_MODELS) {
         if (!merged.includes(defaultModel)) {
           merged.push(defaultModel);
         }
       }
-
-      // Limit to MAX_LRU_SIZE
       return merged.slice(0, MAX_LRU_SIZE);
     });
     // eslint-disable-next-line react-hooks/exhaustive-deps
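The invariant the rest of the diff leans on: the persisted list is ordered most-recently-used first and capped at `MAX_LRU_SIZE`, so `lru[0]` is always a sensible default. A storage-free sketch of the update step; the hook's `addModel` presumably does the equivalent inside `setRecentModels`, so this standalone version is an assumption:

```ts
const MAX_LRU_SIZE = 8;

// Move `model` to the front, dropping any earlier occurrence and capping the length.
function touchModel(lru: readonly string[], model: string): string[] {
  return [model, ...lru.filter((m) => m !== model)].slice(0, MAX_LRU_SIZE);
}

// touchModel(["a", "b", "c"], "c") -> ["c", "a", "b"]
// touchModel(["a", "b"], "a")      -> ["a", "b"]
```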
diff --git a/src/hooks/useSendMessageOptions.ts b/src/hooks/useSendMessageOptions.ts
index 2ae7feea48..fa54a075db 100644
--- a/src/hooks/useSendMessageOptions.ts
+++ b/src/hooks/useSendMessageOptions.ts
@@ -2,8 +2,8 @@ import { use1MContext } from "./use1MContext";
 import { useThinkingLevel } from "./useThinkingLevel";
 import { useMode } from "@/contexts/ModeContext";
 import { usePersistedState } from "./usePersistedState";
+import { useModelLRU } from "./useModelLRU";
 import { modeToToolPolicy, PLAN_MODE_INSTRUCTION } from "@/utils/ui/modeUtils";
-import { defaultModel } from "@/utils/ai/models";
 import { getModelKey } from "@/constants/storage";
 import type { SendMessageOptions } from "@/types/ipc";
 import type { UIMode } from "@/types/mode";
@@ -19,13 +19,14 @@ function constructSendMessageOptions(
   mode: UIMode,
   thinkingLevel: ThinkingLevel,
   preferredModel: string | null | undefined,
-  use1M: boolean
+  use1M: boolean,
+  fallbackModel: string
 ): SendMessageOptions {
   const additionalSystemInstructions = mode === "plan" ? PLAN_MODE_INSTRUCTION : undefined;

   // Ensure model is always a valid string (defensive against corrupted localStorage)
   const model =
-    typeof preferredModel === "string" && preferredModel ? preferredModel : defaultModel;
+    typeof preferredModel === "string" && preferredModel ? preferredModel : fallbackModel;

   // Enforce thinking policy at the UI boundary as well (e.g., gpt-5-pro → high only)
   const uiThinking = enforceThinkingPolicy(model, thinkingLevel);
@@ -58,13 +59,14 @@ export function useSendMessageOptions(workspaceId: string): SendMessageOptions {
   const [use1M] = use1MContext();
   const [thinkingLevel] = useThinkingLevel();
   const [mode] = useMode();
+  const { recentModels } = useModelLRU();

   const [preferredModel] = usePersistedState(
     getModelKey(workspaceId),
-    defaultModel,
+    recentModels[0], // Most recently used model (LRU is never empty)
     { listener: true } // Listen for changes from ModelSelector and other sources
   );

-  return constructSendMessageOptions(mode, thinkingLevel, preferredModel, use1M);
+  return constructSendMessageOptions(mode, thinkingLevel, preferredModel, use1M, recentModels[0]);
 }

 /**
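`constructSendMessageOptions` also clamps the thinking level per model before anything is sent. `enforceThinkingPolicy` itself is not part of this diff; a hypothetical sketch of the behavior the comment describes, where the gpt-5-pro rule is the only policy actually named in the source and the level names are assumed:

```ts
type ThinkingLevel = "off" | "low" | "medium" | "high"; // assumed variants

// Hypothetical: pin models whose policy allows a single level, pass others through.
function enforceThinkingPolicy(model: string, requested: ThinkingLevel): ThinkingLevel {
  if (model === "openai:gpt-5-pro") return "high"; // per the comment: gpt-5-pro → high only
  return requested;
}
```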
diff --git a/src/utils/messages/sendOptions.ts b/src/utils/messages/sendOptions.ts
index 8454070196..6fd6623b44 100644
--- a/src/utils/messages/sendOptions.ts
+++ b/src/utils/messages/sendOptions.ts
@@ -4,13 +4,13 @@ import {
   getModeKey,
   USE_1M_CONTEXT_KEY,
 } from "@/constants/storage";
-import { defaultModel } from "@/utils/ai/models";
 import { modeToToolPolicy, PLAN_MODE_INSTRUCTION } from "@/utils/ui/modeUtils";
 import { readPersistedState } from "@/hooks/usePersistedState";
 import type { SendMessageOptions } from "@/types/ipc";
 import type { UIMode } from "@/types/mode";
 import type { ThinkingLevel } from "@/types/thinking";
 import { enforceThinkingPolicy } from "@/utils/thinking/policy";
+import { getDefaultModelFromLRU } from "@/hooks/useModelLRU";

 /**
  * Get send options from localStorage
@@ -20,8 +20,8 @@ import { enforceThinkingPolicy } from "@/utils/thinking/policy";
  * This ensures DRY - single source of truth for option extraction.
  */
 export function getSendOptionsFromStorage(workspaceId: string): SendMessageOptions {
-  // Read model preference (workspace-specific)
-  const model = readPersistedState(getModelKey(workspaceId), defaultModel);
+  // Read model preference (workspace-specific), falling back to the most recent model from the LRU
+  const model = readPersistedState(getModelKey(workspaceId), getDefaultModelFromLRU());

   // Read thinking level (workspace-specific)
   const thinkingLevel = readPersistedState(
diff --git a/src/utils/slashCommands/registry.ts b/src/utils/slashCommands/registry.ts
index a2a95bfae2..063f46f860 100644
--- a/src/utils/slashCommands/registry.ts
+++ b/src/utils/slashCommands/registry.ts
@@ -11,9 +11,11 @@ import type {
 import minimist from "minimist";

 // Model abbreviations for common models
+// Order matters: the first model becomes the default for new chats
 export const MODEL_ABBREVIATIONS: Record<string, string> = {
-  opus: "anthropic:claude-opus-4-1",
   sonnet: "anthropic:claude-sonnet-4-5",
+  haiku: "anthropic:claude-haiku-4-5",
+  opus: "anthropic:claude-opus-4-1",
   "gpt-5": "openai:gpt-5",
   "gpt-5-pro": "openai:gpt-5-pro",
   codex: "openai:gpt-5-codex",
diff --git a/src/utils/tokens/models-extra.ts b/src/utils/tokens/models-extra.ts
index bf191768ff..cfa6431813 100644
--- a/src/utils/tokens/models-extra.ts
+++ b/src/utils/tokens/models-extra.ts
@@ -39,4 +39,20 @@
     knowledge_cutoff: "2024-09-30",
     supported_endpoints: ["/v1/responses"],
   },
+
+  // Claude Haiku 4.5 - Released October 15, 2025
+  // $1/M input, $5/M output
+  "claude-haiku-4-5": {
+    max_input_tokens: 200000,
+    max_output_tokens: 8192,
+    input_cost_per_token: 0.000001, // $1 per million input tokens
+    output_cost_per_token: 0.000005, // $5 per million output tokens
+    cache_creation_input_token_cost: 0.00000125, // $1.25 per million tokens
+    cache_read_input_token_cost: 0.0000001, // $0.10 per million tokens
+    litellm_provider: "anthropic",
+    mode: "chat",
+    supports_function_calling: true,
+    supports_vision: true,
+    supports_response_schema: true,
+  },
 };
diff --git a/src/utils/tokens/models.json b/src/utils/tokens/models.json
index da8e10faac..ae6f03b523 100644
--- a/src/utils/tokens/models.json
+++ b/src/utils/tokens/models.json
@@ -846,6 +846,36 @@
     "mode": "audio_transcription",
     "output_cost_per_second": 0
   },
+  "au.anthropic.claude-sonnet-4-5-20250929-v1:0": {
+    "cache_creation_input_token_cost": 0.000004125,
+    "cache_read_input_token_cost": 3.3e-7,
+    "input_cost_per_token": 0.0000033,
+    "input_cost_per_token_above_200k_tokens": 0.0000066,
+    "output_cost_per_token_above_200k_tokens": 0.00002475,
+    "cache_creation_input_token_cost_above_200k_tokens": 0.00000825,
+    "cache_read_input_token_cost_above_200k_tokens": 6.6e-7,
+    "litellm_provider": "bedrock_converse",
+    "max_input_tokens": 200000,
+    "max_output_tokens": 64000,
+    "max_tokens": 200000,
+    "mode": "chat",
+    "output_cost_per_token": 0.0000165,
+    "search_context_cost_per_query": {
+      "search_context_size_high": 0.01,
+      "search_context_size_low": 0.01,
+      "search_context_size_medium": 0.01
+    },
+    "supports_assistant_prefill": true,
+    "supports_computer_use": true,
+    "supports_function_calling": true,
+    "supports_pdf_input": true,
+    "supports_prompt_caching": true,
+    "supports_reasoning": true,
+    "supports_response_schema": true,
+    "supports_tool_choice": true,
+    "supports_vision": true,
+    "tool_use_system_prompt_tokens": 346
+  },
   "azure/ada": {
     "input_cost_per_token": 1e-7,
     "litellm_provider": "azure",
@@ -1850,9 +1880,9 @@
     "cache_read_input_token_cost": 1.25e-7,
"input_cost_per_token": 0.00000125, "litellm_provider": "azure", - "max_input_tokens": 272000, - "max_output_tokens": 128000, - "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, "mode": "chat", "output_cost_per_token": 0.00001, "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], @@ -2874,6 +2904,42 @@ "supports_function_calling": true, "supports_vision": true }, + "azure_ai/Phi-4-mini-reasoning": { + "input_cost_per_token": 8e-8, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 3.2e-7, + "source": "https://azure.microsoft.com/en-us/pricing/details/ai-foundry-models/microsoft/", + "supports_function_calling": true + }, + "azure_ai/Phi-4-reasoning": { + "input_cost_per_token": 1.25e-7, + "litellm_provider": "azure_ai", + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 5e-7, + "source": "https://azure.microsoft.com/en-us/pricing/details/ai-foundry-models/microsoft/", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_reasoning": true + }, + "azure_ai/MAI-DS-R1": { + "input_cost_per_token": 0.00000135, + "litellm_provider": "azure_ai", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.0000054, + "source": "https://azure.microsoft.com/en-us/pricing/details/ai-foundry-models/microsoft/", + "supports_reasoning": true, + "supports_tool_choice": true + }, "azure_ai/cohere-rerank-v3-english": { "input_cost_per_query": 0.002, "input_cost_per_token": 0, @@ -3013,6 +3079,63 @@ "supports_tool_choice": true, "supports_web_search": true }, + "azure_ai/grok-4": { + "input_cost_per_token": 0.0000055, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 0.0000275, + "source": "https://azure.microsoft.com/en-us/blog/grok-4-is-now-available-in-azure-ai-foundry-unlock-frontier-intelligence-and-business-ready-capabilities/", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "azure_ai/grok-4-fast-non-reasoning": { + "input_cost_per_token": 4.3e-7, + "output_cost_per_token": 0.00000173, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "azure_ai/grok-4-fast-reasoning": { + "input_cost_per_token": 4.3e-7, + "output_cost_per_token": 0.00000173, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "source": "https://techcommunity.microsoft.com/blog/azure-ai-foundry-blog/announcing-the-grok-4-fast-models-from-xai-now-available-in-azure-ai-foundry/4456701", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "azure_ai/grok-code-fast-1": { + "input_cost_per_token": 0.0000035, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + 
"mode": "chat", + "output_cost_per_token": 0.0000175, + "source": "https://azure.microsoft.com/en-us/blog/grok-4-is-now-available-in-azure-ai-foundry-unlock-frontier-intelligence-and-business-ready-capabilities/", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_web_search": true + }, "azure_ai/jais-30b-chat": { "input_cost_per_token": 0.0032, "litellm_provider": "azure_ai", @@ -4423,7 +4546,7 @@ "input_cost_per_token_above_200k_tokens": 0.000006, "litellm_provider": "anthropic", "max_input_tokens": 1000000, - "max_output_tokens": 1000000, + "max_output_tokens": 64000, "max_tokens": 1000000, "mode": "chat", "output_cost_per_token": 0.000015, @@ -4448,6 +4571,10 @@ "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, "litellm_provider": "anthropic", "max_input_tokens": 200000, "max_output_tokens": 64000, @@ -4474,6 +4601,10 @@ "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, "litellm_provider": "anthropic", "max_input_tokens": 200000, "max_output_tokens": 64000, @@ -4927,6 +5058,16 @@ "output_cost_per_token": 0, "supports_embedding_image_input": true }, + "cohere.embed-v4:0": { + "input_cost_per_token": 1.2e-7, + "litellm_provider": "bedrock", + "max_input_tokens": 128000, + "max_tokens": 128000, + "mode": "embedding", + "output_cost_per_token": 0, + "output_vector_size": 1536, + "supports_embedding_image_input": true + }, "cohere.rerank-v3-5:0": { "input_cost_per_query": 0.002, "input_cost_per_token": 0, @@ -6155,629 +6296,679 @@ "supported_endpoints": ["/v1/audio/transcriptions"] }, "deepinfra/Gryphe/MythoMax-L2-13b": { - "input_cost_per_token": 7.2e-8, - "litellm_provider": "deepinfra", + "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "max_tokens": 4096, + "input_cost_per_token": 8e-8, + "output_cost_per_token": 9e-8, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 7.2e-8, "supports_tool_choice": true }, "deepinfra/NousResearch/Hermes-3-Llama-3.1-405B": { - "input_cost_per_token": 7e-7, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000001, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 8e-7, "supports_tool_choice": true }, "deepinfra/NousResearch/Hermes-3-Llama-3.1-70B": { - "input_cost_per_token": 1e-7, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, + "input_cost_per_token": 3e-7, + "output_cost_per_token": 3e-7, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 2.8e-7, "supports_tool_choice": false }, "deepinfra/Qwen/QwQ-32B": { - "input_cost_per_token": 1.5e-7, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 
131072, - "max_tokens": 131072, - "mode": "chat", + "input_cost_per_token": 1.5e-7, "output_cost_per_token": 4e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/Qwen/Qwen2.5-72B-Instruct": { - "input_cost_per_token": 1.2e-7, - "litellm_provider": "deepinfra", + "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, - "max_tokens": 32768, - "mode": "chat", + "input_cost_per_token": 1.2e-7, "output_cost_per_token": 3.9e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/Qwen/Qwen2.5-7B-Instruct": { - "input_cost_per_token": 4e-8, - "litellm_provider": "deepinfra", + "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, - "max_tokens": 32768, - "mode": "chat", + "input_cost_per_token": 4e-8, "output_cost_per_token": 1e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": false }, "deepinfra/Qwen/Qwen2.5-VL-32B-Instruct": { - "input_cost_per_token": 2e-7, - "litellm_provider": "deepinfra", + "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "max_tokens": 128000, - "mode": "chat", + "input_cost_per_token": 2e-7, "output_cost_per_token": 6e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/Qwen/Qwen3-14B": { - "input_cost_per_token": 6e-8, - "litellm_provider": "deepinfra", + "max_tokens": 40960, "max_input_tokens": 40960, "max_output_tokens": 40960, - "max_tokens": 40960, - "mode": "chat", + "input_cost_per_token": 6e-8, "output_cost_per_token": 2.4e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/Qwen/Qwen3-235B-A22B": { - "input_cost_per_token": 1.3e-7, - "litellm_provider": "deepinfra", + "max_tokens": 40960, "max_input_tokens": 40960, "max_output_tokens": 40960, - "max_tokens": 40960, + "input_cost_per_token": 1.8e-7, + "output_cost_per_token": 5.4e-7, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 6e-7, "supports_tool_choice": true }, "deepinfra/Qwen/Qwen3-235B-A22B-Instruct-2507": { - "input_cost_per_token": 1.3e-7, - "litellm_provider": "deepinfra", + "max_tokens": 262144, "max_input_tokens": 262144, "max_output_tokens": 262144, - "max_tokens": 262144, - "mode": "chat", + "input_cost_per_token": 9e-8, "output_cost_per_token": 6e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/Qwen/Qwen3-235B-A22B-Thinking-2507": { - "input_cost_per_token": 1.3e-7, - "litellm_provider": "deepinfra", + "max_tokens": 262144, "max_input_tokens": 262144, "max_output_tokens": 262144, - "max_tokens": 262144, + "input_cost_per_token": 3e-7, + "output_cost_per_token": 0.0000029, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 6e-7, "supports_tool_choice": true }, "deepinfra/Qwen/Qwen3-30B-A3B": { - "input_cost_per_token": 8e-8, - "litellm_provider": "deepinfra", + "max_tokens": 40960, "max_input_tokens": 40960, "max_output_tokens": 40960, - "max_tokens": 40960, - "mode": "chat", + "input_cost_per_token": 8e-8, "output_cost_per_token": 2.9e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/Qwen/Qwen3-32B": { - "input_cost_per_token": 1e-7, - "litellm_provider": "deepinfra", + "max_tokens": 40960, "max_input_tokens": 40960, "max_output_tokens": 40960, - "max_tokens": 40960, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 2.8e-7, + "litellm_provider": 
"deepinfra", "mode": "chat", - "output_cost_per_token": 3e-7, "supports_tool_choice": true }, "deepinfra/Qwen/Qwen3-Coder-480B-A35B-Instruct": { - "input_cost_per_token": 4e-7, - "litellm_provider": "deepinfra", + "max_tokens": 262144, "max_input_tokens": 262144, "max_output_tokens": 262144, - "max_tokens": 262144, - "mode": "chat", + "input_cost_per_token": 4e-7, "output_cost_per_token": 0.0000016, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo": { - "cache_read_input_token_cost": 2.4e-7, - "input_cost_per_token": 3e-7, + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 2.9e-7, + "output_cost_per_token": 0.0000012, "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/Qwen/Qwen3-Next-80B-A3B-Instruct": { + "max_tokens": 262144, "max_input_tokens": 262144, "max_output_tokens": 262144, + "input_cost_per_token": 1.4e-7, + "output_cost_per_token": 0.0000014, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/Qwen/Qwen3-Next-80B-A3B-Thinking": { "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 1.4e-7, + "output_cost_per_token": 0.0000014, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 0.0000012, "supports_tool_choice": true }, "deepinfra/Sao10K/L3-8B-Lunaris-v1-Turbo": { - "input_cost_per_token": 2e-8, - "litellm_provider": "deepinfra", + "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "max_tokens": 8192, - "mode": "chat", + "input_cost_per_token": 4e-8, "output_cost_per_token": 5e-8, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": false }, "deepinfra/Sao10K/L3.1-70B-Euryale-v2.2": { - "input_cost_per_token": 6.5e-7, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, - "mode": "chat", + "input_cost_per_token": 6.5e-7, "output_cost_per_token": 7.5e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": false }, "deepinfra/Sao10K/L3.3-70B-Euryale-v2.3": { - "input_cost_per_token": 6.5e-7, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, - "mode": "chat", + "input_cost_per_token": 6.5e-7, "output_cost_per_token": 7.5e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": false }, "deepinfra/allenai/olmOCR-7B-0725-FP8": { - "input_cost_per_token": 2.7e-7, - "litellm_provider": "deepinfra", + "max_tokens": 16384, "max_input_tokens": 16384, "max_output_tokens": 16384, - "max_tokens": 16384, - "mode": "chat", + "input_cost_per_token": 2.7e-7, "output_cost_per_token": 0.0000015, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": false }, "deepinfra/anthropic/claude-3-7-sonnet-latest": { - "cache_read_input_token_cost": 3.3e-7, - "input_cost_per_token": 0.0000033, - "litellm_provider": "deepinfra", + "max_tokens": 200000, "max_input_tokens": 200000, "max_output_tokens": 200000, - "max_tokens": 200000, - "mode": "chat", + "input_cost_per_token": 0.0000033, "output_cost_per_token": 0.0000165, + "cache_read_input_token_cost": 3.3e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/anthropic/claude-4-opus": { - 
"input_cost_per_token": 0.0000165, - "litellm_provider": "deepinfra", + "max_tokens": 200000, "max_input_tokens": 200000, "max_output_tokens": 200000, - "max_tokens": 200000, - "mode": "chat", + "input_cost_per_token": 0.0000165, "output_cost_per_token": 0.0000825, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/anthropic/claude-4-sonnet": { - "input_cost_per_token": 0.0000033, - "litellm_provider": "deepinfra", + "max_tokens": 200000, "max_input_tokens": 200000, "max_output_tokens": 200000, - "max_tokens": 200000, - "mode": "chat", + "input_cost_per_token": 0.0000033, "output_cost_per_token": 0.0000165, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/deepseek-ai/DeepSeek-R1": { - "input_cost_per_token": 7e-7, - "litellm_provider": "deepinfra", + "max_tokens": 163840, "max_input_tokens": 163840, "max_output_tokens": 163840, - "max_tokens": 163840, - "mode": "chat", + "input_cost_per_token": 7e-7, "output_cost_per_token": 0.0000024, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/deepseek-ai/DeepSeek-R1-0528": { - "cache_read_input_token_cost": 4e-7, - "input_cost_per_token": 5e-7, - "litellm_provider": "deepinfra", + "max_tokens": 163840, "max_input_tokens": 163840, "max_output_tokens": 163840, - "max_tokens": 163840, - "mode": "chat", + "input_cost_per_token": 5e-7, "output_cost_per_token": 0.00000215, + "cache_read_input_token_cost": 4e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/deepseek-ai/DeepSeek-R1-0528-Turbo": { - "input_cost_per_token": 0.000001, - "litellm_provider": "deepinfra", + "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, - "max_tokens": 32768, - "mode": "chat", + "input_cost_per_token": 0.000001, "output_cost_per_token": 0.000003, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/deepseek-ai/DeepSeek-R1-Distill-Llama-70B": { - "input_cost_per_token": 1e-7, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 6e-7, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 4e-7, "supports_tool_choice": false }, "deepinfra/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B": { - "input_cost_per_token": 7.5e-8, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, + "input_cost_per_token": 2.7e-7, + "output_cost_per_token": 2.7e-7, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 1.5e-7, "supports_tool_choice": true }, "deepinfra/deepseek-ai/DeepSeek-R1-Turbo": { - "input_cost_per_token": 0.000001, - "litellm_provider": "deepinfra", + "max_tokens": 40960, "max_input_tokens": 40960, "max_output_tokens": 40960, - "max_tokens": 40960, - "mode": "chat", + "input_cost_per_token": 0.000001, "output_cost_per_token": 0.000003, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/deepseek-ai/DeepSeek-V3": { - "input_cost_per_token": 3.8e-7, - "litellm_provider": "deepinfra", + "max_tokens": 163840, "max_input_tokens": 163840, "max_output_tokens": 163840, - "max_tokens": 163840, - "mode": "chat", + "input_cost_per_token": 3.8e-7, "output_cost_per_token": 8.9e-7, + "litellm_provider": "deepinfra", + "mode": 
"chat", "supports_tool_choice": true }, "deepinfra/deepseek-ai/DeepSeek-V3-0324": { - "cache_read_input_token_cost": 2.24e-7, - "input_cost_per_token": 2.8e-7, - "litellm_provider": "deepinfra", + "max_tokens": 163840, "max_input_tokens": 163840, "max_output_tokens": 163840, - "max_tokens": 163840, - "mode": "chat", + "input_cost_per_token": 2.5e-7, "output_cost_per_token": 8.8e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/deepseek-ai/DeepSeek-V3.1": { - "cache_read_input_token_cost": 2.16e-7, + "max_tokens": 163840, + "max_input_tokens": 163840, + "max_output_tokens": 163840, "input_cost_per_token": 2.7e-7, + "output_cost_per_token": 0.000001, + "cache_read_input_token_cost": 2.16e-7, "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_reasoning": true + }, + "deepinfra/deepseek-ai/DeepSeek-V3.1-Terminus": { + "max_tokens": 163840, "max_input_tokens": 163840, "max_output_tokens": 163840, - "max_tokens": 163840, - "mode": "chat", + "input_cost_per_token": 2.7e-7, "output_cost_per_token": 0.000001, - "supports_reasoning": true, + "cache_read_input_token_cost": 2.16e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/google/gemini-2.0-flash-001": { - "input_cost_per_token": 1e-7, - "litellm_provider": "deepinfra", + "max_tokens": 1000000, "max_input_tokens": 1000000, "max_output_tokens": 1000000, - "max_tokens": 1000000, - "mode": "chat", + "input_cost_per_token": 1e-7, "output_cost_per_token": 4e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/google/gemini-2.5-flash": { - "input_cost_per_token": 2.1e-7, - "litellm_provider": "deepinfra", + "max_tokens": 1000000, "max_input_tokens": 1000000, "max_output_tokens": 1000000, - "max_tokens": 1000000, + "input_cost_per_token": 3e-7, + "output_cost_per_token": 0.0000025, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 0.00000175, "supports_tool_choice": true }, "deepinfra/google/gemini-2.5-pro": { - "input_cost_per_token": 8.75e-7, - "litellm_provider": "deepinfra", + "max_tokens": 1000000, "max_input_tokens": 1000000, "max_output_tokens": 1000000, - "max_tokens": 1000000, + "input_cost_per_token": 0.00000125, + "output_cost_per_token": 0.00001, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 0.000007, "supports_tool_choice": true }, "deepinfra/google/gemma-3-12b-it": { - "input_cost_per_token": 5e-8, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, - "mode": "chat", + "input_cost_per_token": 5e-8, "output_cost_per_token": 1e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/google/gemma-3-27b-it": { - "input_cost_per_token": 9e-8, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, + "input_cost_per_token": 9e-8, + "output_cost_per_token": 1.6e-7, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 1.7e-7, "supports_tool_choice": true }, "deepinfra/google/gemma-3-4b-it": { - "input_cost_per_token": 4e-8, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, - "mode": "chat", + "input_cost_per_token": 4e-8, "output_cost_per_token": 8e-8, + "litellm_provider": 
"deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/meta-llama/Llama-3.2-11B-Vision-Instruct": { - "input_cost_per_token": 4.9e-8, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, - "mode": "chat", + "input_cost_per_token": 4.9e-8, "output_cost_per_token": 4.9e-8, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": false }, "deepinfra/meta-llama/Llama-3.2-3B-Instruct": { - "input_cost_per_token": 1.2e-8, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, + "input_cost_per_token": 2e-8, + "output_cost_per_token": 2e-8, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 2.4e-8, "supports_tool_choice": true }, "deepinfra/meta-llama/Llama-3.3-70B-Instruct": { - "input_cost_per_token": 2.3e-7, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, - "mode": "chat", + "input_cost_per_token": 2.3e-7, "output_cost_per_token": 4e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/meta-llama/Llama-3.3-70B-Instruct-Turbo": { - "input_cost_per_token": 3.8e-8, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, + "input_cost_per_token": 1.3e-7, + "output_cost_per_token": 3.9e-7, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 1.2e-7, "supports_tool_choice": true }, "deepinfra/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8": { - "input_cost_per_token": 1.5e-7, - "litellm_provider": "deepinfra", + "max_tokens": 1048576, "max_input_tokens": 1048576, "max_output_tokens": 1048576, - "max_tokens": 1048576, - "mode": "chat", + "input_cost_per_token": 1.5e-7, "output_cost_per_token": 6e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/meta-llama/Llama-4-Scout-17B-16E-Instruct": { - "input_cost_per_token": 8e-8, - "litellm_provider": "deepinfra", + "max_tokens": 327680, "max_input_tokens": 327680, "max_output_tokens": 327680, - "max_tokens": 327680, - "mode": "chat", + "input_cost_per_token": 8e-8, "output_cost_per_token": 3e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/meta-llama/Llama-Guard-3-8B": { - "input_cost_per_token": 5.5e-8, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, - "mode": "chat", + "input_cost_per_token": 5.5e-8, "output_cost_per_token": 5.5e-8, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": false }, "deepinfra/meta-llama/Llama-Guard-4-12B": { - "input_cost_per_token": 1.8e-7, - "litellm_provider": "deepinfra", + "max_tokens": 163840, "max_input_tokens": 163840, "max_output_tokens": 163840, - "max_tokens": 163840, - "mode": "chat", + "input_cost_per_token": 1.8e-7, "output_cost_per_token": 1.8e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": false }, "deepinfra/meta-llama/Meta-Llama-3-8B-Instruct": { - "input_cost_per_token": 3e-8, - "litellm_provider": "deepinfra", + "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "max_tokens": 8192, - "mode": "chat", + "input_cost_per_token": 3e-8, "output_cost_per_token": 6e-8, 
+ "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/meta-llama/Meta-Llama-3.1-70B-Instruct": { - "input_cost_per_token": 2.3e-7, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, - "mode": "chat", + "input_cost_per_token": 4e-7, "output_cost_per_token": 4e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": { - "input_cost_per_token": 1e-7, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, - "mode": "chat", + "input_cost_per_token": 1e-7, "output_cost_per_token": 2.8e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/meta-llama/Meta-Llama-3.1-8B-Instruct": { - "input_cost_per_token": 3e-8, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, - "mode": "chat", + "input_cost_per_token": 3e-8, "output_cost_per_token": 5e-8, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": { - "input_cost_per_token": 1.5e-8, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, + "input_cost_per_token": 2e-8, + "output_cost_per_token": 3e-8, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 2e-8, "supports_tool_choice": true }, "deepinfra/microsoft/WizardLM-2-8x22B": { - "input_cost_per_token": 4.8e-7, - "litellm_provider": "deepinfra", + "max_tokens": 65536, "max_input_tokens": 65536, "max_output_tokens": 65536, - "max_tokens": 65536, - "mode": "chat", + "input_cost_per_token": 4.8e-7, "output_cost_per_token": 4.8e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": false }, "deepinfra/microsoft/phi-4": { - "input_cost_per_token": 7e-8, - "litellm_provider": "deepinfra", + "max_tokens": 16384, "max_input_tokens": 16384, "max_output_tokens": 16384, - "max_tokens": 16384, - "mode": "chat", + "input_cost_per_token": 7e-8, "output_cost_per_token": 1.4e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/mistralai/Mistral-Nemo-Instruct-2407": { - "input_cost_per_token": 2e-8, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, - "mode": "chat", + "input_cost_per_token": 2e-8, "output_cost_per_token": 4e-8, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/mistralai/Mistral-Small-24B-Instruct-2501": { - "input_cost_per_token": 5e-8, - "litellm_provider": "deepinfra", + "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, - "max_tokens": 32768, - "mode": "chat", + "input_cost_per_token": 5e-8, "output_cost_per_token": 8e-8, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/mistralai/Mistral-Small-3.2-24B-Instruct-2506": { - "input_cost_per_token": 5e-8, - "litellm_provider": "deepinfra", + "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "max_tokens": 128000, + "input_cost_per_token": 7.5e-8, + "output_cost_per_token": 2e-7, + "litellm_provider": "deepinfra", 
"mode": "chat", - "output_cost_per_token": 1e-7, "supports_tool_choice": true }, "deepinfra/mistralai/Mixtral-8x7B-Instruct-v0.1": { - "input_cost_per_token": 8e-8, - "litellm_provider": "deepinfra", + "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, - "max_tokens": 32768, + "input_cost_per_token": 4e-7, + "output_cost_per_token": 4e-7, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 2.4e-7, "supports_tool_choice": true }, "deepinfra/moonshotai/Kimi-K2-Instruct": { - "input_cost_per_token": 5e-7, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, + "input_cost_per_token": 5e-7, + "output_cost_per_token": 0.000002, + "litellm_provider": "deepinfra", "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/moonshotai/Kimi-K2-Instruct-0905": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 5e-7, "output_cost_per_token": 0.000002, + "cache_read_input_token_cost": 4e-7, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/nvidia/Llama-3.1-Nemotron-70B-Instruct": { - "input_cost_per_token": 1.2e-7, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, + "input_cost_per_token": 6e-7, + "output_cost_per_token": 6e-7, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 3e-7, "supports_tool_choice": true }, - "deepinfra/openai/gpt-oss-120b": { - "input_cost_per_token": 9e-8, - "litellm_provider": "deepinfra", + "deepinfra/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5": { + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 4e-7, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 4.5e-7, "supports_tool_choice": true }, - "deepinfra/openai/gpt-oss-20b": { + "deepinfra/nvidia/NVIDIA-Nemotron-Nano-9B-v2": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, "input_cost_per_token": 4e-8, + "output_cost_per_token": 1.6e-7, "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/openai/gpt-oss-120b": { + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, + "input_cost_per_token": 5e-8, + "output_cost_per_token": 4.5e-7, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 1.6e-7, "supports_tool_choice": true }, - "deepinfra/zai-org/GLM-4.5": { - "input_cost_per_token": 5.5e-7, - "litellm_provider": "deepinfra", + "deepinfra/openai/gpt-oss-20b": { + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, + "input_cost_per_token": 4e-8, + "output_cost_per_token": 1.5e-7, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 0.000002, "supports_tool_choice": true }, - "deepinfra/zai-org/GLM-4.5-Air": { - "input_cost_per_token": 2e-7, - "litellm_provider": "deepinfra", + "deepinfra/zai-org/GLM-4.5": { + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, + "input_cost_per_token": 4e-7, + "output_cost_per_token": 0.0000016, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 0.0000011, "supports_tool_choice": true }, 
"deepseek/deepseek-chat": { @@ -7246,6 +7437,36 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 159 }, + "eu.anthropic.claude-sonnet-4-5-20250929-v1:0": { + "cache_creation_input_token_cost": 0.000004125, + "cache_read_input_token_cost": 3.3e-7, + "input_cost_per_token": 0.0000033, + "input_cost_per_token_above_200k_tokens": 0.0000066, + "output_cost_per_token_above_200k_tokens": 0.00002475, + "cache_creation_input_token_cost_above_200k_tokens": 0.00000825, + "cache_read_input_token_cost_above_200k_tokens": 6.6e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 200000, + "mode": "chat", + "output_cost_per_token": 0.0000165, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, "eu.meta.llama3-2-1b-instruct-v1:0": { "input_cost_per_token": 1.3e-7, "litellm_provider": "bedrock", @@ -10688,39 +10909,99 @@ "supports_audio_output": false, "supports_function_calling": true, "supports_response_schema": true, - "supports_system_messages": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "gemini/veo-2.0-generate-001": { + "litellm_provider": "gemini", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + "output_cost_per_second": 0.35, + "source": "https://ai.google.dev/gemini-api/docs/video", + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] + }, + "gemini/veo-3.0-fast-generate-preview": { + "litellm_provider": "gemini", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + "output_cost_per_second": 0.4, + "source": "https://ai.google.dev/gemini-api/docs/video", + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] + }, + "gemini/veo-3.0-generate-preview": { + "litellm_provider": "gemini", + "max_input_tokens": 1024, + "max_tokens": 1024, + "mode": "video_generation", + "output_cost_per_second": 0.75, + "source": "https://ai.google.dev/gemini-api/docs/video", + "supported_modalities": ["text"], + "supported_output_modalities": ["video"] + }, + "global.anthropic.claude-sonnet-4-5-20250929-v1:0": { + "cache_creation_input_token_cost": 0.00000375, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 200000, + "mode": "chat", + "output_cost_per_token": 0.000015, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + 
"supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, + "global.anthropic.claude-sonnet-4-20250514-v1:0": { + "cache_creation_input_token_cost": 0.00000375, + "cache_read_input_token_cost": 3e-7, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.000015, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true - }, - "gemini/veo-2.0-generate-001": { - "litellm_provider": "gemini", - "max_input_tokens": 1024, - "max_tokens": 1024, - "mode": "video_generation", - "output_cost_per_second": 0.35, - "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": ["text"], - "supported_output_modalities": ["video"] - }, - "gemini/veo-3.0-fast-generate-preview": { - "litellm_provider": "gemini", - "max_input_tokens": 1024, - "max_tokens": 1024, - "mode": "video_generation", - "output_cost_per_second": 0.4, - "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": ["text"], - "supported_output_modalities": ["video"] - }, - "gemini/veo-3.0-generate-preview": { - "litellm_provider": "gemini", - "max_input_tokens": 1024, - "max_tokens": 1024, - "mode": "video_generation", - "output_cost_per_second": 0.75, - "source": "https://ai.google.dev/gemini-api/docs/video", - "supported_modalities": ["text"], - "supported_output_modalities": ["video"] + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 }, "gpt-3.5-turbo": { "input_cost_per_token": 5e-7, @@ -11700,6 +11981,56 @@ "supports_tool_choice": true, "supports_vision": true }, + "gpt-5-pro": { + "input_cost_per_token": 0.000015, + "input_cost_per_token_batches": 0.0000075, + "litellm_provider": "openai", + "max_input_tokens": 400000, + "max_output_tokens": 272000, + "max_tokens": 272000, + "mode": "responses", + "output_cost_per_token": 0.00012, + "output_cost_per_token_batches": 0.00006, + "supported_endpoints": ["/v1/batch", "/v1/responses"], + "supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": false, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "gpt-5-pro-2025-10-06": { + "input_cost_per_token": 0.000015, + "input_cost_per_token_batches": 0.0000075, + "litellm_provider": "openai", + "max_input_tokens": 400000, + "max_output_tokens": 272000, + "max_tokens": 272000, + "mode": "responses", + "output_cost_per_token": 0.00012, + "output_cost_per_token_batches": 0.00006, + "supported_endpoints": ["/v1/batch", "/v1/responses"], + 
"supported_modalities": ["text", "image"], + "supported_output_modalities": ["text"], + "supports_function_calling": true, + "supports_native_streaming": false, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, "gpt-5-2025-08-07": { "cache_read_input_token_cost": 1.25e-7, "cache_read_input_token_cost_flex": 6.25e-8, @@ -11757,9 +12088,9 @@ "cache_read_input_token_cost": 1.25e-7, "input_cost_per_token": 0.00000125, "litellm_provider": "openai", - "max_input_tokens": 272000, - "max_output_tokens": 128000, - "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, "mode": "chat", "output_cost_per_token": 0.00001, "supported_endpoints": ["/v1/chat/completions", "/v1/batch", "/v1/responses"], @@ -11917,6 +12248,16 @@ "output_cost_per_pixel": 0, "supported_endpoints": ["/v1/images/generations"] }, + "gpt-image-1-mini": { + "cache_read_input_image_token_cost": 2.5e-7, + "cache_read_input_token_cost": 2e-7, + "input_cost_per_image_token": 0.0000025, + "input_cost_per_token": 0.000002, + "litellm_provider": "openai", + "mode": "chat", + "output_cost_per_image_token": 0.000008, + "supported_endpoints": ["/v1/images/generations", "/v1/images/edits"] + }, "gpt-realtime": { "cache_creation_input_audio_token_cost": 4e-7, "cache_read_input_token_cost": 4e-7, @@ -11940,6 +12281,28 @@ "supports_system_messages": true, "supports_tool_choice": true }, + "gpt-realtime-mini": { + "cache_creation_input_audio_token_cost": 3e-7, + "cache_read_input_audio_token_cost": 3e-7, + "input_cost_per_audio_token": 0.00001, + "input_cost_per_token": 6e-7, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 0.00002, + "output_cost_per_token": 0.0000024, + "supported_endpoints": ["/v1/realtime"], + "supported_modalities": ["text", "image", "audio"], + "supported_output_modalities": ["text", "audio"], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, "gpt-realtime-2025-08-28": { "cache_creation_input_audio_token_cost": 4e-7, "cache_read_input_token_cost": 4e-7, @@ -12388,6 +12751,19 @@ "supports_response_schema": true, "supports_tool_choice": true }, + "groq/moonshotai/kimi-k2-instruct-0905": { + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000003, + "cache_read_input_token_cost": 5e-7, + "litellm_provider": "groq", + "max_input_tokens": 262144, + "max_output_tokens": 16384, + "max_tokens": 278528, + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, "groq/openai/gpt-oss-120b": { "input_cost_per_token": 1.5e-7, "litellm_provider": "groq", @@ -12857,6 +13233,36 @@ "mode": "rerank", "output_cost_per_token": 1.8e-8 }, + "jp.anthropic.claude-sonnet-4-5-20250929-v1:0": { + "cache_creation_input_token_cost": 0.000004125, + "cache_read_input_token_cost": 3.3e-7, + "input_cost_per_token": 0.0000033, + "input_cost_per_token_above_200k_tokens": 0.0000066, + "output_cost_per_token_above_200k_tokens": 0.00002475, + "cache_creation_input_token_cost_above_200k_tokens": 
0.00000825, + "cache_read_input_token_cost_above_200k_tokens": 6.6e-7, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 200000, + "mode": "chat", + "output_cost_per_token": 0.0000165, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, "lambda_ai/deepseek-llama3.3-70b": { "input_cost_per_token": 2e-7, "litellm_provider": "lambda_ai", @@ -13220,6 +13626,42 @@ "output_cost_per_pixel": 0, "supported_endpoints": ["/v1/images/generations"] }, + "low/1024-x-1024/gpt-image-1-mini": { + "input_cost_per_image": 0.005, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": ["/v1/images/generations"] + }, + "low/1024-x-1536/gpt-image-1-mini": { + "input_cost_per_image": 0.006, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": ["/v1/images/generations"] + }, + "low/1536-x-1024/gpt-image-1-mini": { + "input_cost_per_image": 0.006, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": ["/v1/images/generations"] + }, + "medium/1024-x-1024/gpt-image-1-mini": { + "input_cost_per_image": 0.011, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": ["/v1/images/generations"] + }, + "medium/1024-x-1536/gpt-image-1-mini": { + "input_cost_per_image": 0.015, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": ["/v1/images/generations"] + }, + "medium/1536-x-1024/gpt-image-1-mini": { + "input_cost_per_image": 0.015, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": ["/v1/images/generations"] + }, "medlm-large": { "input_cost_per_character": 0.000005, "litellm_provider": "vertex_ai-language-models", @@ -14842,6 +15284,42 @@ "supports_function_calling": true, "supports_response_schema": false }, + "oci/cohere.command-latest": { + "input_cost_per_token": 0.00000156, + "litellm_provider": "oci", + "max_input_tokens": 128000, + "max_output_tokens": 4000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.00000156, + "source": "https://www.oracle.com/cloud/ai/generative-ai/pricing/", + "supports_function_calling": true, + "supports_response_schema": false + }, + "oci/cohere.command-a-03-2025": { + "input_cost_per_token": 0.00000156, + "litellm_provider": "oci", + "max_input_tokens": 256000, + "max_output_tokens": 4000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 0.00000156, + "source": "https://www.oracle.com/cloud/ai/generative-ai/pricing/", + "supports_function_calling": true, + "supports_response_schema": false + }, + "oci/cohere.command-plus-latest": { + "input_cost_per_token": 0.00000156, + "litellm_provider": "oci", + "max_input_tokens": 128000, + "max_output_tokens": 4000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.00000156, + "source": "https://www.oracle.com/cloud/ai/generative-ai/pricing/", + "supports_function_calling": true, + "supports_response_schema": false + }, "ollama/codegeex4": { "input_cost_per_token": 0, "litellm_provider": 
"ollama", @@ -15375,6 +15853,25 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 159 }, + "openrouter/anthropic/claude-sonnet-4.5": { + "input_cost_per_image": 0.0048, + "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "litellm_provider": "openrouter", + "max_input_tokens": 1000000, + "max_output_tokens": 1000000, + "max_tokens": 1000000, + "mode": "chat", + "output_cost_per_token": 0.000015, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, "openrouter/bytedance/ui-tars-1.5-7b": { "input_cost_per_token": 1e-7, "litellm_provider": "openrouter", @@ -16966,6 +17463,20 @@ "mode": "rerank", "output_cost_per_token": 0 }, + "nvidia_nim/nvidia/nv-rerankqa-mistral-4b-v3": { + "input_cost_per_query": 0, + "input_cost_per_token": 0, + "litellm_provider": "nvidia_nim", + "mode": "rerank", + "output_cost_per_token": 0 + }, + "nvidia_nim/nvidia/llama-3_2-nv-rerankqa-1b-v2": { + "input_cost_per_query": 0, + "input_cost_per_token": 0, + "litellm_provider": "nvidia_nim", + "mode": "rerank", + "output_cost_per_token": 0 + }, "sagemaker/meta-textgeneration-llama-2-13b": { "input_cost_per_token": 0, "litellm_provider": "sagemaker", @@ -17804,6 +18315,22 @@ "mode": "embedding", "output_cost_per_token": 0 }, + "together_ai/baai/bge-base-en-v1.5": { + "input_cost_per_token": 8e-9, + "litellm_provider": "together_ai", + "max_input_tokens": 512, + "mode": "embedding", + "output_cost_per_token": 0, + "output_vector_size": 768 + }, + "together_ai/BAAI/bge-base-en-v1.5": { + "input_cost_per_token": 8e-9, + "litellm_provider": "together_ai", + "max_input_tokens": 512, + "mode": "embedding", + "output_cost_per_token": 0, + "output_vector_size": 768 + }, "together-ai-up-to-4b": { "input_cost_per_token": 1e-7, "litellm_provider": "together_ai", @@ -18053,13 +18580,46 @@ "supports_parallel_function_calling": true, "supports_tool_choice": true }, - "together_ai/zai-org/GLM-4.5-Air-FP8": { - "input_cost_per_token": 2e-7, + "together_ai/zai-org/GLM-4.5-Air-FP8": { + "input_cost_per_token": 2e-7, + "litellm_provider": "together_ai", + "max_input_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.0000011, + "source": "https://www.together.ai/models/glm-4-5-air", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true + }, + "together_ai/moonshotai/Kimi-K2-Instruct-0905": { + "input_cost_per_token": 0.000001, + "litellm_provider": "together_ai", + "max_input_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.000003, + "source": "https://www.together.ai/models/kimi-k2-0905", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true + }, + "together_ai/Qwen/Qwen3-Next-80B-A3B-Instruct": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "together_ai", + "max_input_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 0.0000015, + "source": "https://www.together.ai/models/qwen3-next-80b-a3b-instruct", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true + }, + "together_ai/Qwen/Qwen3-Next-80B-A3B-Thinking": { + "input_cost_per_token": 1.5e-7, "litellm_provider": "together_ai", - "max_input_tokens": 128000, + 
"max_input_tokens": 262144, "mode": "chat", - "output_cost_per_token": 0.0000011, - "source": "https://www.together.ai/models/glm-4-5-air", + "output_cost_per_token": 0.0000015, + "source": "https://www.together.ai/models/qwen3-next-80b-a3b-thinking", "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_tool_choice": true @@ -18268,15 +18828,19 @@ "tool_use_system_prompt_tokens": 159 }, "us.anthropic.claude-sonnet-4-5-20250929-v1:0": { - "cache_creation_input_token_cost": 0.00000375, - "cache_read_input_token_cost": 3e-7, - "input_cost_per_token": 0.000003, + "cache_creation_input_token_cost": 0.000004125, + "cache_read_input_token_cost": 3.3e-7, + "input_cost_per_token": 0.0000033, + "input_cost_per_token_above_200k_tokens": 0.0000066, + "output_cost_per_token_above_200k_tokens": 0.00002475, + "cache_creation_input_token_cost_above_200k_tokens": 0.00000825, + "cache_read_input_token_cost_above_200k_tokens": 6.6e-7, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 200000, "mode": "chat", - "output_cost_per_token": 0.000015, + "output_cost_per_token": 0.0000165, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -19625,6 +20189,10 @@ "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, "input_cost_per_token_batches": 0.0000015, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 200000, @@ -19647,6 +20215,10 @@ "cache_creation_input_token_cost": 0.00000375, "cache_read_input_token_cost": 3e-7, "input_cost_per_token": 0.000003, + "input_cost_per_token_above_200k_tokens": 0.000006, + "output_cost_per_token_above_200k_tokens": 0.0000225, + "cache_creation_input_token_cost_above_200k_tokens": 0.0000075, + "cache_read_input_token_cost_above_200k_tokens": 6e-7, "input_cost_per_token_batches": 0.0000015, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 200000, @@ -20531,6 +21103,306 @@ "supports_tool_choice": true, "supports_vision": false }, + "watsonx/bigscience/mt0-xxl-13b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0005, + "output_cost_per_token": 0.002, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/core42/jais-13b-chat": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0005, + "output_cost_per_token": 0.002, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/google/flan-t5-xl-3b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0001, + "output_cost_per_token": 0.00025, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/ibm/granite-13b-chat-v2": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + 
"input_cost_per_token": 0.0005, + "output_cost_per_token": 0.002, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/ibm/granite-13b-instruct-v2": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0005, + "output_cost_per_token": 0.002, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/ibm/granite-3-3-8b-instruct": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00025, + "output_cost_per_token": 0.001, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false + }, + "watsonx/ibm/granite-4-h-small": { + "max_tokens": 20480, + "max_input_tokens": 20480, + "max_output_tokens": 20480, + "input_cost_per_token": 0.000625, + "output_cost_per_token": 0.0025, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false + }, + "watsonx/ibm/granite-guardian-3-2-2b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00015, + "output_cost_per_token": 0.0006, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/ibm/granite-guardian-3-3-8b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00025, + "output_cost_per_token": 0.001, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/ibm/granite-ttm-1024-96-r2": { + "max_tokens": 512, + "max_input_tokens": 512, + "max_output_tokens": 512, + "input_cost_per_token": 0.000625, + "output_cost_per_token": 0.000625, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/ibm/granite-ttm-1536-96-r2": { + "max_tokens": 512, + "max_input_tokens": 512, + "max_output_tokens": 512, + "input_cost_per_token": 0.000625, + "output_cost_per_token": 0.000625, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/ibm/granite-ttm-512-96-r2": { + "max_tokens": 512, + "max_input_tokens": 512, + "max_output_tokens": 512, + "input_cost_per_token": 0.000625, + "output_cost_per_token": 0.000625, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/ibm/granite-vision-3-2-2b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00015, + "output_cost_per_token": 0.0006, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": true + }, + "watsonx/meta-llama/llama-3-2-11b-vision-instruct": { + "max_tokens": 128000, + 
"max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.00025, + "output_cost_per_token": 0.001, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true + }, + "watsonx/meta-llama/llama-3-2-1b-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.0001, + "output_cost_per_token": 0.0002, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false + }, + "watsonx/meta-llama/llama-3-2-3b-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.00015, + "output_cost_per_token": 0.0006, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false + }, + "watsonx/meta-llama/llama-3-2-90b-vision-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.002, + "output_cost_per_token": 0.008, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true + }, + "watsonx/meta-llama/llama-3-3-70b-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.002, + "output_cost_per_token": 0.006, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false + }, + "watsonx/meta-llama/llama-4-maverick-17b": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.0005, + "output_cost_per_token": 0.002, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false + }, + "watsonx/meta-llama/llama-guard-3-11b-vision": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.00025, + "output_cost_per_token": 0.001, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": true + }, + "watsonx/mistralai/mistral-medium-2505": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.00225, + "output_cost_per_token": 0.00675, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false + }, + "watsonx/mistralai/mistral-small-2503": { + "max_tokens": 32000, + "max_input_tokens": 32000, + "max_output_tokens": 32000, + "input_cost_per_token": 0.0002, + "output_cost_per_token": 0.0006, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false + }, + "watsonx/mistralai/pixtral-12b-2409": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.00015, + "output_cost_per_token": 0.00015, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + 
"supports_parallel_function_calling": false, + "supports_vision": true + }, + "watsonx/openai/gpt-oss-120b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.004, + "output_cost_per_token": 0.016, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/sdaia/allam-1-13b-instruct": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0005, + "output_cost_per_token": 0.002, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, "whisper-1": { "input_cost_per_second": 0.0001, "litellm_provider": "openai",