From 9ce00ea816df8234e3950ca33c411c3b3a698dd6 Mon Sep 17 00:00:00 2001 From: Eden Reich Date: Mon, 27 Apr 2026 13:17:05 +0200 Subject: [PATCH] fix(config): Update model context windows and pricing for current model lineup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Aligns context window patterns and pricing data with the actual model IDs served by the gateway, and fixes the misleading "(free)" label that appeared for any model lacking a pricing entry. - model_context.go: drop dead patterns ("claude-4", "claude-3.5", "claude-3", "claude-2") that never matched real IDs like "claude-opus-4-7"; add explicit "claude-opus-4-7" → 1M and the "claude-{opus,sonnet,haiku}-4" family at 200K - pricing_service.go: distinguish "no pricing entry" (returns "") from "explicit 0/0" (returns "free"); previously every unknown model was rendered as "(free)" - pricing.go: drop legacy claude-3.x/3-5 entries no longer on the API list; add Claude Opus 4.7/Sonnet 4.6/Opus 4.6 with paid prices from claude.com/pricing; add Gemini 2.5/2.0/3.x paid-tier prices from ai.google.dev/gemini-api/docs/pricing; add all 8 Gemma variants ($0); add 35 Ollama Cloud entries at $0 (subscription billing, not per-token); add Moonshot Kimi K2.5/K2.6 - agents_test.go, config_test.go: refresh hardcoded model names ("claude-4", "claude-4-5-sonnet" → "claude-sonnet-4-6") --- config/agents_test.go | 2 +- config/config_test.go | 6 +- config/model_context.go | 4 +- config/pricing.go | 459 ++++++++++++++++++++-- internal/domain/pricing.go | 6 +- internal/models/context_test.go | 25 ++ internal/services/pricing_service.go | 51 ++- internal/services/pricing_service_test.go | 39 +- 8 files changed, 525 insertions(+), 67 deletions(-) diff --git a/config/agents_test.go b/config/agents_test.go index 3dcaca76..4c120856 100644 --- a/config/agents_test.go +++ b/config/agents_test.go @@ -191,7 +191,7 @@ func TestUpdateEntry_Agent(t *testing.T) { URL: "https://new-agent.example.com", 
OCI: "ghcr.io/org/test-agent:v2", Run: true, - Model: "anthropic/claude-4-5-sonnet", + Model: "anthropic/claude-sonnet-4-6", Environment: map[string]string{ "DEBUG": "true", }, diff --git a/config/config_test.go b/config/config_test.go index f0c9b89a..12540bc2 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -325,13 +325,13 @@ func TestSaveConfig(t *testing.T) { { name: "save chat config", setupFunc: func(cfg *Config) { - cfg.Agent.Model = "anthropic/claude-4" + cfg.Agent.Model = "anthropic/claude-sonnet-4-6" cfg.Prompts.Agent.SystemPrompt = "Be helpful" cfg.Gateway.APIKey = "secret-key" }, validator: func(t *testing.T, cfg *Config) { - if cfg.Agent.Model != "anthropic/claude-4" { - t.Errorf("Expected default model to be 'anthropic/claude-4', got %q", cfg.Agent.Model) + if cfg.Agent.Model != "anthropic/claude-sonnet-4-6" { + t.Errorf("Expected default model to be 'anthropic/claude-sonnet-4-6', got %q", cfg.Agent.Model) } if cfg.Prompts.Agent.SystemPrompt != "" { t.Errorf("Expected system prompt to be empty after round-trip (it lives in prompts.yaml), got %q", cfg.Prompts.Agent.SystemPrompt) diff --git a/config/model_context.go b/config/model_context.go index 3df628f5..1d0f1c15 100644 --- a/config/model_context.go +++ b/config/model_context.go @@ -19,8 +19,8 @@ var ContextMatchers = []ModelMatcher{ {Patterns: []string{"gpt-4-32k"}, ContextWindow: 32768}, {Patterns: []string{"gpt-4"}, ContextWindow: 8192}, {Patterns: []string{"gpt-3.5"}, ContextWindow: 16384}, - {Patterns: []string{"claude-4", "claude-3.5", "claude-3"}, ContextWindow: 200000}, - {Patterns: []string{"claude-2"}, ContextWindow: 100000}, + {Patterns: []string{"claude-opus-4-7"}, ContextWindow: 1000000}, + {Patterns: []string{"claude-opus-4", "claude-sonnet-4", "claude-haiku-4"}, ContextWindow: 200000}, {Patterns: []string{"claude"}, ContextWindow: 200000}, {Patterns: []string{"gemini-2", "gemini-1.5"}, ContextWindow: 1000000}, {Patterns: []string{"gemini"}, ContextWindow: 32768}, diff 
--git a/config/pricing.go b/config/pricing.go index 3be49e10..16b2a08b 100644 --- a/config/pricing.go +++ b/config/pricing.go @@ -36,6 +36,27 @@ func GetDefaultPricingConfig() PricingConfig { // Prices are based on publicly available pricing as of December 2024. // Users can override these in their config files. var DefaultModelPricing = map[string]ModelPricing{ + "anthropic/claude-opus-4-7": { + Provider: "anthropic", + Model: "claude-opus-4-7", + InputPricePerMToken: 5.00, + OutputPricePerMToken: 25.00, + Currency: "USD", + }, + "anthropic/claude-sonnet-4-6": { + Provider: "anthropic", + Model: "claude-sonnet-4-6", + InputPricePerMToken: 3.00, + OutputPricePerMToken: 15.00, + Currency: "USD", + }, + "anthropic/claude-opus-4-6": { + Provider: "anthropic", + Model: "claude-opus-4-6", + InputPricePerMToken: 5.00, + OutputPricePerMToken: 25.00, + Currency: "USD", + }, "anthropic/claude-opus-4-5-20251101": { Provider: "anthropic", Model: "claude-opus-4-5-20251101", @@ -78,34 +99,6 @@ var DefaultModelPricing = map[string]ModelPricing{ OutputPricePerMToken: 15.00, Currency: "USD", }, - "anthropic/claude-3-7-sonnet-20250219": { - Provider: "anthropic", - Model: "claude-3-7-sonnet-20250219", - InputPricePerMToken: 3.00, - OutputPricePerMToken: 15.00, - Currency: "USD", - }, - "anthropic/claude-3-5-haiku-20241022": { - Provider: "anthropic", - Model: "claude-3-5-haiku-20241022", - InputPricePerMToken: 0.80, - OutputPricePerMToken: 4.00, - Currency: "USD", - }, - "anthropic/claude-3-haiku-20240307": { - Provider: "anthropic", - Model: "claude-3-haiku-20240307", - InputPricePerMToken: 0.25, - OutputPricePerMToken: 1.25, - Currency: "USD", - }, - "anthropic/claude-3-opus-20240229": { - Provider: "anthropic", - Model: "claude-3-opus-20240229", - InputPricePerMToken: 15.00, - OutputPricePerMToken: 75.00, - Currency: "USD", - }, "openai/gpt-4o": { Provider: "openai", Model: "gpt-4o", @@ -162,9 +155,156 @@ var DefaultModelPricing = map[string]ModelPricing{ OutputPricePerMToken: 
60.00, Currency: "USD", }, - "google/gemini-2.0-flash": { + "google/models/gemini-2.5-pro": { + Provider: "google", + Model: "models/gemini-2.5-pro", + InputPricePerMToken: 1.25, + OutputPricePerMToken: 10.00, + Currency: "USD", + }, + "google/models/gemini-2.5-flash": { + Provider: "google", + Model: "models/gemini-2.5-flash", + InputPricePerMToken: 0.30, + OutputPricePerMToken: 2.50, + Currency: "USD", + }, + "google/models/gemini-2.5-flash-lite": { + Provider: "google", + Model: "models/gemini-2.5-flash-lite", + InputPricePerMToken: 0.10, + OutputPricePerMToken: 0.40, + Currency: "USD", + }, + "google/models/gemini-2.0-flash": { + Provider: "google", + Model: "models/gemini-2.0-flash", + InputPricePerMToken: 0.10, + OutputPricePerMToken: 0.40, + Currency: "USD", + }, + "google/models/gemini-2.0-flash-001": { + Provider: "google", + Model: "models/gemini-2.0-flash-001", + InputPricePerMToken: 0.10, + OutputPricePerMToken: 0.40, + Currency: "USD", + }, + "google/models/gemini-2.0-flash-lite": { + Provider: "google", + Model: "models/gemini-2.0-flash-lite", + InputPricePerMToken: 0.075, + OutputPricePerMToken: 0.30, + Currency: "USD", + }, + "google/models/gemini-2.0-flash-lite-001": { + Provider: "google", + Model: "models/gemini-2.0-flash-lite-001", + InputPricePerMToken: 0.075, + OutputPricePerMToken: 0.30, + Currency: "USD", + }, + "google/models/gemini-2.5-flash-preview-tts": { + Provider: "google", + Model: "models/gemini-2.5-flash-preview-tts", + InputPricePerMToken: 0.50, + OutputPricePerMToken: 10.00, + Currency: "USD", + }, + "google/models/gemini-2.5-pro-preview-tts": { + Provider: "google", + Model: "models/gemini-2.5-pro-preview-tts", + InputPricePerMToken: 1.00, + OutputPricePerMToken: 20.00, + Currency: "USD", + }, + "google/models/gemini-3-flash-preview": { Provider: "google", - Model: "gemini-2.0-flash", + Model: "models/gemini-3-flash-preview", + InputPricePerMToken: 0.50, + OutputPricePerMToken: 3.00, + Currency: "USD", + }, + 
"google/models/gemini-3.1-pro-preview": { + Provider: "google", + Model: "models/gemini-3.1-pro-preview", + InputPricePerMToken: 2.00, + OutputPricePerMToken: 12.00, + Currency: "USD", + }, + "google/models/gemini-3.1-flash-lite-preview": { + Provider: "google", + Model: "models/gemini-3.1-flash-lite-preview", + InputPricePerMToken: 0.25, + OutputPricePerMToken: 1.50, + Currency: "USD", + }, + "google/models/gemini-embedding-001": { + Provider: "google", + Model: "models/gemini-embedding-001", + InputPricePerMToken: 0.20, + OutputPricePerMToken: 0.00, + Currency: "USD", + }, + "google/models/gemini-embedding-2": { + Provider: "google", + Model: "models/gemini-embedding-2", + InputPricePerMToken: 0.20, + OutputPricePerMToken: 0.00, + Currency: "USD", + }, + "google/models/gemma-3-1b-it": { + Provider: "google", + Model: "models/gemma-3-1b-it", + InputPricePerMToken: 0.00, + OutputPricePerMToken: 0.00, + Currency: "USD", + }, + "google/models/gemma-3-4b-it": { + Provider: "google", + Model: "models/gemma-3-4b-it", + InputPricePerMToken: 0.00, + OutputPricePerMToken: 0.00, + Currency: "USD", + }, + "google/models/gemma-3-12b-it": { + Provider: "google", + Model: "models/gemma-3-12b-it", + InputPricePerMToken: 0.00, + OutputPricePerMToken: 0.00, + Currency: "USD", + }, + "google/models/gemma-3-27b-it": { + Provider: "google", + Model: "models/gemma-3-27b-it", + InputPricePerMToken: 0.00, + OutputPricePerMToken: 0.00, + Currency: "USD", + }, + "google/models/gemma-3n-e2b-it": { + Provider: "google", + Model: "models/gemma-3n-e2b-it", + InputPricePerMToken: 0.00, + OutputPricePerMToken: 0.00, + Currency: "USD", + }, + "google/models/gemma-3n-e4b-it": { + Provider: "google", + Model: "models/gemma-3n-e4b-it", + InputPricePerMToken: 0.00, + OutputPricePerMToken: 0.00, + Currency: "USD", + }, + "google/models/gemma-4-26b-a4b-it": { + Provider: "google", + Model: "models/gemma-4-26b-a4b-it", + InputPricePerMToken: 0.00, + OutputPricePerMToken: 0.00, + Currency: "USD", + }, + 
"google/models/gemma-4-31b-it": { + Provider: "google", + Model: "models/gemma-4-31b-it", InputPricePerMToken: 0.00, OutputPricePerMToken: 0.00, Currency: "USD", @@ -204,6 +344,265 @@ var DefaultModelPricing = map[string]ModelPricing{ OutputPricePerMToken: 0.28, Currency: "USD", }, + "ollama_cloud/kimi-k2.5": { + Provider: "ollama_cloud", + Model: "kimi-k2.5", + InputPricePerMToken: 0.00, + OutputPricePerMToken: 0.00, + Currency: "USD", + }, + "ollama_cloud/kimi-k2.6": { + Provider: "ollama_cloud", + Model: "kimi-k2.6", + InputPricePerMToken: 0.00, + OutputPricePerMToken: 0.00, + Currency: "USD", + }, + "ollama_cloud/qwen3-coder:480b": { + Provider: "ollama_cloud", + Model: "qwen3-coder:480b", + InputPricePerMToken: 0.00, + OutputPricePerMToken: 0.00, + Currency: "USD", + }, + "ollama_cloud/qwen3-coder-next": { + Provider: "ollama_cloud", + Model: "qwen3-coder-next", + InputPricePerMToken: 0.00, + OutputPricePerMToken: 0.00, + Currency: "USD", + }, + "ollama_cloud/qwen3-vl:235b": { + Provider: "ollama_cloud", + Model: "qwen3-vl:235b", + InputPricePerMToken: 0.00, + OutputPricePerMToken: 0.00, + Currency: "USD", + }, + "ollama_cloud/qwen3-vl:235b-instruct": { + Provider: "ollama_cloud", + Model: "qwen3-vl:235b-instruct", + InputPricePerMToken: 0.00, + OutputPricePerMToken: 0.00, + Currency: "USD", + }, + "ollama_cloud/qwen3-next:80b": { + Provider: "ollama_cloud", + Model: "qwen3-next:80b", + InputPricePerMToken: 0.00, + OutputPricePerMToken: 0.00, + Currency: "USD", + }, + "ollama_cloud/qwen3.5:397b": { + Provider: "ollama_cloud", + Model: "qwen3.5:397b", + InputPricePerMToken: 0.00, + OutputPricePerMToken: 0.00, + Currency: "USD", + }, + "ollama_cloud/devstral-2:123b": { + Provider: "ollama_cloud", + Model: "devstral-2:123b", + InputPricePerMToken: 0.00, + OutputPricePerMToken: 0.00, + Currency: "USD", + }, + "ollama_cloud/devstral-small-2:24b": { + Provider: "ollama_cloud", + Model: "devstral-small-2:24b", + InputPricePerMToken: 0.00, + OutputPricePerMToken: 
0.00, + Currency: "USD", + }, + "ollama_cloud/gemma3:4b": { + Provider: "ollama_cloud", + Model: "gemma3:4b", + InputPricePerMToken: 0.00, + OutputPricePerMToken: 0.00, + Currency: "USD", + }, + "ollama_cloud/gemma3:12b": { + Provider: "ollama_cloud", + Model: "gemma3:12b", + InputPricePerMToken: 0.00, + OutputPricePerMToken: 0.00, + Currency: "USD", + }, + "ollama_cloud/gemma3:27b": { + Provider: "ollama_cloud", + Model: "gemma3:27b", + InputPricePerMToken: 0.00, + OutputPricePerMToken: 0.00, + Currency: "USD", + }, + "ollama_cloud/gemma4:31b": { + Provider: "ollama_cloud", + Model: "gemma4:31b", + InputPricePerMToken: 0.00, + OutputPricePerMToken: 0.00, + Currency: "USD", + }, + "ollama_cloud/ministral-3:3b": { + Provider: "ollama_cloud", + Model: "ministral-3:3b", + InputPricePerMToken: 0.00, + OutputPricePerMToken: 0.00, + Currency: "USD", + }, + "ollama_cloud/ministral-3:8b": { + Provider: "ollama_cloud", + Model: "ministral-3:8b", + InputPricePerMToken: 0.00, + OutputPricePerMToken: 0.00, + Currency: "USD", + }, + "ollama_cloud/ministral-3:14b": { + Provider: "ollama_cloud", + Model: "ministral-3:14b", + InputPricePerMToken: 0.00, + OutputPricePerMToken: 0.00, + Currency: "USD", + }, + "ollama_cloud/mistral-large-3:675b": { + Provider: "ollama_cloud", + Model: "mistral-large-3:675b", + InputPricePerMToken: 0.00, + OutputPricePerMToken: 0.00, + Currency: "USD", + }, + "ollama_cloud/glm-4.6": { + Provider: "ollama_cloud", + Model: "glm-4.6", + InputPricePerMToken: 0.00, + OutputPricePerMToken: 0.00, + Currency: "USD", + }, + "ollama_cloud/glm-4.7": { + Provider: "ollama_cloud", + Model: "glm-4.7", + InputPricePerMToken: 0.00, + OutputPricePerMToken: 0.00, + Currency: "USD", + }, + "ollama_cloud/glm-5": { + Provider: "ollama_cloud", + Model: "glm-5", + InputPricePerMToken: 0.00, + OutputPricePerMToken: 0.00, + Currency: "USD", + }, + "ollama_cloud/glm-5.1": { + Provider: "ollama_cloud", + Model: "glm-5.1", + InputPricePerMToken: 0.00, + OutputPricePerMToken: 
0.00, + Currency: "USD", + }, + "ollama_cloud/gpt-oss:20b": { + Provider: "ollama_cloud", + Model: "gpt-oss:20b", + InputPricePerMToken: 0.00, + OutputPricePerMToken: 0.00, + Currency: "USD", + }, + "ollama_cloud/gpt-oss:120b": { + Provider: "ollama_cloud", + Model: "gpt-oss:120b", + InputPricePerMToken: 0.00, + OutputPricePerMToken: 0.00, + Currency: "USD", + }, + "ollama_cloud/minimax-m2": { + Provider: "ollama_cloud", + Model: "minimax-m2", + InputPricePerMToken: 0.00, + OutputPricePerMToken: 0.00, + Currency: "USD", + }, + "ollama_cloud/minimax-m2.1": { + Provider: "ollama_cloud", + Model: "minimax-m2.1", + InputPricePerMToken: 0.00, + OutputPricePerMToken: 0.00, + Currency: "USD", + }, + "ollama_cloud/minimax-m2.5": { + Provider: "ollama_cloud", + Model: "minimax-m2.5", + InputPricePerMToken: 0.00, + OutputPricePerMToken: 0.00, + Currency: "USD", + }, + "ollama_cloud/minimax-m2.7": { + Provider: "ollama_cloud", + Model: "minimax-m2.7", + InputPricePerMToken: 0.00, + OutputPricePerMToken: 0.00, + Currency: "USD", + }, + "ollama_cloud/nemotron-3-nano:30b": { + Provider: "ollama_cloud", + Model: "nemotron-3-nano:30b", + InputPricePerMToken: 0.00, + OutputPricePerMToken: 0.00, + Currency: "USD", + }, + "ollama_cloud/nemotron-3-super": { + Provider: "ollama_cloud", + Model: "nemotron-3-super", + InputPricePerMToken: 0.00, + OutputPricePerMToken: 0.00, + Currency: "USD", + }, + "ollama_cloud/deepseek-v3.2": { + Provider: "ollama_cloud", + Model: "deepseek-v3.2", + InputPricePerMToken: 0.00, + OutputPricePerMToken: 0.00, + Currency: "USD", + }, + "ollama_cloud/deepseek-v4-flash": { + Provider: "ollama_cloud", + Model: "deepseek-v4-flash", + InputPricePerMToken: 0.00, + OutputPricePerMToken: 0.00, + Currency: "USD", + }, + "ollama_cloud/deepseek-v4-pro": { + Provider: "ollama_cloud", + Model: "deepseek-v4-pro", + InputPricePerMToken: 0.00, + OutputPricePerMToken: 0.00, + Currency: "USD", + }, + "ollama_cloud/gemini-3-flash-preview": { + Provider: "ollama_cloud", + 
Model: "gemini-3-flash-preview", + InputPricePerMToken: 0.00, + OutputPricePerMToken: 0.00, + Currency: "USD", + }, + "ollama_cloud/rnj-1:8b": { + Provider: "ollama_cloud", + Model: "rnj-1:8b", + InputPricePerMToken: 0.00, + OutputPricePerMToken: 0.00, + Currency: "USD", + }, + "moonshot/kimi-k2.5": { + Provider: "moonshot", + Model: "kimi-k2.5", + InputPricePerMToken: 0.60, + OutputPricePerMToken: 3.00, + Currency: "USD", + }, + "moonshot/kimi-k2.6": { + Provider: "moonshot", + Model: "kimi-k2.6", + InputPricePerMToken: 0.74, + OutputPricePerMToken: 4.66, + Currency: "USD", + }, "groq/llama-3.3-70b-versatile": { Provider: "groq", Model: "llama-3.3-70b-versatile", diff --git a/internal/domain/pricing.go b/internal/domain/pricing.go index e116d1fe..4ce44176 100644 --- a/internal/domain/pricing.go +++ b/internal/domain/pricing.go @@ -18,7 +18,7 @@ type SessionCostStats struct { TotalCost float64 TotalInputCost float64 TotalOutputCost float64 - PerModelStats map[string]*ModelCostStats // keyed by model name + PerModelStats map[string]*ModelCostStats Currency string } @@ -41,8 +41,8 @@ type PricingService interface { CalculateCost(model string, inputTokens, outputTokens int) (inputCost, outputCost, totalCost float64) // FormatModelPricing returns a formatted string describing the model's pricing. - // Returns empty string if pricing is disabled. - // Returns "free" if both input and output prices are 0.0. + // Returns empty string if pricing is disabled or the model has no pricing entry. + // Returns "free" only when an explicit pricing entry sets both prices to 0.0. // Returns "$X.XX/$Y.YY per MTok" for paid models. 
FormatModelPricing(model string) string } diff --git a/internal/models/context_test.go b/internal/models/context_test.go index 43f540c1..1c7487e3 100644 --- a/internal/models/context_test.go +++ b/internal/models/context_test.go @@ -44,6 +44,31 @@ func TestProviderPrefixStripping(t *testing.T) { } } +func TestClaudeContextWindow(t *testing.T) { + testModels := []struct { + model string + expected int + }{ + {"anthropic/claude-opus-4-7", 1000000}, + {"anthropic/claude-sonnet-4-6", 200000}, + {"anthropic/claude-opus-4-6", 200000}, + {"anthropic/claude-opus-4-5-20251101", 200000}, + {"anthropic/claude-haiku-4-5-20251001", 200000}, + {"anthropic/claude-sonnet-4-5-20250929", 200000}, + {"anthropic/claude-opus-4-1-20250805", 200000}, + {"anthropic/claude-opus-4-20250514", 200000}, + {"anthropic/claude-sonnet-4-20250514", 200000}, + } + + for _, tc := range testModels { + result := EstimateContextWindow(tc.model) + t.Logf("Model: %-45s -> Context Window: %d (expected: %d)", tc.model, result, tc.expected) + if result != tc.expected { + t.Errorf("Model %s: got %d, expected %d", tc.model, result, tc.expected) + } + } +} + func TestMoonshotContextWindow(t *testing.T) { testModels := []struct { model string diff --git a/internal/services/pricing_service.go b/internal/services/pricing_service.go index a8c3e0d6..a8b74b0d 100644 --- a/internal/services/pricing_service.go +++ b/internal/services/pricing_service.go @@ -3,8 +3,8 @@ package services import ( "fmt" - "github.com/inference-gateway/cli/config" - "github.com/inference-gateway/cli/internal/domain" + config "github.com/inference-gateway/cli/config" + domain "github.com/inference-gateway/cli/internal/domain" ) // PricingServiceImpl implements the PricingService interface. @@ -26,22 +26,26 @@ func (p *PricingServiceImpl) IsEnabled() bool { return p.config.Enabled } +// resolvePricing returns the input/output price for a model and whether it's known. +// Custom prices take precedence over defaults. 
+func (p *PricingServiceImpl) resolvePricing(model string) (input, output float64, ok bool) { + if customPrice, exists := p.config.CustomPrices[model]; exists { + return customPrice.InputPricePerMToken, customPrice.OutputPricePerMToken, true + } + if defaultPrice, exists := p.defaultPrices[model]; exists { + return defaultPrice.InputPricePerMToken, defaultPrice.OutputPricePerMToken, true + } + return 0.0, 0.0, false +} + // GetInputPrice retrieves the input price per million tokens for a specific model. // Returns 0.0 for unknown models (e.g., Ollama, custom models). func (p *PricingServiceImpl) GetInputPrice(model string) float64 { if !p.config.Enabled { return 0.0 } - - if customPrice, exists := p.config.CustomPrices[model]; exists { - return customPrice.InputPricePerMToken - } - - if defaultPrice, exists := p.defaultPrices[model]; exists { - return defaultPrice.InputPricePerMToken - } - - return 0.0 + input, _, _ := p.resolvePricing(model) + return input } // GetOutputPrice retrieves the output price per million tokens for a specific model. @@ -50,16 +54,8 @@ func (p *PricingServiceImpl) GetOutputPrice(model string) float64 { if !p.config.Enabled { return 0.0 } - - if customPrice, exists := p.config.CustomPrices[model]; exists { - return customPrice.OutputPricePerMToken - } - - if defaultPrice, exists := p.defaultPrices[model]; exists { - return defaultPrice.OutputPricePerMToken - } - - return 0.0 + _, output, _ := p.resolvePricing(model) + return output } // CalculateCost computes the total cost for a given number of input and output tokens. @@ -80,16 +76,19 @@ func (p *PricingServiceImpl) CalculateCost(model string, inputTokens, outputToke } // FormatModelPricing returns a formatted string describing the model's pricing. -// Returns empty string if pricing is disabled. -// Returns "free" if both input and output prices are 0.0. 
+// Returns empty string if pricing is disabled or the model has no pricing entry +// (callers should not assume "no entry" means "free"). +// Returns "free" only when an explicit pricing entry sets both prices to 0.0. // Returns "$X.XX/$Y.YY per MTok" for paid models. func (p *PricingServiceImpl) FormatModelPricing(model string) string { if !p.config.Enabled { return "" } - inputPrice := p.GetInputPrice(model) - outputPrice := p.GetOutputPrice(model) + inputPrice, outputPrice, ok := p.resolvePricing(model) + if !ok { + return "" + } if inputPrice == 0.0 && outputPrice == 0.0 { return "free" diff --git a/internal/services/pricing_service_test.go b/internal/services/pricing_service_test.go index 20b7696a..8a06cd35 100644 --- a/internal/services/pricing_service_test.go +++ b/internal/services/pricing_service_test.go @@ -58,11 +58,11 @@ func TestPricingService_FormatModelPricing(t *testing.T) { expectedOutput: "$0.14/$0.28 per MTok", }, { - name: "unknown model returns free", + name: "unknown model returns empty string", enabled: true, model: "unknown-model", customPrices: map[string]config.CustomPricing{}, - expectedOutput: "free", + expectedOutput: "", }, } @@ -81,6 +81,41 @@ func TestPricingService_FormatModelPricing(t *testing.T) { } } +func TestPricingService_GoogleModelDefaults(t *testing.T) { + tests := []struct { + model string + expectedOutput string + }{ + {"google/models/gemini-2.5-pro", "$1.25/$10.00 per MTok"}, + {"google/models/gemini-2.5-flash", "$0.30/$2.50 per MTok"}, + {"google/models/gemini-2.5-flash-lite", "$0.10/$0.40 per MTok"}, + {"google/models/gemini-2.0-flash", "$0.10/$0.40 per MTok"}, + {"google/models/gemini-2.0-flash-001", "$0.10/$0.40 per MTok"}, + {"google/models/gemini-2.0-flash-lite", "$0.07/$0.30 per MTok"}, + {"google/models/gemini-3-flash-preview", "$0.50/$3.00 per MTok"}, + {"google/models/gemini-3.1-pro-preview", "$2.00/$12.00 per MTok"}, + {"google/models/gemma-3-1b-it", "free"}, + {"google/models/gemma-3-27b-it", "free"}, + 
{"google/models/gemma-3n-e2b-it", "free"}, + {"google/models/gemma-4-31b-it", "free"}, + {"google/models/imagen-4.0-generate-001", ""}, + {"google/models/veo-3.0-generate-001", ""}, + } + + cfg := &config.PricingConfig{ + Enabled: true, + CustomPrices: map[string]config.CustomPricing{}, + } + service := NewPricingService(cfg) + + for _, tt := range tests { + t.Run(tt.model, func(t *testing.T) { + result := service.FormatModelPricing(tt.model) + assert.Equal(t, tt.expectedOutput, result) + }) + } +} + func TestPricingService_IsEnabled(t *testing.T) { tests := []struct { name string