From 78f2ead38af09602dd692bd17f9750f1ad8cb263 Mon Sep 17 00:00:00 2001 From: Andre Landgraf Date: Fri, 3 Jul 2026 11:41:48 -0700 Subject: [PATCH 1/3] neon: sync catalog with live AI Gateway probe (add 12, remove gpt-5-5) Verified every Databricks Foundation Model API endpoint against a live Neon AI Gateway branch (us-east-2). Adds 12 models confirmed working (with live-checked image-input + tool-calling capabilities) and removes gpt-5-5, which the gateway rejects as an unknown model. --- providers/neon/models/claude-opus-4-8.toml | 12 ++++++++++ providers/neon/models/gemini-3-5-flash.toml | 8 +++++++ providers/neon/models/gemma-3-12b.toml | 23 +++++++++++++++++++ providers/neon/models/gpt-5-1-codex-max.toml | 7 ++++++ providers/neon/models/gpt-5-1-codex-mini.toml | 7 ++++++ providers/neon/models/gpt-5-2-codex.toml | 7 ++++++ providers/neon/models/gpt-5-3-codex.toml | 7 ++++++ providers/neon/models/gpt-5-5.toml | 17 -------------- providers/neon/models/llama-4-maverick.toml | 5 ++++ .../models/meta-llama-3-1-8b-instruct.toml | 23 +++++++++++++++++++ .../models/meta-llama-3-3-70b-instruct.toml | 6 +++++ .../models/qwen3-next-80b-a3b-instruct.toml | 5 ++++ providers/neon/models/qwen35-122b-a10b.toml | 14 +++++++++++ 13 files changed, 124 insertions(+), 17 deletions(-) create mode 100644 providers/neon/models/claude-opus-4-8.toml create mode 100644 providers/neon/models/gemini-3-5-flash.toml create mode 100644 providers/neon/models/gemma-3-12b.toml create mode 100644 providers/neon/models/gpt-5-1-codex-max.toml create mode 100644 providers/neon/models/gpt-5-1-codex-mini.toml create mode 100644 providers/neon/models/gpt-5-2-codex.toml create mode 100644 providers/neon/models/gpt-5-3-codex.toml delete mode 100644 providers/neon/models/gpt-5-5.toml create mode 100644 providers/neon/models/llama-4-maverick.toml create mode 100644 providers/neon/models/meta-llama-3-1-8b-instruct.toml create mode 100644 providers/neon/models/meta-llama-3-3-70b-instruct.toml create mode 100644 providers/neon/models/qwen3-next-80b-a3b-instruct.toml create mode 100644 providers/neon/models/qwen35-122b-a10b.toml diff --git a/providers/neon/models/claude-opus-4-8.toml b/providers/neon/models/claude-opus-4-8.toml new file mode 100644 index 0000000000..980c04951e --- /dev/null +++ b/providers/neon/models/claude-opus-4-8.toml @@ -0,0 +1,12 @@ +base_model = "anthropic/claude-opus-4-8" +reasoning_options = [{ type = "toggle" }, { type = "budget_tokens", min = 1_024, max = 127_999 }] + +[cost] +input = 5 +output = 25 +cache_read = 0.5 +cache_write = 6.25 + +[experimental.modes.fast] +cost = { input = 10, output = 50, cache_read = 1, cache_write = 12.5 } +provider = { body = { speed = "fast" }, headers = { anthropic-beta = "fast-mode-2026-02-01" } } diff --git a/providers/neon/models/gemini-3-5-flash.toml b/providers/neon/models/gemini-3-5-flash.toml new file mode 100644 index 0000000000..0d75d4a0d3 --- /dev/null +++ b/providers/neon/models/gemini-3-5-flash.toml @@ -0,0 +1,8 @@ +base_model = "google/gemini-3.5-flash" +reasoning_options = [{ type = "effort", values = ["minimal", "low", "medium", "high"] }] + +[cost] +input = 1.5 +output = 9 +cache_read = 0.15 +input_audio = 1.5 diff --git a/providers/neon/models/gemma-3-12b.toml b/providers/neon/models/gemma-3-12b.toml new file mode 100644 index 0000000000..8e11e47b78 --- /dev/null +++ b/providers/neon/models/gemma-3-12b.toml @@ -0,0 +1,23 @@ +name = "Gemma 3 12B" +family = "gemma" +release_date = "2025-03-13" +last_updated = "2025-03-13" +attachment = true +reasoning = false +temperature = true +tool_call = true +structured_output = true +knowledge = "2024-08-31" +open_weights = true + +[cost] +input = 0.15 +output = 0.5 + +[limit] +context = 131_072 +output = 16_384 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/neon/models/gpt-5-1-codex-max.toml b/providers/neon/models/gpt-5-1-codex-max.toml new file mode 100644 index 0000000000..57e22028d4 --- /dev/null +++ b/providers/neon/models/gpt-5-1-codex-max.toml @@ -0,0 +1,7 @@ +base_model = "openai/gpt-5.1-codex-max" +reasoning_options = [{ type = "effort", values = ["low", "medium", "high", "xhigh"] }] + +[cost] +input = 1.25 +output = 10 +cache_read = 0.125 diff --git a/providers/neon/models/gpt-5-1-codex-mini.toml b/providers/neon/models/gpt-5-1-codex-mini.toml new file mode 100644 index 0000000000..3cb2b12eeb --- /dev/null +++ b/providers/neon/models/gpt-5-1-codex-mini.toml @@ -0,0 +1,7 @@ +base_model = "openai/gpt-5.1-codex-mini" +reasoning_options = [{ type = "effort", values = ["low", "medium", "high"] }] + +[cost] +input = 0.25 +output = 2 +cache_read = 0.025 diff --git a/providers/neon/models/gpt-5-2-codex.toml b/providers/neon/models/gpt-5-2-codex.toml new file mode 100644 index 0000000000..c084afebb9 --- /dev/null +++ b/providers/neon/models/gpt-5-2-codex.toml @@ -0,0 +1,7 @@ +base_model = "openai/gpt-5.2-codex" +reasoning_options = [{ type = "effort", values = ["low", "medium", "high", "xhigh"] }] + +[cost] +input = 1.75 +output = 14 +cache_read = 0.175 diff --git a/providers/neon/models/gpt-5-3-codex.toml b/providers/neon/models/gpt-5-3-codex.toml new file mode 100644 index 0000000000..949c3ed11a --- /dev/null +++ b/providers/neon/models/gpt-5-3-codex.toml @@ -0,0 +1,7 @@ +base_model = "openai/gpt-5.3-codex" +reasoning_options = [{ type = "effort", values = ["none", "low", "medium", "high", "xhigh"] }] + +[cost] +input = 1.75 +output = 14 +cache_read = 0.175 diff --git a/providers/neon/models/gpt-5-5.toml b/providers/neon/models/gpt-5-5.toml deleted file mode 100644 index 9b565dd3a2..0000000000 --- a/providers/neon/models/gpt-5-5.toml +++ /dev/null @@ -1,17 +0,0 @@ -base_model = "openai/gpt-5.5" -reasoning_options = [{ type = "effort", values = ["none", "low", "medium", "high", "xhigh"] }] - -[cost] -input = 5 -output = 30 -cache_read = 0.5 - -[[cost.tiers]] -tier = { type = "context", size = 272_000 } -input = 10 -output = 45 -cache_read = 1 - -[experimental.modes.fast] -cost = { input = 12.5, output = 75, cache_read = 1.25 } -provider = { body = { service_tier = "priority" } } diff --git a/providers/neon/models/llama-4-maverick.toml b/providers/neon/models/llama-4-maverick.toml new file mode 100644 index 0000000000..2f7ae0d63c --- /dev/null +++ b/providers/neon/models/llama-4-maverick.toml @@ -0,0 +1,5 @@ +base_model = "meta/llama-4-maverick-17b-instruct" + +[cost] +input = 0.5 +output = 1.5 diff --git a/providers/neon/models/meta-llama-3-1-8b-instruct.toml b/providers/neon/models/meta-llama-3-1-8b-instruct.toml new file mode 100644 index 0000000000..b394413e0d --- /dev/null +++ b/providers/neon/models/meta-llama-3-1-8b-instruct.toml @@ -0,0 +1,23 @@ +name = "Llama 3.1 8B Instruct" +family = "llama" +release_date = "2024-07-23" +last_updated = "2024-07-23" +attachment = false +reasoning = false +temperature = true +tool_call = true +structured_output = true +knowledge = "2023-12-31" +open_weights = true + +[cost] +input = 0.15 +output = 0.45 + +[limit] +context = 131_072 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/neon/models/meta-llama-3-3-70b-instruct.toml b/providers/neon/models/meta-llama-3-3-70b-instruct.toml new file mode 100644 index 0000000000..0fdd5af25d --- /dev/null +++ b/providers/neon/models/meta-llama-3-3-70b-instruct.toml @@ -0,0 +1,6 @@ +base_model = "meta/llama-3.3-70b-instruct" +attachment = false + +[cost] +input = 0.5 +output = 1.5 diff --git a/providers/neon/models/qwen3-next-80b-a3b-instruct.toml b/providers/neon/models/qwen3-next-80b-a3b-instruct.toml new file mode 100644 index 0000000000..c21be63d5a --- /dev/null +++ b/providers/neon/models/qwen3-next-80b-a3b-instruct.toml @@ -0,0 +1,5 @@ +base_model = "alibaba/qwen3-next-80b-a3b-instruct" + +[cost] +input = 0.15 +output = 1.2 diff --git a/providers/neon/models/qwen35-122b-a10b.toml b/providers/neon/models/qwen35-122b-a10b.toml new file mode 100644 index 0000000000..0bd9f09b5c --- /dev/null +++ b/providers/neon/models/qwen35-122b-a10b.toml @@ -0,0 +1,14 @@ +base_model = "alibaba/qwen3.5-122b-a10b" +attachment = false + +[cost] +input = 0.22 +output = 2.2 + +[limit] +context = 262_144 +output = 8_000 + +[modalities] +input = ["text"] +output = ["text"] From 8c1ae6860c5ab71ae1d5feae38f512999e32c82b Mon Sep 17 00:00:00 2001 From: Andre Landgraf Date: Fri, 3 Jul 2026 14:17:17 -0700 Subject: [PATCH 2/3] neon: add required description to inline gemma-3-12b and llama-3.1-8b The two inline models (no base_model to inherit from) were missing the schema-required `description` field, failing CI validation. --- providers/neon/models/gemma-3-12b.toml | 1 + providers/neon/models/meta-llama-3-1-8b-instruct.toml | 1 + 2 files changed, 2 insertions(+) diff --git a/providers/neon/models/gemma-3-12b.toml b/providers/neon/models/gemma-3-12b.toml index 8e11e47b78..963b076df4 100644 --- a/providers/neon/models/gemma-3-12b.toml +++ b/providers/neon/models/gemma-3-12b.toml @@ -1,4 +1,5 @@ name = "Gemma 3 12B" +description = "Google's open-weight Gemma 3 vision-language model for text and image understanding" family = "gemma" release_date = "2025-03-13" last_updated = "2025-03-13" diff --git a/providers/neon/models/meta-llama-3-1-8b-instruct.toml b/providers/neon/models/meta-llama-3-1-8b-instruct.toml index b394413e0d..000b16fcdd 100644 --- a/providers/neon/models/meta-llama-3-1-8b-instruct.toml +++ b/providers/neon/models/meta-llama-3-1-8b-instruct.toml @@ -1,4 +1,5 @@ name = "Llama 3.1 8B Instruct" +description = "Meta's compact open-weight Llama 3.1 model for fast, low-cost text generation" family = "llama" release_date = "2024-07-23" last_updated = "2024-07-23" From f12ca516493c47359b7ec028ec8e5244792d74e6 Mon Sep 17 00:00:00 2001 From: Andre Landgraf Date: Fri, 3 Jul 2026 14:20:38 -0700 Subject: [PATCH 3/3] neon: add reasoning_options to qwen35-122b-a10b Qwen3.5 122B inherits reasoning=true, so the schema requires reasoning_options. Mirrors the canonical alibaba entry (toggle + budget_tokens). --- providers/neon/models/qwen35-122b-a10b.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/providers/neon/models/qwen35-122b-a10b.toml b/providers/neon/models/qwen35-122b-a10b.toml index 0bd9f09b5c..d5afed3343 100644 --- a/providers/neon/models/qwen35-122b-a10b.toml +++ b/providers/neon/models/qwen35-122b-a10b.toml @@ -1,5 +1,6 @@ base_model = "alibaba/qwen3.5-122b-a10b" attachment = false +reasoning_options = [{ type = "toggle" }, { type = "budget_tokens" }] [cost] input = 0.22