From 78f2ead38af09602dd692bd17f9750f1ad8cb263 Mon Sep 17 00:00:00 2001
From: Andre Landgraf <andre.timo.landgraf@gmail.com>
Date: Fri, 3 Jul 2026 11:41:48 -0700
Subject: [PATCH 1/3] neon: sync catalog with live AI Gateway probe (add 12,
 remove gpt-5-5)

Verified every Databricks Foundation Model API endpoint against a live Neon
AI Gateway branch (us-east-2). This adds the 12 models confirmed working,
with their image-input and tool-calling capabilities checked live, and
removes gpt-5-5, which the gateway rejects as an unknown model.
---
 providers/neon/models/claude-opus-4-8.toml    | 12 ++++++++++
 providers/neon/models/gemini-3-5-flash.toml   |  8 +++++++
 providers/neon/models/gemma-3-12b.toml        | 23 +++++++++++++++++++
 providers/neon/models/gpt-5-1-codex-max.toml  |  7 ++++++
 providers/neon/models/gpt-5-1-codex-mini.toml |  7 ++++++
 providers/neon/models/gpt-5-2-codex.toml      |  7 ++++++
 providers/neon/models/gpt-5-3-codex.toml      |  7 ++++++
 providers/neon/models/gpt-5-5.toml            | 17 --------------
 providers/neon/models/llama-4-maverick.toml   |  5 ++++
 .../models/meta-llama-3-1-8b-instruct.toml    | 23 +++++++++++++++++++
 .../models/meta-llama-3-3-70b-instruct.toml   |  6 +++++
 .../models/qwen3-next-80b-a3b-instruct.toml   |  5 ++++
 providers/neon/models/qwen35-122b-a10b.toml   | 14 +++++++++++
 13 files changed, 124 insertions(+), 17 deletions(-)
 create mode 100644 providers/neon/models/claude-opus-4-8.toml
 create mode 100644 providers/neon/models/gemini-3-5-flash.toml
 create mode 100644 providers/neon/models/gemma-3-12b.toml
 create mode 100644 providers/neon/models/gpt-5-1-codex-max.toml
 create mode 100644 providers/neon/models/gpt-5-1-codex-mini.toml
 create mode 100644 providers/neon/models/gpt-5-2-codex.toml
 create mode 100644 providers/neon/models/gpt-5-3-codex.toml
 delete mode 100644 providers/neon/models/gpt-5-5.toml
 create mode 100644 providers/neon/models/llama-4-maverick.toml
 create mode 100644 providers/neon/models/meta-llama-3-1-8b-instruct.toml
 create mode 100644 providers/neon/models/meta-llama-3-3-70b-instruct.toml
 create mode 100644 providers/neon/models/qwen3-next-80b-a3b-instruct.toml
 create mode 100644 providers/neon/models/qwen35-122b-a10b.toml

diff --git a/providers/neon/models/claude-opus-4-8.toml b/providers/neon/models/claude-opus-4-8.toml
new file mode 100644
index 0000000000..980c04951e
--- /dev/null
+++ b/providers/neon/models/claude-opus-4-8.toml
@@ -0,0 +1,12 @@
+base_model = "anthropic/claude-opus-4-8"
+reasoning_options = [{ type = "toggle" }, { type = "budget_tokens", min = 1_024, max = 127_999 }]
+
+[cost]
+input = 5
+output = 25
+cache_read = 0.5
+cache_write = 6.25
+
+[experimental.modes.fast]
+cost = { input = 10, output = 50, cache_read = 1, cache_write = 12.5 }
+provider = { body = { speed = "fast" }, headers = { anthropic-beta = "fast-mode-2026-02-01" } }
diff --git a/providers/neon/models/gemini-3-5-flash.toml b/providers/neon/models/gemini-3-5-flash.toml
new file mode 100644
index 0000000000..0d75d4a0d3
--- /dev/null
+++ b/providers/neon/models/gemini-3-5-flash.toml
@@ -0,0 +1,8 @@
+base_model = "google/gemini-3.5-flash"
+reasoning_options = [{ type = "effort", values = ["minimal", "low", "medium", "high"] }]
+
+[cost]
+input = 1.5
+output = 9
+cache_read = 0.15
+input_audio = 1.5
diff --git a/providers/neon/models/gemma-3-12b.toml b/providers/neon/models/gemma-3-12b.toml
new file mode 100644
index 0000000000..8e11e47b78
--- /dev/null
+++ b/providers/neon/models/gemma-3-12b.toml
@@ -0,0 +1,23 @@
+name = "Gemma 3 12B"
+family = "gemma"
+release_date = "2025-03-13"
+last_updated = "2025-03-13"
+attachment = true
+reasoning = false
+temperature = true
+tool_call = true
+structured_output = true
+knowledge = "2024-08-31"
+open_weights = true
+
+[cost]
+input = 0.15
+output = 0.5
+
+[limit]
+context = 131_072
+output = 16_384
+
+[modalities]
+input = ["text", "image"]
+output = ["text"]
diff --git a/providers/neon/models/gpt-5-1-codex-max.toml b/providers/neon/models/gpt-5-1-codex-max.toml
new file mode 100644
index 0000000000..57e22028d4
--- /dev/null
+++ b/providers/neon/models/gpt-5-1-codex-max.toml
@@ -0,0 +1,7 @@
+base_model = "openai/gpt-5.1-codex-max"
+reasoning_options = [{ type = "effort", values = ["low", "medium", "high", "xhigh"] }]
+
+[cost]
+input = 1.25
+output = 10
+cache_read = 0.125
diff --git a/providers/neon/models/gpt-5-1-codex-mini.toml b/providers/neon/models/gpt-5-1-codex-mini.toml
new file mode 100644
index 0000000000..3cb2b12eeb
--- /dev/null
+++ b/providers/neon/models/gpt-5-1-codex-mini.toml
@@ -0,0 +1,7 @@
+base_model = "openai/gpt-5.1-codex-mini"
+reasoning_options = [{ type = "effort", values = ["low", "medium", "high"] }]
+
+[cost]
+input = 0.25
+output = 2
+cache_read = 0.025
diff --git a/providers/neon/models/gpt-5-2-codex.toml b/providers/neon/models/gpt-5-2-codex.toml
new file mode 100644
index 0000000000..c084afebb9
--- /dev/null
+++ b/providers/neon/models/gpt-5-2-codex.toml
@@ -0,0 +1,7 @@
+base_model = "openai/gpt-5.2-codex"
+reasoning_options = [{ type = "effort", values = ["low", "medium", "high", "xhigh"] }]
+
+[cost]
+input = 1.75
+output = 14
+cache_read = 0.175
diff --git a/providers/neon/models/gpt-5-3-codex.toml b/providers/neon/models/gpt-5-3-codex.toml
new file mode 100644
index 0000000000..949c3ed11a
--- /dev/null
+++ b/providers/neon/models/gpt-5-3-codex.toml
@@ -0,0 +1,7 @@
+base_model = "openai/gpt-5.3-codex"
+reasoning_options = [{ type = "effort", values = ["none", "low", "medium", "high", "xhigh"] }]
+
+[cost]
+input = 1.75
+output = 14
+cache_read = 0.175
diff --git a/providers/neon/models/gpt-5-5.toml b/providers/neon/models/gpt-5-5.toml
deleted file mode 100644
index 9b565dd3a2..0000000000
--- a/providers/neon/models/gpt-5-5.toml
+++ /dev/null
@@ -1,17 +0,0 @@
-base_model = "openai/gpt-5.5"
-reasoning_options = [{ type = "effort", values = ["none", "low", "medium", "high", "xhigh"] }]
-
-[cost]
-input = 5
-output = 30
-cache_read = 0.5
-
-[[cost.tiers]]
-tier = { type = "context", size = 272_000 }
-input = 10
-output = 45
-cache_read = 1
-
-[experimental.modes.fast]
-cost = { input = 12.5, output = 75, cache_read = 1.25 }
-provider = { body = { service_tier = "priority" } }
diff --git a/providers/neon/models/llama-4-maverick.toml b/providers/neon/models/llama-4-maverick.toml
new file mode 100644
index 0000000000..2f7ae0d63c
--- /dev/null
+++ b/providers/neon/models/llama-4-maverick.toml
@@ -0,0 +1,5 @@
+base_model = "meta/llama-4-maverick-17b-instruct"
+
+[cost]
+input = 0.5
+output = 1.5
diff --git a/providers/neon/models/meta-llama-3-1-8b-instruct.toml b/providers/neon/models/meta-llama-3-1-8b-instruct.toml
new file mode 100644
index 0000000000..b394413e0d
--- /dev/null
+++ b/providers/neon/models/meta-llama-3-1-8b-instruct.toml
@@ -0,0 +1,23 @@
+name = "Llama 3.1 8B Instruct"
+family = "llama"
+release_date = "2024-07-23"
+last_updated = "2024-07-23"
+attachment = false
+reasoning = false
+temperature = true
+tool_call = true
+structured_output = true
+knowledge = "2023-12-31"
+open_weights = true
+
+[cost]
+input = 0.15
+output = 0.45
+
+[limit]
+context = 131_072
+output = 16_384
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/neon/models/meta-llama-3-3-70b-instruct.toml b/providers/neon/models/meta-llama-3-3-70b-instruct.toml
new file mode 100644
index 0000000000..0fdd5af25d
--- /dev/null
+++ b/providers/neon/models/meta-llama-3-3-70b-instruct.toml
@@ -0,0 +1,6 @@
+base_model = "meta/llama-3.3-70b-instruct"
+attachment = false
+
+[cost]
+input = 0.5
+output = 1.5
diff --git a/providers/neon/models/qwen3-next-80b-a3b-instruct.toml b/providers/neon/models/qwen3-next-80b-a3b-instruct.toml
new file mode 100644
index 0000000000..c21be63d5a
--- /dev/null
+++ b/providers/neon/models/qwen3-next-80b-a3b-instruct.toml
@@ -0,0 +1,5 @@
+base_model = "alibaba/qwen3-next-80b-a3b-instruct"
+
+[cost]
+input = 0.15
+output = 1.2
diff --git a/providers/neon/models/qwen35-122b-a10b.toml b/providers/neon/models/qwen35-122b-a10b.toml
new file mode 100644
index 0000000000..0bd9f09b5c
--- /dev/null
+++ b/providers/neon/models/qwen35-122b-a10b.toml
@@ -0,0 +1,14 @@
+base_model = "alibaba/qwen3.5-122b-a10b"
+attachment = false
+
+[cost]
+input = 0.22
+output = 2.2
+
+[limit]
+context = 262_144
+output = 8_000
+
+[modalities]
+input = ["text"]
+output = ["text"]

From 8c1ae6860c5ab71ae1d5feae38f512999e32c82b Mon Sep 17 00:00:00 2001
From: Andre Landgraf <andre.timo.landgraf@gmail.com>
Date: Fri, 3 Jul 2026 14:17:17 -0700
Subject: [PATCH 2/3] neon: add required description to inline gemma-3-12b and
 llama-3.1-8b

The two inline models (no base_model to inherit from) were missing the
schema-required `description` field, failing CI validation.
---
 providers/neon/models/gemma-3-12b.toml                | 1 +
 providers/neon/models/meta-llama-3-1-8b-instruct.toml | 1 +
 2 files changed, 2 insertions(+)

diff --git a/providers/neon/models/gemma-3-12b.toml b/providers/neon/models/gemma-3-12b.toml
index 8e11e47b78..963b076df4 100644
--- a/providers/neon/models/gemma-3-12b.toml
+++ b/providers/neon/models/gemma-3-12b.toml
@@ -1,4 +1,5 @@
 name = "Gemma 3 12B"
+description = "Google's open-weight Gemma 3 vision-language model for text and image understanding"
 family = "gemma"
 release_date = "2025-03-13"
 last_updated = "2025-03-13"
diff --git a/providers/neon/models/meta-llama-3-1-8b-instruct.toml b/providers/neon/models/meta-llama-3-1-8b-instruct.toml
index b394413e0d..000b16fcdd 100644
--- a/providers/neon/models/meta-llama-3-1-8b-instruct.toml
+++ b/providers/neon/models/meta-llama-3-1-8b-instruct.toml
@@ -1,4 +1,5 @@
 name = "Llama 3.1 8B Instruct"
+description = "Meta's compact open-weight Llama 3.1 model for fast, low-cost text generation"
 family = "llama"
 release_date = "2024-07-23"
 last_updated = "2024-07-23"

From f12ca516493c47359b7ec028ec8e5244792d74e6 Mon Sep 17 00:00:00 2001
From: Andre Landgraf <andre.timo.landgraf@gmail.com>
Date: Fri, 3 Jul 2026 14:20:38 -0700
Subject: [PATCH 3/3] neon: add reasoning_options to qwen35-122b-a10b

Qwen3.5 122B inherits reasoning=true from its base_model, so the schema
requires reasoning_options. The new value mirrors the canonical alibaba
entry (toggle + budget_tokens).
---
 providers/neon/models/qwen35-122b-a10b.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/providers/neon/models/qwen35-122b-a10b.toml b/providers/neon/models/qwen35-122b-a10b.toml
index 0bd9f09b5c..d5afed3343 100644
--- a/providers/neon/models/qwen35-122b-a10b.toml
+++ b/providers/neon/models/qwen35-122b-a10b.toml
@@ -1,5 +1,6 @@
 base_model = "alibaba/qwen3.5-122b-a10b"
 attachment = false
+reasoning_options = [{ type = "toggle" }, { type = "budget_tokens" }]
 
 [cost]
 input = 0.22