From 4c78ea9f36e4a41d795187d759dee2e117417b56 Mon Sep 17 00:00:00 2001
From: manascb1344 <zephop76593@gmail.com>
Date: Wed, 4 Feb 2026 20:16:01 +0530
Subject: [PATCH 1/6] feat(nebius): add new providers for Nebius Token Factory

- Add MiniMaxAI provider with MiniMax-M2.1 model
- Add PrimeIntellect provider with INTELLECT-3 model
- Add black-forest-labs provider with FLUX.1-schnell and FLUX.1-dev
- Add BAAI provider with bge-multilingual-gemma2 and BGE-ICL
- Add intfloat provider with e5-mistral-7b-instruct
- Add Google provider with Gemma-2-2b-it, Gemma-2-9b-it-fast, Gemma-3-27b-it, and Gemma-3-27b-it-fast
---
 providers/nebius/models/BAAI/bge-en-icl.toml  | 24 +++++++++++++++
 .../models/BAAI/bge-multilingual-gemma2.toml  | 24 +++++++++++++++
 .../nebius/models/MiniMaxAI/minimax-m2.1.toml | 29 +++++++++++++++++++
 .../models/PrimeIntellect/intellect-3.toml    | 25 ++++++++++++++++
 .../models/black-forest-labs/flux-dev.toml    | 23 +++++++++++++++
 .../black-forest-labs/flux-schnell.toml       | 23 +++++++++++++++
 .../nebius/models/google/gemma-2-2b-it.toml   | 25 ++++++++++++++++
 .../models/google/gemma-2-9b-it-fast.toml     | 25 ++++++++++++++++
 .../models/google/gemma-3-27b-it-fast.toml    | 25 ++++++++++++++++
 .../nebius/models/google/gemma-3-27b-it.toml  | 25 ++++++++++++++++
 .../intfloat/e5-mistral-7b-instruct.toml      | 24 +++++++++++++++
 11 files changed, 272 insertions(+)
 create mode 100644 providers/nebius/models/BAAI/bge-en-icl.toml
 create mode 100644 providers/nebius/models/BAAI/bge-multilingual-gemma2.toml
 create mode 100644 providers/nebius/models/MiniMaxAI/minimax-m2.1.toml
 create mode 100644 providers/nebius/models/PrimeIntellect/intellect-3.toml
 create mode 100644 providers/nebius/models/black-forest-labs/flux-dev.toml
 create mode 100644 providers/nebius/models/black-forest-labs/flux-schnell.toml
 create mode 100644 providers/nebius/models/google/gemma-2-2b-it.toml
 create mode 100644 providers/nebius/models/google/gemma-2-9b-it-fast.toml
 create mode 100644 providers/nebius/models/google/gemma-3-27b-it-fast.toml
 create mode 100644 providers/nebius/models/google/gemma-3-27b-it.toml
 create mode 100644 providers/nebius/models/intfloat/e5-mistral-7b-instruct.toml

diff --git a/providers/nebius/models/BAAI/bge-en-icl.toml b/providers/nebius/models/BAAI/bge-en-icl.toml
new file mode 100644
index 000000000..d1fb98e82
--- /dev/null
+++ b/providers/nebius/models/BAAI/bge-en-icl.toml
@@ -0,0 +1,24 @@
+name = "BGE-ICL"
+family = "text-embedding"
+attachment = false
+reasoning = false
+tool_call = false
+structured_output = false
+temperature = false
+knowledge = "2024-06"
+release_date = "2024-07-30"
+last_updated = "2026-02-04"
+open_weights = true
+
+[cost]
+input = 0.01
+output = 0.00
+
+[limit]
+context = 32_768
+input = 32_768
+output = 0
+
+[modalities]
+input = ["text"]
+output = ["text"]
\ No newline at end of file
diff --git a/providers/nebius/models/BAAI/bge-multilingual-gemma2.toml b/providers/nebius/models/BAAI/bge-multilingual-gemma2.toml
new file mode 100644
index 000000000..e238898eb
--- /dev/null
+++ b/providers/nebius/models/BAAI/bge-multilingual-gemma2.toml
@@ -0,0 +1,24 @@
+name = "bge-multilingual-gemma2"
+family = "text-embedding"
+attachment = false
+reasoning = false
+tool_call = false
+structured_output = false
+temperature = false
+knowledge = "2024-06"
+release_date = "2024-07-30"
+last_updated = "2026-02-04"
+open_weights = true
+
+[cost]
+input = 0.01
+output = 0.00
+
+[limit]
+context = 8_192
+input = 8_192
+output = 0
+
+[modalities]
+input = ["text"]
+output = ["text"]
\ No newline at end of file
diff --git a/providers/nebius/models/MiniMaxAI/minimax-m2.1.toml b/providers/nebius/models/MiniMaxAI/minimax-m2.1.toml
new file mode 100644
index 000000000..add20a0a4
--- /dev/null
+++ b/providers/nebius/models/MiniMaxAI/minimax-m2.1.toml
@@ -0,0 +1,29 @@
+name = "MiniMax-M2.1"
+attachment = false
+reasoning = true
+tool_call = true
+structured_output = true
+temperature = true
+knowledge = "2025-10"
+release_date = "2026-02-01"
+last_updated = "2026-02-04"
+open_weights = true
+
+[cost]
+input = 0.30
+output = 1.20
+reasoning = 1.20
+cache_read = 0.03
+cache_write = 0.375
+
+[limit]
+context = 128_000
+input = 120_000
+output = 8_192
+
+[modalities]
+input = ["text"]
+output = ["text"]
+
+[interleaved]
+field = "reasoning_content"
\ No newline at end of file
diff --git a/providers/nebius/models/PrimeIntellect/intellect-3.toml b/providers/nebius/models/PrimeIntellect/intellect-3.toml
new file mode 100644
index 000000000..743b1edb1
--- /dev/null
+++ b/providers/nebius/models/PrimeIntellect/intellect-3.toml
@@ -0,0 +1,25 @@
+name = "INTELLECT-3"
+attachment = false
+reasoning = false
+tool_call = true
+structured_output = true
+temperature = true
+knowledge = "2025-10"
+release_date = "2026-01-25"
+last_updated = "2026-02-04"
+open_weights = true
+
+[cost]
+input = 0.20
+output = 1.10
+cache_read = 0.02
+cache_write = 0.25
+
+[limit]
+context = 128_000
+input = 120_000
+output = 8_192
+
+[modalities]
+input = ["text"]
+output = ["text"]
\ No newline at end of file
diff --git a/providers/nebius/models/black-forest-labs/flux-dev.toml b/providers/nebius/models/black-forest-labs/flux-dev.toml
new file mode 100644
index 000000000..edab8b75b
--- /dev/null
+++ b/providers/nebius/models/black-forest-labs/flux-dev.toml
@@ -0,0 +1,23 @@
+name = "FLUX.1-dev"
+attachment = false
+reasoning = false
+tool_call = false
+structured_output = false
+temperature = false
+knowledge = "2024-07"
+release_date = "2024-08-01"
+last_updated = "2026-02-04"
+open_weights = true
+
+[cost]
+input = 0.00
+output = 0.00
+
+[limit]
+context = 77
+input = 77
+output = 0
+
+[modalities]
+input = ["text"]
+output = ["image"]
\ No newline at end of file
diff --git a/providers/nebius/models/black-forest-labs/flux-schnell.toml b/providers/nebius/models/black-forest-labs/flux-schnell.toml
new file mode 100644
index 000000000..9b3f2a93f
--- /dev/null
+++ b/providers/nebius/models/black-forest-labs/flux-schnell.toml
@@ -0,0 +1,23 @@
+name = "FLUX.1-schnell"
+attachment = false
+reasoning = false
+tool_call = false
+structured_output = false
+temperature = false
+knowledge = "2024-07"
+release_date = "2024-08-01"
+last_updated = "2026-02-04"
+open_weights = true
+
+[cost]
+input = 0.00
+output = 0.00
+
+[limit]
+context = 77
+input = 77
+output = 0
+
+[modalities]
+input = ["text"]
+output = ["image"]
\ No newline at end of file
diff --git a/providers/nebius/models/google/gemma-2-2b-it.toml b/providers/nebius/models/google/gemma-2-2b-it.toml
new file mode 100644
index 000000000..f5e60c4ed
--- /dev/null
+++ b/providers/nebius/models/google/gemma-2-2b-it.toml
@@ -0,0 +1,25 @@
+name = "Gemma-2-2b-it"
+attachment = false
+reasoning = false
+tool_call = false
+structured_output = false
+temperature = true
+knowledge = "2024-06"
+release_date = "2024-07-31"
+last_updated = "2026-02-04"
+open_weights = true
+
+[cost]
+input = 0.02
+output = 0.06
+cache_read = 0.002
+cache_write = 0.025
+
+[limit]
+context = 8_192
+input = 8_000
+output = 4_096
+
+[modalities]
+input = ["text"]
+output = ["text"]
\ No newline at end of file
diff --git a/providers/nebius/models/google/gemma-2-9b-it-fast.toml b/providers/nebius/models/google/gemma-2-9b-it-fast.toml
new file mode 100644
index 000000000..f7eab6454
--- /dev/null
+++ b/providers/nebius/models/google/gemma-2-9b-it-fast.toml
@@ -0,0 +1,25 @@
+name = "Gemma-2-9b-it (Fast)"
+attachment = false
+reasoning = false
+tool_call = false
+structured_output = false
+temperature = true
+knowledge = "2024-06"
+release_date = "2024-06-27"
+last_updated = "2026-02-04"
+open_weights = true
+
+[cost]
+input = 0.03
+output = 0.09
+cache_read = 0.003
+cache_write = 0.0375
+
+[limit]
+context = 8_192
+input = 8_000
+output = 4_096
+
+[modalities]
+input = ["text"]
+output = ["text"]
\ No newline at end of file
diff --git a/providers/nebius/models/google/gemma-3-27b-it-fast.toml b/providers/nebius/models/google/gemma-3-27b-it-fast.toml
new file mode 100644
index 000000000..ab80f0f5f
--- /dev/null
+++ b/providers/nebius/models/google/gemma-3-27b-it-fast.toml
@@ -0,0 +1,25 @@
+name = "Gemma-3-27b-it (Fast)"
+attachment = true
+reasoning = false
+tool_call = true
+structured_output = true
+temperature = true
+knowledge = "2025-10"
+release_date = "2026-01-20"
+last_updated = "2026-02-04"
+open_weights = true
+
+[cost]
+input = 0.20
+output = 0.60
+cache_read = 0.02
+cache_write = 0.25
+
+[limit]
+context = 128_000
+input = 120_000
+output = 8_192
+
+[modalities]
+input = ["text", "image"]
+output = ["text"]
\ No newline at end of file
diff --git a/providers/nebius/models/google/gemma-3-27b-it.toml b/providers/nebius/models/google/gemma-3-27b-it.toml
new file mode 100644
index 000000000..24e4a8fea
--- /dev/null
+++ b/providers/nebius/models/google/gemma-3-27b-it.toml
@@ -0,0 +1,25 @@
+name = "Gemma-3-27b-it"
+attachment = true
+reasoning = false
+tool_call = true
+structured_output = true
+temperature = true
+knowledge = "2025-10"
+release_date = "2026-01-20"
+last_updated = "2026-02-04"
+open_weights = true
+
+[cost]
+input = 0.10
+output = 0.30
+cache_read = 0.01
+cache_write = 0.125
+
+[limit]
+context = 128_000
+input = 120_000
+output = 8_192
+
+[modalities]
+input = ["text", "image"]
+output = ["text"]
\ No newline at end of file
diff --git a/providers/nebius/models/intfloat/e5-mistral-7b-instruct.toml b/providers/nebius/models/intfloat/e5-mistral-7b-instruct.toml
new file mode 100644
index 000000000..e19f6ddfc
--- /dev/null
+++ b/providers/nebius/models/intfloat/e5-mistral-7b-instruct.toml
@@ -0,0 +1,24 @@
+name = "e5-mistral-7b-instruct"
+family = "text-embedding"
+attachment = false
+reasoning = false
+tool_call = false
+structured_output = false
+temperature = false
+knowledge = "2023-12"
+release_date = "2024-01-01"
+last_updated = "2026-02-04"
+open_weights = true
+
+[cost]
+input = 0.01
+output = 0.00
+
+[limit]
+context = 32_768
+input = 32_768
+output = 0
+
+[modalities]
+input = ["text"]
+output = ["text"]
\ No newline at end of file

From 51fe42982fbc0eaf4dc038c26d2f3fa754b3b724 Mon Sep 17 00:00:00 2001
From: manascb1344 <zephop76593@gmail.com>
Date: Wed, 4 Feb 2026 20:16:21 +0530
Subject: [PATCH 2/6] feat(nebius): add DeepSeek models to Token Factory

- Add DeepSeek-V3.2, DeepSeek-V3-0324 (Base & Fast), DeepSeek-R1-0528 (Base & Fast)
- These are new models available on Nebius Token Factory
---
 .../deepseek-ai/deepseek-r1-0528-fast.toml    | 22 ++++++++++++++
 .../models/deepseek-ai/deepseek-r1-0528.toml  | 29 +++++++++++++++++++
 .../deepseek-ai/deepseek-v3-0324-fast.toml    | 25 ++++++++++++++++
 .../models/deepseek-ai/deepseek-v3-0324.toml  | 25 ++++++++++++++++
 .../models/deepseek-ai/deepseek-v3.2.toml     | 29 +++++++++++++++++++
 5 files changed, 130 insertions(+)
 create mode 100644 providers/nebius/models/deepseek-ai/deepseek-r1-0528-fast.toml
 create mode 100644 providers/nebius/models/deepseek-ai/deepseek-r1-0528.toml
 create mode 100644 providers/nebius/models/deepseek-ai/deepseek-v3-0324-fast.toml
 create mode 100644 providers/nebius/models/deepseek-ai/deepseek-v3-0324.toml
 create mode 100644 providers/nebius/models/deepseek-ai/deepseek-v3.2.toml

diff --git a/providers/nebius/models/deepseek-ai/deepseek-r1-0528-fast.toml b/providers/nebius/models/deepseek-ai/deepseek-r1-0528-fast.toml
new file mode 100644
index 000000000..bf2c825ca
--- /dev/null
+++ b/providers/nebius/models/deepseek-ai/deepseek-r1-0528-fast.toml
@@ -0,0 +1,22 @@
+name = "DeepSeek-R1-0528 (Fast)"
+family = "deepseek"
+release_date = "2025-05-28"
+last_updated = "2026-02-04"
+attachment = false
+reasoning = true
+temperature = true
+knowledge = "2025-01"
+tool_call = true
+open_weights = true
+
+[cost]
+input = 2.00
+output = 6.00
+
+[limit]
+context = 131072
+output = 8192
+
+[modalities]
+input = ["text"]
+output = ["text"]
\ No newline at end of file
diff --git a/providers/nebius/models/deepseek-ai/deepseek-r1-0528.toml b/providers/nebius/models/deepseek-ai/deepseek-r1-0528.toml
new file mode 100644
index 000000000..c107fff95
--- /dev/null
+++ b/providers/nebius/models/deepseek-ai/deepseek-r1-0528.toml
@@ -0,0 +1,29 @@
+name = "DeepSeek-R1-0528"
+attachment = false
+reasoning = true
+tool_call = true
+structured_output = true
+temperature = true
+knowledge = "2025-01"
+release_date = "2025-05-28"
+last_updated = "2026-02-04"
+open_weights = true
+
+[cost]
+input = 0.80
+output = 2.40
+reasoning = 2.40
+cache_read = 0.08
+cache_write = 1.00
+
+[limit]
+context = 128_000
+input = 120_000
+output = 32_768
+
+[modalities]
+input = ["text"]
+output = ["text"]
+
+[interleaved]
+field = "reasoning_content"
\ No newline at end of file
diff --git a/providers/nebius/models/deepseek-ai/deepseek-v3-0324-fast.toml b/providers/nebius/models/deepseek-ai/deepseek-v3-0324-fast.toml
new file mode 100644
index 000000000..97974a613
--- /dev/null
+++ b/providers/nebius/models/deepseek-ai/deepseek-v3-0324-fast.toml
@@ -0,0 +1,25 @@
+name = "DeepSeek-V3-0324 (Fast)"
+attachment = false
+reasoning = false
+tool_call = true
+structured_output = true
+temperature = true
+knowledge = "2024-12"
+release_date = "2025-03-24"
+last_updated = "2026-02-04"
+open_weights = true
+
+[cost]
+input = 0.75
+output = 2.25
+cache_read = 0.075
+cache_write = 0.28125
+
+[limit]
+context = 128_000
+input = 120_000
+output = 8_192
+
+[modalities]
+input = ["text"]
+output = ["text"]
\ No newline at end of file
diff --git a/providers/nebius/models/deepseek-ai/deepseek-v3-0324.toml b/providers/nebius/models/deepseek-ai/deepseek-v3-0324.toml
new file mode 100644
index 000000000..c86165e13
--- /dev/null
+++ b/providers/nebius/models/deepseek-ai/deepseek-v3-0324.toml
@@ -0,0 +1,25 @@
+name = "DeepSeek-V3-0324"
+attachment = false
+reasoning = false
+tool_call = true
+structured_output = true
+temperature = true
+knowledge = "2024-12"
+release_date = "2025-03-24"
+last_updated = "2026-02-04"
+open_weights = true
+
+[cost]
+input = 0.50
+output = 1.50
+cache_read = 0.05
+cache_write = 0.1875
+
+[limit]
+context = 128_000
+input = 120_000
+output = 8_192
+
+[modalities]
+input = ["text"]
+output = ["text"]
\ No newline at end of file
diff --git a/providers/nebius/models/deepseek-ai/deepseek-v3.2.toml b/providers/nebius/models/deepseek-ai/deepseek-v3.2.toml
new file mode 100644
index 000000000..330071236
--- /dev/null
+++ b/providers/nebius/models/deepseek-ai/deepseek-v3.2.toml
@@ -0,0 +1,29 @@
+name = "DeepSeek-V3.2"
+attachment = false
+reasoning = true
+tool_call = true
+structured_output = true
+temperature = true
+knowledge = "2025-11"
+release_date = "2026-01-20"
+last_updated = "2026-02-04"
+open_weights = true
+
+[cost]
+input = 0.30
+output = 0.45
+reasoning = 0.45
+cache_read = 0.03
+cache_write = 0.375
+
+[limit]
+context = 128_000
+input = 120_000
+output = 8_192
+
+[modalities]
+input = ["text"]
+output = ["text"]
+
+[interleaved]
+field = "reasoning_content"
\ No newline at end of file

From ca6206b78e49cd8ec2a84c9bcf9721c8839afd42 Mon Sep 17 00:00:00 2001
From: manascb1344 <zephop76593@gmail.com>
Date: Wed, 4 Feb 2026 20:16:46 +0530
Subject: [PATCH 3/6] feat(nebius): add Qwen models to Token Factory

- Add Qwen3-Next-80B-A3B-Thinking
- Add Qwen3-30B-A3B-Thinking-2507 and Qwen3-30B-A3B-Instruct-2507
- Add Qwen3-Coder-30B-A3B-Instruct
- Add Qwen3-32B (Base & Fast)
- Add Qwen2.5-Coder-7B-fast
- Add Qwen2.5-VL-72B-Instruct
- Add Qwen3-Embedding-8B
---
 .../models/qwen/qwen2.5-coder-7b-fast.toml    | 25 ++++++++++++++++
 .../models/qwen/qwen2.5-vl-72b-instruct.toml  | 25 ++++++++++++++++
 .../qwen/qwen3-30b-a3b-instruct-2507.toml     | 25 ++++++++++++++++
 .../qwen/qwen3-30b-a3b-thinking-2507.toml     | 29 +++++++++++++++++++
 .../nebius/models/qwen/qwen3-32b-fast.toml    | 25 ++++++++++++++++
 providers/nebius/models/qwen/qwen3-32b.toml   | 25 ++++++++++++++++
 .../qwen/qwen3-coder-30b-a3b-instruct.toml    | 25 ++++++++++++++++
 .../models/qwen/qwen3-embedding-8b.toml       | 24 +++++++++++++++
 .../qwen/qwen3-next-80b-a3b-thinking.toml     | 29 +++++++++++++++++++
 9 files changed, 232 insertions(+)
 create mode 100644 providers/nebius/models/qwen/qwen2.5-coder-7b-fast.toml
 create mode 100644 providers/nebius/models/qwen/qwen2.5-vl-72b-instruct.toml
 create mode 100644 providers/nebius/models/qwen/qwen3-30b-a3b-instruct-2507.toml
 create mode 100644 providers/nebius/models/qwen/qwen3-30b-a3b-thinking-2507.toml
 create mode 100644 providers/nebius/models/qwen/qwen3-32b-fast.toml
 create mode 100644 providers/nebius/models/qwen/qwen3-32b.toml
 create mode 100644 providers/nebius/models/qwen/qwen3-coder-30b-a3b-instruct.toml
 create mode 100644 providers/nebius/models/qwen/qwen3-embedding-8b.toml
 create mode 100644 providers/nebius/models/qwen/qwen3-next-80b-a3b-thinking.toml

diff --git a/providers/nebius/models/qwen/qwen2.5-coder-7b-fast.toml b/providers/nebius/models/qwen/qwen2.5-coder-7b-fast.toml
new file mode 100644
index 000000000..34dc142e7
--- /dev/null
+++ b/providers/nebius/models/qwen/qwen2.5-coder-7b-fast.toml
@@ -0,0 +1,25 @@
+name = "Qwen2.5-Coder-7B (Fast)"
+attachment = false
+reasoning = false
+tool_call = true
+structured_output = true
+temperature = true
+knowledge = "2024-09"
+release_date = "2024-09-19"
+last_updated = "2026-02-04"
+open_weights = true
+
+[cost]
+input = 0.03
+output = 0.09
+cache_read = 0.003
+cache_write = 0.03
+
+[limit]
+context = 128_000
+input = 120_000
+output = 8_192
+
+[modalities]
+input = ["text"]
+output = ["text"]
\ No newline at end of file
diff --git a/providers/nebius/models/qwen/qwen2.5-vl-72b-instruct.toml b/providers/nebius/models/qwen/qwen2.5-vl-72b-instruct.toml
new file mode 100644
index 000000000..b21f2c926
--- /dev/null
+++ b/providers/nebius/models/qwen/qwen2.5-vl-72b-instruct.toml
@@ -0,0 +1,25 @@
+name = "Qwen2.5-VL-72B-Instruct"
+attachment = true
+reasoning = false
+tool_call = true
+structured_output = true
+temperature = true
+knowledge = "2024-12"
+release_date = "2025-01-20"
+last_updated = "2026-02-04"
+open_weights = true
+
+[cost]
+input = 0.25
+output = 0.75
+cache_read = 0.025
+cache_write = 0.31
+
+[limit]
+context = 128_000
+input = 120_000
+output = 8_192
+
+[modalities]
+input = ["text", "image"]
+output = ["text"]
\ No newline at end of file
diff --git a/providers/nebius/models/qwen/qwen3-30b-a3b-instruct-2507.toml b/providers/nebius/models/qwen/qwen3-30b-a3b-instruct-2507.toml
new file mode 100644
index 000000000..96c2d3f36
--- /dev/null
+++ b/providers/nebius/models/qwen/qwen3-30b-a3b-instruct-2507.toml
@@ -0,0 +1,25 @@
+name = "Qwen3-30B-A3B-Instruct-2507"
+attachment = false
+reasoning = false
+tool_call = true
+structured_output = true
+temperature = true
+knowledge = "2025-12"
+release_date = "2026-01-28"
+last_updated = "2026-02-04"
+open_weights = true
+
+[cost]
+input = 0.10
+output = 0.30
+cache_read = 0.01
+cache_write = 0.125
+
+[limit]
+context = 128_000
+input = 120_000
+output = 8_192
+
+[modalities]
+input = ["text"]
+output = ["text"]
\ No newline at end of file
diff --git a/providers/nebius/models/qwen/qwen3-30b-a3b-thinking-2507.toml b/providers/nebius/models/qwen/qwen3-30b-a3b-thinking-2507.toml
new file mode 100644
index 000000000..375b81e4e
--- /dev/null
+++ b/providers/nebius/models/qwen/qwen3-30b-a3b-thinking-2507.toml
@@ -0,0 +1,29 @@
+name = "Qwen3-30B-A3B-Thinking-2507"
+attachment = false
+reasoning = true
+tool_call = true
+structured_output = true
+temperature = true
+knowledge = "2025-12"
+release_date = "2026-01-28"
+last_updated = "2026-02-04"
+open_weights = true
+
+[cost]
+input = 0.10
+output = 0.30
+reasoning = 0.30
+cache_read = 0.01
+cache_write = 0.125
+
+[limit]
+context = 128_000
+input = 120_000
+output = 16_384
+
+[modalities]
+input = ["text"]
+output = ["text"]
+
+[interleaved]
+field = "reasoning_content"
\ No newline at end of file
diff --git a/providers/nebius/models/qwen/qwen3-32b-fast.toml b/providers/nebius/models/qwen/qwen3-32b-fast.toml
new file mode 100644
index 000000000..b14e5b8f8
--- /dev/null
+++ b/providers/nebius/models/qwen/qwen3-32b-fast.toml
@@ -0,0 +1,25 @@
+name = "Qwen3-32B (Fast)"
+attachment = false
+reasoning = false
+tool_call = true
+structured_output = true
+temperature = true
+knowledge = "2025-12"
+release_date = "2026-01-28"
+last_updated = "2026-02-04"
+open_weights = true
+
+[cost]
+input = 0.20
+output = 0.60
+cache_read = 0.02
+cache_write = 0.25
+
+[limit]
+context = 128_000
+input = 120_000
+output = 8_192
+
+[modalities]
+input = ["text"]
+output = ["text"]
\ No newline at end of file
diff --git a/providers/nebius/models/qwen/qwen3-32b.toml b/providers/nebius/models/qwen/qwen3-32b.toml
new file mode 100644
index 000000000..6c619f6e5
--- /dev/null
+++ b/providers/nebius/models/qwen/qwen3-32b.toml
@@ -0,0 +1,25 @@
+name = "Qwen3-32B"
+attachment = false
+reasoning = false
+tool_call = true
+structured_output = true
+temperature = true
+knowledge = "2025-12"
+release_date = "2026-01-28"
+last_updated = "2026-02-04"
+open_weights = true
+
+[cost]
+input = 0.10
+output = 0.30
+cache_read = 0.01
+cache_write = 0.125
+
+[limit]
+context = 128_000
+input = 120_000
+output = 8_192
+
+[modalities]
+input = ["text"]
+output = ["text"]
\ No newline at end of file
diff --git a/providers/nebius/models/qwen/qwen3-coder-30b-a3b-instruct.toml b/providers/nebius/models/qwen/qwen3-coder-30b-a3b-instruct.toml
new file mode 100644
index 000000000..5d3d1849e
--- /dev/null
+++ b/providers/nebius/models/qwen/qwen3-coder-30b-a3b-instruct.toml
@@ -0,0 +1,25 @@
+name = "Qwen3-Coder-30B-A3B-Instruct"
+attachment = false
+reasoning = false
+tool_call = true
+structured_output = true
+temperature = true
+knowledge = "2025-12"
+release_date = "2026-01-28"
+last_updated = "2026-02-04"
+open_weights = true
+
+[cost]
+input = 0.10
+output = 0.30
+cache_read = 0.01
+cache_write = 0.125
+
+[limit]
+context = 128_000
+input = 120_000
+output = 8_192
+
+[modalities]
+input = ["text"]
+output = ["text"]
\ No newline at end of file
diff --git a/providers/nebius/models/qwen/qwen3-embedding-8b.toml b/providers/nebius/models/qwen/qwen3-embedding-8b.toml
new file mode 100644
index 000000000..205cee1bc
--- /dev/null
+++ b/providers/nebius/models/qwen/qwen3-embedding-8b.toml
@@ -0,0 +1,24 @@
+name = "Qwen3-Embedding-8B"
+family = "text-embedding"
+attachment = false
+reasoning = false
+tool_call = false
+structured_output = false
+temperature = false
+knowledge = "2025-10"
+release_date = "2026-01-10"
+last_updated = "2026-02-04"
+open_weights = true
+
+[cost]
+input = 0.01
+output = 0.00
+
+[limit]
+context = 32_768
+input = 32_768
+output = 0
+
+[modalities]
+input = ["text"]
+output = ["text"]
\ No newline at end of file
diff --git a/providers/nebius/models/qwen/qwen3-next-80b-a3b-thinking.toml b/providers/nebius/models/qwen/qwen3-next-80b-a3b-thinking.toml
new file mode 100644
index 000000000..ca0b79132
--- /dev/null
+++ b/providers/nebius/models/qwen/qwen3-next-80b-a3b-thinking.toml
@@ -0,0 +1,29 @@
+name = "Qwen3-Next-80B-A3B-Thinking"
+attachment = false
+reasoning = true
+tool_call = true
+structured_output = true
+temperature = true
+knowledge = "2025-12"
+release_date = "2026-01-28"
+last_updated = "2026-02-04"
+open_weights = true
+
+[cost]
+input = 0.15
+output = 1.20
+reasoning = 1.20
+cache_read = 0.015
+cache_write = 0.18
+
+[limit]
+context = 128_000
+input = 120_000
+output = 16_384
+
+[modalities]
+input = ["text"]
+output = ["text"]
+
+[interleaved]
+field = "reasoning_content"
\ No newline at end of file

From 2639e20a97118e4ebc438fb8e1111be1c878544f Mon Sep 17 00:00:00 2001
From: manascb1344 <zephop76593@gmail.com>
Date: Wed, 4 Feb 2026 20:17:14 +0530
Subject: [PATCH 4/6] feat(nebius): add new models from Z.ai, Moonshot AI,
 Meta, and NVIDIA

- Add GLM-4.7 and GLM-4.7-FP8 (Z.ai)
- Add Kimi-K2-Thinking (Moonshot AI)
- Add Llama-Guard-3-8B, Meta-Llama-3.1-8B-Instruct (Base & Fast) (Meta)
- Add Nemotron-Nano-V2-12b and NVIDIA-Nemotron-3-Nano-30B-A3B (NVIDIA)
---
 .../models/meta-llama/llama-guard-3-8b.toml   | 25 ++++++++++++++++
 .../meta-llama-3.1-8b-instruct-fast.toml      | 25 ++++++++++++++++
 .../meta-llama-3.1-8b-instruct.toml           | 25 ++++++++++++++++
 .../models/moonshotai/kimi-k2-thinking.toml   | 29 +++++++++++++++++++
 .../models/nvidia/nemotron-nano-v2-12b.toml   | 25 ++++++++++++++++
 .../nvidia-nemotron-3-nano-30b-a3b.toml       | 25 ++++++++++++++++
 .../nebius/models/zai-org/glm-4.7-fp8.toml    | 25 ++++++++++++++++
 providers/nebius/models/zai-org/glm-4.7.toml  | 22 ++++++++++++++
 8 files changed, 201 insertions(+)
 create mode 100644 providers/nebius/models/meta-llama/llama-guard-3-8b.toml
 create mode 100644 providers/nebius/models/meta-llama/meta-llama-3.1-8b-instruct-fast.toml
 create mode 100644 providers/nebius/models/meta-llama/meta-llama-3.1-8b-instruct.toml
 create mode 100644 providers/nebius/models/moonshotai/kimi-k2-thinking.toml
 create mode 100644 providers/nebius/models/nvidia/nemotron-nano-v2-12b.toml
 create mode 100644 providers/nebius/models/nvidia/nvidia-nemotron-3-nano-30b-a3b.toml
 create mode 100644 providers/nebius/models/zai-org/glm-4.7-fp8.toml
 create mode 100644 providers/nebius/models/zai-org/glm-4.7.toml

diff --git a/providers/nebius/models/meta-llama/llama-guard-3-8b.toml b/providers/nebius/models/meta-llama/llama-guard-3-8b.toml
new file mode 100644
index 000000000..c072b096f
--- /dev/null
+++ b/providers/nebius/models/meta-llama/llama-guard-3-8b.toml
@@ -0,0 +1,25 @@
+name = "Llama-Guard-3-8B"
+attachment = false
+reasoning = false
+tool_call = false
+structured_output = true
+temperature = false
+knowledge = "2024-04"
+release_date = "2024-04-18"
+last_updated = "2026-02-04"
+open_weights = true
+
+[cost]
+input = 0.02
+output = 0.06
+cache_read = 0.002
+cache_write = 0.025
+
+[limit]
+context = 8_192
+input = 8_000
+output = 1_024
+
+[modalities]
+input = ["text"]
+output = ["text"]
\ No newline at end of file
diff --git a/providers/nebius/models/meta-llama/meta-llama-3.1-8b-instruct-fast.toml b/providers/nebius/models/meta-llama/meta-llama-3.1-8b-instruct-fast.toml
new file mode 100644
index 000000000..ac2c13268
--- /dev/null
+++ b/providers/nebius/models/meta-llama/meta-llama-3.1-8b-instruct-fast.toml
@@ -0,0 +1,25 @@
+name = "Meta-Llama-3.1-8B-Instruct (Fast)"
+attachment = false
+reasoning = false
+tool_call = true
+structured_output = true
+temperature = true
+knowledge = "2023-12"
+release_date = "2024-07-23"
+last_updated = "2026-02-04"
+open_weights = true
+
+[cost]
+input = 0.03
+output = 0.09
+cache_read = 0.003
+cache_write = 0.03
+
+[limit]
+context = 128_000
+input = 120_000
+output = 4_096
+
+[modalities]
+input = ["text"]
+output = ["text"]
\ No newline at end of file
diff --git a/providers/nebius/models/meta-llama/meta-llama-3.1-8b-instruct.toml b/providers/nebius/models/meta-llama/meta-llama-3.1-8b-instruct.toml
new file mode 100644
index 000000000..ddb722a68
--- /dev/null
+++ b/providers/nebius/models/meta-llama/meta-llama-3.1-8b-instruct.toml
@@ -0,0 +1,25 @@
+name = "Meta-Llama-3.1-8B-Instruct"
+attachment = false
+reasoning = false
+tool_call = true
+structured_output = true
+temperature = true
+knowledge = "2023-12"
+release_date = "2024-07-23"
+last_updated = "2026-02-04"
+open_weights = true
+
+[cost]
+input = 0.02
+output = 0.06
+cache_read = 0.002
+cache_write = 0.025
+
+[limit]
+context = 128_000
+input = 120_000
+output = 4_096
+
+[modalities]
+input = ["text"]
+output = ["text"]
\ No newline at end of file
diff --git a/providers/nebius/models/moonshotai/kimi-k2-thinking.toml b/providers/nebius/models/moonshotai/kimi-k2-thinking.toml
new file mode 100644
index 000000000..a4af04c79
--- /dev/null
+++ b/providers/nebius/models/moonshotai/kimi-k2-thinking.toml
@@ -0,0 +1,29 @@
+name = "Kimi-K2-Thinking"
+attachment = false
+reasoning = true
+tool_call = true
+structured_output = true
+temperature = true
+knowledge = "2025-10"
+release_date = "2026-01-05"
+last_updated = "2026-02-04"
+open_weights = true
+
+[cost]
+input = 0.60
+output = 2.50
+reasoning = 2.50
+cache_read = 0.06
+cache_write = 0.75
+
+[limit]
+context = 128_000
+input = 120_000
+output = 16_384
+
+[modalities]
+input = ["text"]
+output = ["text"]
+
+[interleaved]
+field = "reasoning_content"
\ No newline at end of file
diff --git a/providers/nebius/models/nvidia/nemotron-nano-v2-12b.toml b/providers/nebius/models/nvidia/nemotron-nano-v2-12b.toml
new file mode 100644
index 000000000..c11729d6b
--- /dev/null
+++ b/providers/nebius/models/nvidia/nemotron-nano-v2-12b.toml
@@ -0,0 +1,25 @@
+name = "Nemotron-Nano-V2-12b"
+attachment = false
+reasoning = false
+tool_call = true
+structured_output = true
+temperature = true
+knowledge = "2025-01"
+release_date = "2025-03-15"
+last_updated = "2026-02-04"
+open_weights = true
+
+[cost]
+input = 0.07
+output = 0.20
+cache_read = 0.007
+cache_write = 0.08
+
+[limit]
+context = 32_000
+input = 30_000
+output = 4_096
+
+[modalities]
+input = ["text"]
+output = ["text"]
\ No newline at end of file
diff --git a/providers/nebius/models/nvidia/nvidia-nemotron-3-nano-30b-a3b.toml b/providers/nebius/models/nvidia/nvidia-nemotron-3-nano-30b-a3b.toml
new file mode 100644
index 000000000..c29a4b957
--- /dev/null
+++ b/providers/nebius/models/nvidia/nvidia-nemotron-3-nano-30b-a3b.toml
@@ -0,0 +1,25 @@
+name = "Nemotron-3-Nano-30B-A3B"
+attachment = false
+reasoning = false
+tool_call = true
+structured_output = true
+temperature = true
+knowledge = "2025-05"
+release_date = "2025-08-10"
+last_updated = "2026-02-04"
+open_weights = true
+
+[cost]
+input = 0.06
+output = 0.24
+cache_read = 0.006
+cache_write = 0.075
+
+[limit]
+context = 32_000
+input = 30_000
+output = 4_096
+
+[modalities]
+input = ["text"]
+output = ["text"]
\ No newline at end of file
diff --git a/providers/nebius/models/zai-org/glm-4.7-fp8.toml b/providers/nebius/models/zai-org/glm-4.7-fp8.toml
new file mode 100644
index 000000000..856616689
--- /dev/null
+++ b/providers/nebius/models/zai-org/glm-4.7-fp8.toml
@@ -0,0 +1,25 @@
+name = "GLM-4.7 (FP8)"
+attachment = false
+reasoning = false
+tool_call = true
+structured_output = true
+temperature = true
+knowledge = "2025-12"
+release_date = "2026-01-15"
+last_updated = "2026-02-04"
+open_weights = true
+
+[cost]
+input = 0.40
+output = 2.00
+cache_read = 0.04
+cache_write = 0.50
+
+[limit]
+context = 128_000
+input = 124_000
+output = 4_096
+
+[modalities]
+input = ["text"]
+output = ["text"]
\ No newline at end of file
diff --git a/providers/nebius/models/zai-org/glm-4.7.toml b/providers/nebius/models/zai-org/glm-4.7.toml
new file mode 100644
index 000000000..115d44535
--- /dev/null
+++ b/providers/nebius/models/zai-org/glm-4.7.toml
@@ -0,0 +1,22 @@
+name = "GLM-4.7"
+family = "glm"
+release_date = "2025-12-22"
+last_updated = "2026-02-04"
+attachment = false
+reasoning = true
+temperature = true
+knowledge = "2025-01"
+tool_call = true
+open_weights = true
+
+[cost]
+input = 0.40
+output = 2.00
+
+[limit]
+context = 131072
+output = 8192
+
+[modalities]
+input = ["text"]
+output = ["text"]
\ No newline at end of file

From 578df73ffbfc93d8d7634d6d6c61d442d29c8b7d Mon Sep 17 00:00:00 2001
From: manascb1344 <zephop76593@gmail.com>
Date: Wed, 4 Feb 2026 20:17:51 +0530
Subject: [PATCH 5/6] feat(nebius): update Z.ai, OpenAI, Moonshot AI, and
 NousResearch models

- Update GLM-4.5 and GLM-4.5-Air with new pricing
- Update gpt-oss-120b and gpt-oss-20b with new pricing and features
- Update Kimi-K2-Instruct with new pricing and multimodal support
- Update Hermes-4-405B and Hermes-4-70B with new pricing
---
 .../models/NousResearch/hermes-4-405b.toml    | 25 ++++++++++-------
 .../models/NousResearch/hermes-4-70b.toml     | 25 ++++++++++-------
 .../models/moonshotai/kimi-k2-instruct.toml   | 27 ++++++++++---------
 .../nebius/models/openai/gpt-oss-120b.toml    | 27 ++++++++++++-------
 .../nebius/models/openai/gpt-oss-20b.toml     | 27 ++++++++++---------
 .../nebius/models/zai-org/glm-4.5-air.toml    | 23 +++++++++-------
 providers/nebius/models/zai-org/glm-4.5.toml  | 23 +++++++++-------
 7 files changed, 105 insertions(+), 72 deletions(-)

diff --git a/providers/nebius/models/NousResearch/hermes-4-405b.toml b/providers/nebius/models/NousResearch/hermes-4-405b.toml
index aabe24096..728ad3080 100644
--- a/providers/nebius/models/NousResearch/hermes-4-405b.toml
+++ b/providers/nebius/models/NousResearch/hermes-4-405b.toml
@@ -1,22 +1,29 @@
-name = "Hermes-4 405B"
-family = "hermes"
-release_date = "2024-08-01"
-last_updated = "2025-10-04"
+name = "Hermes-4-405B"
 attachment = false
 reasoning = true
-temperature = true
-knowledge = "2024-07"
 tool_call = true
-open_weights = false
+structured_output = true
+temperature = true
+knowledge = "2025-11"
+release_date = "2026-01-30"
+last_updated = "2026-02-04"
+open_weights = true
 
 [cost]
 input = 1.00
 output = 3.00
+reasoning = 3.00
+cache_read = 0.10
+cache_write = 1.25
 
 [limit]
-context = 131072
-output = 8192
+context = 128_000
+input = 120_000
+output = 8_192
 
 [modalities]
 input = ["text"]
 output = ["text"]
+
+[interleaved]
+field = "reasoning_content"
diff --git a/providers/nebius/models/NousResearch/hermes-4-70b.toml b/providers/nebius/models/NousResearch/hermes-4-70b.toml
index e242ac6c2..7080f0827 100644
--- a/providers/nebius/models/NousResearch/hermes-4-70b.toml
+++ b/providers/nebius/models/NousResearch/hermes-4-70b.toml
@@ -1,22 +1,29 @@
-name = "Hermes 4 70B"
-family = "hermes"
-release_date = "2024-08-01"
-last_updated = "2025-10-04"
+name = "Hermes-4-70B"
 attachment = false
 reasoning = true
-temperature = true
-knowledge = "2024-07"
 tool_call = true
-open_weights = false
+structured_output = true
+temperature = true
+knowledge = "2025-11"
+release_date = "2026-01-30"
+last_updated = "2026-02-04"
+open_weights = true
 
 [cost]
 input = 0.13
 output = 0.40
+reasoning = 0.40
+cache_read = 0.013
+cache_write = 0.16
 
 [limit]
-context = 131072
-output = 8192
+context = 128_000
+input = 120_000
+output = 8_192
 
 [modalities]
 input = ["text"]
 output = ["text"]
+
+[interleaved]
+field = "reasoning_content"
diff --git a/providers/nebius/models/moonshotai/kimi-k2-instruct.toml b/providers/nebius/models/moonshotai/kimi-k2-instruct.toml
index 84b8a7da5..d4c144c9a 100644
--- a/providers/nebius/models/moonshotai/kimi-k2-instruct.toml
+++ b/providers/nebius/models/moonshotai/kimi-k2-instruct.toml
@@ -1,22 +1,25 @@
-name = "Kimi K2 Instruct"
-family = "kimi"
-release_date = "2025-01-01"
-last_updated = "2025-10-04"
-attachment = false
-reasoning = true
-temperature = true
-knowledge = "2024-01"
+name = "Kimi-K2-Instruct"
+attachment = true
+reasoning = false
 tool_call = true
+structured_output = true
+temperature = true
+knowledge = "2025-10"
+release_date = "2026-01-05"
+last_updated = "2026-02-04"
 open_weights = false
 
 [cost]
 input = 0.50
 output = 2.40
+cache_read = 0.05
+cache_write = 0.625
 
 [limit]
-context = 131072
-output = 8192
+context = 200_000
+input = 190_000
+output = 8_192
 
 [modalities]
-input = ["text"]
-output = ["text"]
+input = ["text", "image"]
+output = ["text"]
\ No newline at end of file
diff --git a/providers/nebius/models/openai/gpt-oss-120b.toml b/providers/nebius/models/openai/gpt-oss-120b.toml
index 0b28d7a9a..2e61d87f2 100644
--- a/providers/nebius/models/openai/gpt-oss-120b.toml
+++ b/providers/nebius/models/openai/gpt-oss-120b.toml
@@ -1,22 +1,29 @@
-name = "GPT OSS 120B"
-family = "gpt-oss"
-release_date = "2024-01-01"
-last_updated = "2025-10-04"
-attachment = true
+name = "gpt-oss-120b"
+attachment = false
 reasoning = true
-temperature = true
-knowledge = "2024-01"
 tool_call = true
-open_weights = false
+structured_output = true
+temperature = true
+knowledge = "2025-09"
+release_date = "2026-01-10"
+last_updated = "2026-02-04"
+open_weights = true
 
 [cost]
 input = 0.15
 output = 0.60
+reasoning = 0.60
+cache_read = 0.015
+cache_write = 0.18
 
 [limit]
-context = 131072
-output = 8192
+context = 128_000
+input = 124_000
+output = 8_192
 
 [modalities]
 input = ["text"]
 output = ["text"]
+
+[interleaved]
+field = "reasoning_content"
diff --git a/providers/nebius/models/openai/gpt-oss-20b.toml b/providers/nebius/models/openai/gpt-oss-20b.toml
index 49dd525d1..d881b3da6 100644
--- a/providers/nebius/models/openai/gpt-oss-20b.toml
+++ b/providers/nebius/models/openai/gpt-oss-20b.toml
@@ -1,22 +1,25 @@
-name = "GPT OSS 20B"
-family = "gpt-oss"
-release_date = "2024-01-01"
-last_updated = "2025-10-04"
-attachment = true
-reasoning = true
-temperature = true
-knowledge = "2024-01"
+name = "gpt-oss-20b"
+attachment = false
+reasoning = true
 tool_call = true
-open_weights = false
+structured_output = true
+temperature = true
+knowledge = "2025-09"
+release_date = "2026-01-10"
+last_updated = "2026-02-04"
+open_weights = true
 
 [cost]
 input = 0.05
 output = 0.20
+cache_read = 0.005
+cache_write = 0.06
 
 [limit]
-context = 131072
-output = 8192
+context = 128_000
+input = 124_000
+output = 4_096
 
 [modalities]
 input = ["text"]
-output = ["text"]
+output = ["text"]
\ No newline at end of file
diff --git a/providers/nebius/models/zai-org/glm-4.5-air.toml b/providers/nebius/models/zai-org/glm-4.5-air.toml
index 4cafdff07..f0eb1e418 100644
--- a/providers/nebius/models/zai-org/glm-4.5-air.toml
+++ b/providers/nebius/models/zai-org/glm-4.5-air.toml
@@ -1,22 +1,25 @@
-name = "GLM 4.5 Air"
-family = "glm-air"
-release_date = "2024-06-01"
-last_updated = "2025-10-04"
+name = "GLM-4.5-Air"
 attachment = false
-reasoning = true
-temperature = true
-knowledge = "2024-05"
+reasoning = false
 tool_call = true
+structured_output = true
+temperature = true
+knowledge = "2025-06"
+release_date = "2025-11-15"
+last_updated = "2026-02-04"
 open_weights = false
 
 [cost]
 input = 0.20
 output = 1.20
+cache_read = 0.02
+cache_write = 0.25
 
 [limit]
-context = 131072
-output = 8192
+context = 128_000
+input = 124_000
+output = 4_096
 
 [modalities]
 input = ["text"]
-output = ["text"]
+output = ["text"]
\ No newline at end of file
diff --git a/providers/nebius/models/zai-org/glm-4.5.toml b/providers/nebius/models/zai-org/glm-4.5.toml
index cd27b825e..fbae120ad 100644
--- a/providers/nebius/models/zai-org/glm-4.5.toml
+++ b/providers/nebius/models/zai-org/glm-4.5.toml
@@ -1,22 +1,25 @@
-name = "GLM 4.5"
-family = "glm"
-release_date = "2024-06-01"
-last_updated = "2025-10-04"
+name = "GLM-4.5"
 attachment = false
-reasoning = true
-temperature = true
-knowledge = "2024-05"
+reasoning = false
 tool_call = true
+structured_output = true
+temperature = true
+knowledge = "2025-06"
+release_date = "2025-11-15"
+last_updated = "2026-02-04"
 open_weights = false
 
 [cost]
 input = 0.60
 output = 2.20
+cache_read = 0.06
+cache_write = 0.75
 
 [limit]
-context = 131072
-output = 8192
+context = 128_000
+input = 124_000
+output = 4_096
 
 [modalities]
 input = ["text"]
-output = ["text"]
+output = ["text"]
\ No newline at end of file

From 42a79c717d2b9a5834118243ca9b38429c977033 Mon Sep 17 00:00:00 2001
From: manascb1344 <zephop76593@gmail.com>
Date: Wed, 4 Feb 2026 20:18:37 +0530
Subject: [PATCH 6/6] feat(nebius): update Meta-Llama, NVIDIA models and mark
 deprecated

- Update Llama-3.3-70B-Instruct (Base & Fast) with cache pricing, input limits, and refreshed metadata
- Mark Llama-3.1-405B-Instruct as deprecated (no longer available)
- Update Llama-3.1-Nemotron-Ultra-253B-v1 with cache pricing, input limits, and refreshed metadata
- Mark DeepSeek-V3 as deprecated (replaced by V3.2 and V3-0324)
---
 .../models/deepseek-ai/deepseek-v3.toml       |  3 ++-
 .../llama-3.3-70b-instruct-base.toml          | 25 ++++++++++--------
 .../llama-3.3-70b-instruct-fast.toml          | 23 +++++++++-------
 .../meta-llama/llama-3_1-405b-instruct.toml   | 26 +++++++++++--------
 .../llama-3_1-nemotron-ultra-253b-v1.toml     | 25 ++++++++++--------
 5 files changed, 58 insertions(+), 44 deletions(-)

diff --git a/providers/nebius/models/deepseek-ai/deepseek-v3.toml b/providers/nebius/models/deepseek-ai/deepseek-v3.toml
index 4a638b4b6..802211e73 100644
--- a/providers/nebius/models/deepseek-ai/deepseek-v3.toml
+++ b/providers/nebius/models/deepseek-ai/deepseek-v3.toml
@@ -1,13 +1,14 @@
 name = "DeepSeek V3"
 family = "deepseek"
 release_date = "2024-05-07"
-last_updated = "2025-10-04"
+last_updated = "2026-02-04"
 attachment = false
 reasoning = true
 temperature = true
 knowledge = "2024-04"
 tool_call = true
 open_weights = false
+status = "deprecated"
 
 [cost]
 input = 0.50
diff --git a/providers/nebius/models/meta-llama/llama-3.3-70b-instruct-base.toml b/providers/nebius/models/meta-llama/llama-3.3-70b-instruct-base.toml
index ea561f5f3..4644871f3 100644
--- a/providers/nebius/models/meta-llama/llama-3.3-70b-instruct-base.toml
+++ b/providers/nebius/models/meta-llama/llama-3.3-70b-instruct-base.toml
@@ -1,22 +1,25 @@
-name = "Llama-3.3-70B-Instruct (Base)"
-family = "llama"
-release_date = "2024-08-22"
-last_updated = "2025-10-04"
+name = "Llama-3.3-70B-Instruct"
 attachment = false
-reasoning = true
-temperature = true
-knowledge = "2024-08"
+reasoning = false
 tool_call = true
-open_weights = false
+structured_output = true
+temperature = true
+knowledge = "2025-08"
+release_date = "2025-12-05"
+last_updated = "2026-02-04"
+open_weights = true
 
 [cost]
 input = 0.13
 output = 0.40
+cache_read = 0.013
+cache_write = 0.16
 
 [limit]
-context = 131072
-output = 8192
+context = 128_000
+input = 120_000
+output = 8_192
 
 [modalities]
 input = ["text"]
-output = ["text"]
+output = ["text"]
\ No newline at end of file
diff --git a/providers/nebius/models/meta-llama/llama-3.3-70b-instruct-fast.toml b/providers/nebius/models/meta-llama/llama-3.3-70b-instruct-fast.toml
index 5aabbdbad..baa5ca319 100644
--- a/providers/nebius/models/meta-llama/llama-3.3-70b-instruct-fast.toml
+++ b/providers/nebius/models/meta-llama/llama-3.3-70b-instruct-fast.toml
@@ -1,22 +1,25 @@
 name = "Llama-3.3-70B-Instruct (Fast)"
-family = "llama"
-release_date = "2024-08-22"
-last_updated = "2025-10-04"
 attachment = false
-reasoning = true
-temperature = true
-knowledge = "2024-08"
+reasoning = false
 tool_call = true
-open_weights = false
+structured_output = true
+temperature = true
+knowledge = "2025-08"
+release_date = "2025-12-05"
+last_updated = "2026-02-04"
+open_weights = true
 
 [cost]
 input = 0.25
 output = 0.75
+cache_read = 0.025
+cache_write = 0.31
 
 [limit]
-context = 131072
-output = 8192
+context = 128_000
+input = 120_000
+output = 8_192
 
 [modalities]
 input = ["text"]
-output = ["text"]
+output = ["text"]
\ No newline at end of file
diff --git a/providers/nebius/models/meta-llama/llama-3_1-405b-instruct.toml b/providers/nebius/models/meta-llama/llama-3_1-405b-instruct.toml
index a334266e6..9309ace60 100644
--- a/providers/nebius/models/meta-llama/llama-3_1-405b-instruct.toml
+++ b/providers/nebius/models/meta-llama/llama-3_1-405b-instruct.toml
@@ -1,22 +1,26 @@
-name = "Llama 3.1 405B Instruct"
-family = "llama"
-release_date = "2024-07-23"
-last_updated = "2025-10-04"
+name = "Llama-3.1-405B-Instruct"
 attachment = false
-reasoning = true
-temperature = true
-knowledge = "2024-03"
+reasoning = false
 tool_call = true
-open_weights = false
+structured_output = true
+temperature = true
+knowledge = "2023-12"
+release_date = "2024-07-23"
+last_updated = "2026-02-04"
+open_weights = true
+status = "deprecated"
 
 [cost]
 input = 1.00
 output = 3.00
+cache_read = 0.10
+cache_write = 1.25
 
 [limit]
-context = 131072
-output = 8192
+context = 131_072
+input = 124_000
+output = 8_192
 
 [modalities]
 input = ["text"]
-output = ["text"]
+output = ["text"]
\ No newline at end of file
diff --git a/providers/nebius/models/nvidia/llama-3_1-nemotron-ultra-253b-v1.toml b/providers/nebius/models/nvidia/llama-3_1-nemotron-ultra-253b-v1.toml
index bed39b6bb..422c3af1b 100644
--- a/providers/nebius/models/nvidia/llama-3_1-nemotron-ultra-253b-v1.toml
+++ b/providers/nebius/models/nvidia/llama-3_1-nemotron-ultra-253b-v1.toml
@@ -1,22 +1,25 @@
-name = "Llama 3.1 Nemotron Ultra 253B v1"
-family = "llama"
-release_date = "2024-07-01"
-last_updated = "2025-10-04"
+name = "Llama-3.1-Nemotron-Ultra-253B-v1"
 attachment = false
-reasoning = true
-temperature = true
-knowledge = "2024-07"
+reasoning = false
 tool_call = true
-open_weights = false
+structured_output = true
+temperature = true
+knowledge = "2024-12"
+release_date = "2025-01-15"
+last_updated = "2026-02-04"
+open_weights = true
 
 [cost]
 input = 0.60
 output = 1.80
+cache_read = 0.06
+cache_write = 0.75
 
 [limit]
-context = 131072
-output = 8192
+context = 128_000
+input = 120_000
+output = 4_096
 
 [modalities]
 input = ["text"]
-output = ["text"]
+output = ["text"]
\ No newline at end of file