From 4c78ea9f36e4a41d795187d759dee2e117417b56 Mon Sep 17 00:00:00 2001 From: manascb1344 Date: Wed, 4 Feb 2026 20:16:01 +0530 Subject: [PATCH 1/6] feat(nebius): add new providers for Nebius Token Factory - Add MiniMaxAI provider with MiniMax-M2.1 model - Add PrimeIntellect provider with INTELLECT-3 model - Add black-forest-labs provider with FLUX.1-schnell and FLUX.1-dev - Add BAAI provider with bge-multilingual-gemma2 and BGE-ICL - Add intfloat provider with e5-mistral-7b-instruct - Add Google provider with Gemma-2-2b-it, Gemma-2-9b-it-fast, Gemma-3-27b-it, and Gemma-3-27b-it-fast --- providers/nebius/models/BAAI/bge-en-icl.toml | 24 +++++++++++++++ .../models/BAAI/bge-multilingual-gemma2.toml | 24 +++++++++++++++ .../nebius/models/MiniMaxAI/minimax-m2.1.toml | 29 +++++++++++++++++++ .../models/PrimeIntellect/intellect-3.toml | 25 ++++++++++++++++ .../models/black-forest-labs/flux-dev.toml | 23 +++++++++++++++ .../black-forest-labs/flux-schnell.toml | 23 +++++++++++++++ .../nebius/models/google/gemma-2-2b-it.toml | 25 ++++++++++++++++ .../models/google/gemma-2-9b-it-fast.toml | 25 ++++++++++++++++ .../models/google/gemma-3-27b-it-fast.toml | 25 ++++++++++++++++ .../nebius/models/google/gemma-3-27b-it.toml | 25 ++++++++++++++++ .../intfloat/e5-mistral-7b-instruct.toml | 24 +++++++++++++++ 11 files changed, 272 insertions(+) create mode 100644 providers/nebius/models/BAAI/bge-en-icl.toml create mode 100644 providers/nebius/models/BAAI/bge-multilingual-gemma2.toml create mode 100644 providers/nebius/models/MiniMaxAI/minimax-m2.1.toml create mode 100644 providers/nebius/models/PrimeIntellect/intellect-3.toml create mode 100644 providers/nebius/models/black-forest-labs/flux-dev.toml create mode 100644 providers/nebius/models/black-forest-labs/flux-schnell.toml create mode 100644 providers/nebius/models/google/gemma-2-2b-it.toml create mode 100644 providers/nebius/models/google/gemma-2-9b-it-fast.toml create mode 100644 
providers/nebius/models/google/gemma-3-27b-it-fast.toml create mode 100644 providers/nebius/models/google/gemma-3-27b-it.toml create mode 100644 providers/nebius/models/intfloat/e5-mistral-7b-instruct.toml diff --git a/providers/nebius/models/BAAI/bge-en-icl.toml b/providers/nebius/models/BAAI/bge-en-icl.toml new file mode 100644 index 000000000..d1fb98e82 --- /dev/null +++ b/providers/nebius/models/BAAI/bge-en-icl.toml @@ -0,0 +1,24 @@ +name = "BGE-ICL" +family = "text-embedding" +attachment = false +reasoning = false +tool_call = false +structured_output = false +temperature = false +knowledge = "2024-06" +release_date = "2024-07-30" +last_updated = "2026-02-04" +open_weights = true + +[cost] +input = 0.01 +output = 0.00 + +[limit] +context = 32_768 +input = 32_768 +output = 0 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/nebius/models/BAAI/bge-multilingual-gemma2.toml b/providers/nebius/models/BAAI/bge-multilingual-gemma2.toml new file mode 100644 index 000000000..e238898eb --- /dev/null +++ b/providers/nebius/models/BAAI/bge-multilingual-gemma2.toml @@ -0,0 +1,24 @@ +name = "bge-multilingual-gemma2" +family = "text-embedding" +attachment = false +reasoning = false +tool_call = false +structured_output = false +temperature = false +knowledge = "2024-06" +release_date = "2024-07-30" +last_updated = "2026-02-04" +open_weights = true + +[cost] +input = 0.01 +output = 0.00 + +[limit] +context = 8_192 +input = 8_192 +output = 0 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/nebius/models/MiniMaxAI/minimax-m2.1.toml b/providers/nebius/models/MiniMaxAI/minimax-m2.1.toml new file mode 100644 index 000000000..add20a0a4 --- /dev/null +++ b/providers/nebius/models/MiniMaxAI/minimax-m2.1.toml @@ -0,0 +1,29 @@ +name = "MiniMax-M2.1" +attachment = false +reasoning = true +tool_call = true +structured_output = true +temperature = true +knowledge = "2025-10" 
+release_date = "2026-02-01" +last_updated = "2026-02-04" +open_weights = true + +[cost] +input = 0.30 +output = 1.20 +reasoning = 1.20 +cache_read = 0.03 +cache_write = 0.375 + +[limit] +context = 128_000 +input = 120_000 +output = 8_192 + +[modalities] +input = ["text"] +output = ["text"] + +[interleaved] +field = "reasoning_content" \ No newline at end of file diff --git a/providers/nebius/models/PrimeIntellect/intellect-3.toml b/providers/nebius/models/PrimeIntellect/intellect-3.toml new file mode 100644 index 000000000..743b1edb1 --- /dev/null +++ b/providers/nebius/models/PrimeIntellect/intellect-3.toml @@ -0,0 +1,25 @@ +name = "INTELLECT-3" +attachment = false +reasoning = false +tool_call = true +structured_output = true +temperature = true +knowledge = "2025-10" +release_date = "2026-01-25" +last_updated = "2026-02-04" +open_weights = true + +[cost] +input = 0.20 +output = 1.10 +cache_read = 0.02 +cache_write = 0.25 + +[limit] +context = 128_000 +input = 120_000 +output = 8_192 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/nebius/models/black-forest-labs/flux-dev.toml b/providers/nebius/models/black-forest-labs/flux-dev.toml new file mode 100644 index 000000000..edab8b75b --- /dev/null +++ b/providers/nebius/models/black-forest-labs/flux-dev.toml @@ -0,0 +1,23 @@ +name = "FLUX.1-dev" +attachment = false +reasoning = false +tool_call = false +structured_output = false +temperature = false +knowledge = "2024-07" +release_date = "2024-08-01" +last_updated = "2026-02-04" +open_weights = true + +[cost] +input = 0.00 +output = 0.00 + +[limit] +context = 77 +input = 77 +output = 0 + +[modalities] +input = ["text"] +output = ["image"] \ No newline at end of file diff --git a/providers/nebius/models/black-forest-labs/flux-schnell.toml b/providers/nebius/models/black-forest-labs/flux-schnell.toml new file mode 100644 index 000000000..9b3f2a93f --- /dev/null +++ 
b/providers/nebius/models/black-forest-labs/flux-schnell.toml @@ -0,0 +1,23 @@ +name = "FLUX.1-schnell" +attachment = false +reasoning = false +tool_call = false +structured_output = false +temperature = false +knowledge = "2024-07" +release_date = "2024-08-01" +last_updated = "2026-02-04" +open_weights = true + +[cost] +input = 0.00 +output = 0.00 + +[limit] +context = 77 +input = 77 +output = 0 + +[modalities] +input = ["text"] +output = ["image"] \ No newline at end of file diff --git a/providers/nebius/models/google/gemma-2-2b-it.toml b/providers/nebius/models/google/gemma-2-2b-it.toml new file mode 100644 index 000000000..f5e60c4ed --- /dev/null +++ b/providers/nebius/models/google/gemma-2-2b-it.toml @@ -0,0 +1,25 @@ +name = "Gemma-2-2b-it" +attachment = false +reasoning = false +tool_call = false +structured_output = false +temperature = true +knowledge = "2024-06" +release_date = "2024-07-31" +last_updated = "2026-02-04" +open_weights = true + +[cost] +input = 0.02 +output = 0.06 +cache_read = 0.002 +cache_write = 0.025 + +[limit] +context = 8_192 +input = 8_000 +output = 4_096 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/nebius/models/google/gemma-2-9b-it-fast.toml b/providers/nebius/models/google/gemma-2-9b-it-fast.toml new file mode 100644 index 000000000..f7eab6454 --- /dev/null +++ b/providers/nebius/models/google/gemma-2-9b-it-fast.toml @@ -0,0 +1,25 @@ +name = "Gemma-2-9b-it (Fast)" +attachment = false +reasoning = false +tool_call = false +structured_output = false +temperature = true +knowledge = "2024-06" +release_date = "2024-06-27" +last_updated = "2026-02-04" +open_weights = true + +[cost] +input = 0.03 +output = 0.09 +cache_read = 0.003 +cache_write = 0.0375 + +[limit] +context = 8_192 +input = 8_000 +output = 4_096 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/nebius/models/google/gemma-3-27b-it-fast.toml 
b/providers/nebius/models/google/gemma-3-27b-it-fast.toml new file mode 100644 index 000000000..ab80f0f5f --- /dev/null +++ b/providers/nebius/models/google/gemma-3-27b-it-fast.toml @@ -0,0 +1,25 @@ +name = "Gemma-3-27b-it (Fast)" +attachment = true +reasoning = false +tool_call = true +structured_output = true +temperature = true +knowledge = "2025-10" +release_date = "2026-01-20" +last_updated = "2026-02-04" +open_weights = true + +[cost] +input = 0.20 +output = 0.60 +cache_read = 0.02 +cache_write = 0.25 + +[limit] +context = 128_000 +input = 120_000 +output = 8_192 + +[modalities] +input = ["text", "image"] +output = ["text"] \ No newline at end of file diff --git a/providers/nebius/models/google/gemma-3-27b-it.toml b/providers/nebius/models/google/gemma-3-27b-it.toml new file mode 100644 index 000000000..24e4a8fea --- /dev/null +++ b/providers/nebius/models/google/gemma-3-27b-it.toml @@ -0,0 +1,25 @@ +name = "Gemma-3-27b-it" +attachment = true +reasoning = false +tool_call = true +structured_output = true +temperature = true +knowledge = "2025-10" +release_date = "2026-01-20" +last_updated = "2026-02-04" +open_weights = true + +[cost] +input = 0.10 +output = 0.30 +cache_read = 0.01 +cache_write = 0.125 + +[limit] +context = 128_000 +input = 120_000 +output = 8_192 + +[modalities] +input = ["text", "image"] +output = ["text"] \ No newline at end of file diff --git a/providers/nebius/models/intfloat/e5-mistral-7b-instruct.toml b/providers/nebius/models/intfloat/e5-mistral-7b-instruct.toml new file mode 100644 index 000000000..e19f6ddfc --- /dev/null +++ b/providers/nebius/models/intfloat/e5-mistral-7b-instruct.toml @@ -0,0 +1,24 @@ +name = "e5-mistral-7b-instruct" +family = "text-embedding" +attachment = false +reasoning = false +tool_call = false +structured_output = false +temperature = false +knowledge = "2023-12" +release_date = "2024-01-01" +last_updated = "2026-02-04" +open_weights = true + +[cost] +input = 0.01 +output = 0.00 + +[limit] +context = 32_768 
+input = 32_768 +output = 0 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file From 51fe42982fbc0eaf4dc038c26d2f3fa754b3b724 Mon Sep 17 00:00:00 2001 From: manascb1344 Date: Wed, 4 Feb 2026 20:16:21 +0530 Subject: [PATCH 2/6] feat(nebius): add DeepSeek models to Token Factory - Add DeepSeek-V3.2, DeepSeek-V3-0324 (Base & Fast), DeepSeek-R1-0528 (Base & Fast) - These are new models available on Nebius Token Factory --- .../deepseek-ai/deepseek-r1-0528-fast.toml | 22 ++++++++++++++ .../models/deepseek-ai/deepseek-r1-0528.toml | 29 +++++++++++++++++++ .../deepseek-ai/deepseek-v3-0324-fast.toml | 25 ++++++++++++++++ .../models/deepseek-ai/deepseek-v3-0324.toml | 25 ++++++++++++++++ .../models/deepseek-ai/deepseek-v3.2.toml | 29 +++++++++++++++++++ 5 files changed, 130 insertions(+) create mode 100644 providers/nebius/models/deepseek-ai/deepseek-r1-0528-fast.toml create mode 100644 providers/nebius/models/deepseek-ai/deepseek-r1-0528.toml create mode 100644 providers/nebius/models/deepseek-ai/deepseek-v3-0324-fast.toml create mode 100644 providers/nebius/models/deepseek-ai/deepseek-v3-0324.toml create mode 100644 providers/nebius/models/deepseek-ai/deepseek-v3.2.toml diff --git a/providers/nebius/models/deepseek-ai/deepseek-r1-0528-fast.toml b/providers/nebius/models/deepseek-ai/deepseek-r1-0528-fast.toml new file mode 100644 index 000000000..bf2c825ca --- /dev/null +++ b/providers/nebius/models/deepseek-ai/deepseek-r1-0528-fast.toml @@ -0,0 +1,22 @@ +name = "DeepSeek R1 0528 Fast" +family = "deepseek" +release_date = "2025-01-01" +last_updated = "2026-02-04" +attachment = false +reasoning = true +temperature = true +knowledge = "2025-01" +tool_call = true +open_weights = true + +[cost] +input = 2.00 +output = 6.00 + +[limit] +context = 131072 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/nebius/models/deepseek-ai/deepseek-r1-0528.toml 
b/providers/nebius/models/deepseek-ai/deepseek-r1-0528.toml new file mode 100644 index 000000000..c107fff95 --- /dev/null +++ b/providers/nebius/models/deepseek-ai/deepseek-r1-0528.toml @@ -0,0 +1,29 @@ +name = "DeepSeek-R1-0528" +attachment = false +reasoning = true +tool_call = true +structured_output = true +temperature = true +knowledge = "2025-11" +release_date = "2026-01-15" +last_updated = "2026-02-04" +open_weights = true + +[cost] +input = 0.80 +output = 2.40 +reasoning = 2.40 +cache_read = 0.08 +cache_write = 1.00 + +[limit] +context = 128_000 +input = 120_000 +output = 32_768 + +[modalities] +input = ["text"] +output = ["text"] + +[interleaved] +field = "reasoning_content" \ No newline at end of file diff --git a/providers/nebius/models/deepseek-ai/deepseek-v3-0324-fast.toml b/providers/nebius/models/deepseek-ai/deepseek-v3-0324-fast.toml new file mode 100644 index 000000000..97974a613 --- /dev/null +++ b/providers/nebius/models/deepseek-ai/deepseek-v3-0324-fast.toml @@ -0,0 +1,25 @@ +name = "DeepSeek-V3-0324 (Fast)" +attachment = false +reasoning = false +tool_call = true +structured_output = true +temperature = true +knowledge = "2024-12" +release_date = "2025-03-24" +last_updated = "2026-02-04" +open_weights = true + +[cost] +input = 0.75 +output = 2.25 +cache_read = 0.075 +cache_write = 0.28125 + +[limit] +context = 128_000 +input = 120_000 +output = 8_192 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/nebius/models/deepseek-ai/deepseek-v3-0324.toml b/providers/nebius/models/deepseek-ai/deepseek-v3-0324.toml new file mode 100644 index 000000000..c86165e13 --- /dev/null +++ b/providers/nebius/models/deepseek-ai/deepseek-v3-0324.toml @@ -0,0 +1,25 @@ +name = "DeepSeek-V3-0324" +attachment = false +reasoning = false +tool_call = true +structured_output = true +temperature = true +knowledge = "2024-12" +release_date = "2025-03-24" +last_updated = "2026-02-04" +open_weights = true + +[cost] +input = 
0.50 +output = 1.50 +cache_read = 0.05 +cache_write = 0.1875 + +[limit] +context = 128_000 +input = 120_000 +output = 8_192 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/nebius/models/deepseek-ai/deepseek-v3.2.toml b/providers/nebius/models/deepseek-ai/deepseek-v3.2.toml new file mode 100644 index 000000000..330071236 --- /dev/null +++ b/providers/nebius/models/deepseek-ai/deepseek-v3.2.toml @@ -0,0 +1,29 @@ +name = "DeepSeek-V3.2" +attachment = false +reasoning = true +tool_call = true +structured_output = true +temperature = true +knowledge = "2025-11" +release_date = "2026-01-20" +last_updated = "2026-02-04" +open_weights = true + +[cost] +input = 0.30 +output = 0.45 +reasoning = 0.45 +cache_read = 0.03 +cache_write = 0.375 + +[limit] +context = 128_000 +input = 120_000 +output = 8_192 + +[modalities] +input = ["text"] +output = ["text"] + +[interleaved] +field = "reasoning_content" \ No newline at end of file From ca6206b78e49cd8ec2a84c9bcf9721c8839afd42 Mon Sep 17 00:00:00 2001 From: manascb1344 Date: Wed, 4 Feb 2026 20:16:46 +0530 Subject: [PATCH 3/6] feat(nebius): add Qwen models to Token Factory - Add Qwen3-Next-80B-A3B-Thinking - Add Qwen3-30B-A3B-Thinking-2507 and Qwen3-30B-A3B-Instruct-2507 - Add Qwen3-Coder-30B-A3B-Instruct - Add Qwen3-32B (Base & Fast) - Add Qwen2.5-Coder-7B-fast - Add Qwen2.5-VL-72B-Instruct - Add Qwen3-Embedding-8B --- .../models/qwen/qwen2.5-coder-7b-fast.toml | 25 ++++++++++++++++ .../models/qwen/qwen2.5-vl-72b-instruct.toml | 25 ++++++++++++++++ .../qwen/qwen3-30b-a3b-instruct-2507.toml | 25 ++++++++++++++++ .../qwen/qwen3-30b-a3b-thinking-2507.toml | 29 +++++++++++++++++++ .../nebius/models/qwen/qwen3-32b-fast.toml | 25 ++++++++++++++++ providers/nebius/models/qwen/qwen3-32b.toml | 25 ++++++++++++++++ .../qwen/qwen3-coder-30b-a3b-instruct.toml | 25 ++++++++++++++++ .../models/qwen/qwen3-embedding-8b.toml | 24 +++++++++++++++ .../qwen/qwen3-next-80b-a3b-thinking.toml | 29 
+++++++++++++++++++ 9 files changed, 232 insertions(+) create mode 100644 providers/nebius/models/qwen/qwen2.5-coder-7b-fast.toml create mode 100644 providers/nebius/models/qwen/qwen2.5-vl-72b-instruct.toml create mode 100644 providers/nebius/models/qwen/qwen3-30b-a3b-instruct-2507.toml create mode 100644 providers/nebius/models/qwen/qwen3-30b-a3b-thinking-2507.toml create mode 100644 providers/nebius/models/qwen/qwen3-32b-fast.toml create mode 100644 providers/nebius/models/qwen/qwen3-32b.toml create mode 100644 providers/nebius/models/qwen/qwen3-coder-30b-a3b-instruct.toml create mode 100644 providers/nebius/models/qwen/qwen3-embedding-8b.toml create mode 100644 providers/nebius/models/qwen/qwen3-next-80b-a3b-thinking.toml diff --git a/providers/nebius/models/qwen/qwen2.5-coder-7b-fast.toml b/providers/nebius/models/qwen/qwen2.5-coder-7b-fast.toml new file mode 100644 index 000000000..34dc142e7 --- /dev/null +++ b/providers/nebius/models/qwen/qwen2.5-coder-7b-fast.toml @@ -0,0 +1,25 @@ +name = "Qwen2.5-Coder-7B (Fast)" +attachment = false +reasoning = false +tool_call = true +structured_output = true +temperature = true +knowledge = "2024-09" +release_date = "2024-09-19" +last_updated = "2026-02-04" +open_weights = true + +[cost] +input = 0.03 +output = 0.09 +cache_read = 0.003 +cache_write = 0.03 + +[limit] +context = 128_000 +input = 120_000 +output = 8_192 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/nebius/models/qwen/qwen2.5-vl-72b-instruct.toml b/providers/nebius/models/qwen/qwen2.5-vl-72b-instruct.toml new file mode 100644 index 000000000..b21f2c926 --- /dev/null +++ b/providers/nebius/models/qwen/qwen2.5-vl-72b-instruct.toml @@ -0,0 +1,25 @@ +name = "Qwen2.5-VL-72B-Instruct" +attachment = true +reasoning = false +tool_call = true +structured_output = true +temperature = true +knowledge = "2024-12" +release_date = "2025-01-20" +last_updated = "2026-02-04" +open_weights = true + +[cost] +input = 
0.25 +output = 0.75 +cache_read = 0.025 +cache_write = 0.31 + +[limit] +context = 128_000 +input = 120_000 +output = 8_192 + +[modalities] +input = ["text", "image"] +output = ["text"] \ No newline at end of file diff --git a/providers/nebius/models/qwen/qwen3-30b-a3b-instruct-2507.toml b/providers/nebius/models/qwen/qwen3-30b-a3b-instruct-2507.toml new file mode 100644 index 000000000..96c2d3f36 --- /dev/null +++ b/providers/nebius/models/qwen/qwen3-30b-a3b-instruct-2507.toml @@ -0,0 +1,25 @@ +name = "Qwen3-30B-A3B-Instruct-2507" +attachment = false +reasoning = false +tool_call = true +structured_output = true +temperature = true +knowledge = "2025-12" +release_date = "2026-01-28" +last_updated = "2026-02-04" +open_weights = true + +[cost] +input = 0.10 +output = 0.30 +cache_read = 0.01 +cache_write = 0.125 + +[limit] +context = 128_000 +input = 120_000 +output = 8_192 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/nebius/models/qwen/qwen3-30b-a3b-thinking-2507.toml b/providers/nebius/models/qwen/qwen3-30b-a3b-thinking-2507.toml new file mode 100644 index 000000000..375b81e4e --- /dev/null +++ b/providers/nebius/models/qwen/qwen3-30b-a3b-thinking-2507.toml @@ -0,0 +1,29 @@ +name = "Qwen3-30B-A3B-Thinking-2507" +attachment = false +reasoning = true +tool_call = true +structured_output = true +temperature = true +knowledge = "2025-12" +release_date = "2026-01-28" +last_updated = "2026-02-04" +open_weights = true + +[cost] +input = 0.10 +output = 0.30 +reasoning = 0.30 +cache_read = 0.01 +cache_write = 0.125 + +[limit] +context = 128_000 +input = 120_000 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] + +[interleaved] +field = "reasoning_content" \ No newline at end of file diff --git a/providers/nebius/models/qwen/qwen3-32b-fast.toml b/providers/nebius/models/qwen/qwen3-32b-fast.toml new file mode 100644 index 000000000..b14e5b8f8 --- /dev/null +++ 
b/providers/nebius/models/qwen/qwen3-32b-fast.toml @@ -0,0 +1,25 @@ +name = "Qwen3-32B (Fast)" +attachment = false +reasoning = false +tool_call = true +structured_output = true +temperature = true +knowledge = "2025-12" +release_date = "2026-01-28" +last_updated = "2026-02-04" +open_weights = true + +[cost] +input = 0.20 +output = 0.60 +cache_read = 0.02 +cache_write = 0.25 + +[limit] +context = 128_000 +input = 120_000 +output = 8_192 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/nebius/models/qwen/qwen3-32b.toml b/providers/nebius/models/qwen/qwen3-32b.toml new file mode 100644 index 000000000..6c619f6e5 --- /dev/null +++ b/providers/nebius/models/qwen/qwen3-32b.toml @@ -0,0 +1,25 @@ +name = "Qwen3-32B" +attachment = false +reasoning = false +tool_call = true +structured_output = true +temperature = true +knowledge = "2025-12" +release_date = "2026-01-28" +last_updated = "2026-02-04" +open_weights = true + +[cost] +input = 0.10 +output = 0.30 +cache_read = 0.01 +cache_write = 0.125 + +[limit] +context = 128_000 +input = 120_000 +output = 8_192 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/nebius/models/qwen/qwen3-coder-30b-a3b-instruct.toml b/providers/nebius/models/qwen/qwen3-coder-30b-a3b-instruct.toml new file mode 100644 index 000000000..5d3d1849e --- /dev/null +++ b/providers/nebius/models/qwen/qwen3-coder-30b-a3b-instruct.toml @@ -0,0 +1,25 @@ +name = "Qwen3-Coder-30B-A3B-Instruct" +attachment = false +reasoning = false +tool_call = true +structured_output = true +temperature = true +knowledge = "2025-12" +release_date = "2026-01-28" +last_updated = "2026-02-04" +open_weights = true + +[cost] +input = 0.10 +output = 0.30 +cache_read = 0.01 +cache_write = 0.125 + +[limit] +context = 128_000 +input = 120_000 +output = 8_192 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git 
a/providers/nebius/models/qwen/qwen3-embedding-8b.toml b/providers/nebius/models/qwen/qwen3-embedding-8b.toml new file mode 100644 index 000000000..205cee1bc --- /dev/null +++ b/providers/nebius/models/qwen/qwen3-embedding-8b.toml @@ -0,0 +1,24 @@ +name = "Qwen3-Embedding-8B" +family = "text-embedding" +attachment = false +reasoning = false +tool_call = false +structured_output = false +temperature = false +knowledge = "2025-10" +release_date = "2026-01-10" +last_updated = "2026-02-04" +open_weights = true + +[cost] +input = 0.01 +output = 0.00 + +[limit] +context = 32_768 +input = 32_768 +output = 0 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/nebius/models/qwen/qwen3-next-80b-a3b-thinking.toml b/providers/nebius/models/qwen/qwen3-next-80b-a3b-thinking.toml new file mode 100644 index 000000000..ca0b79132 --- /dev/null +++ b/providers/nebius/models/qwen/qwen3-next-80b-a3b-thinking.toml @@ -0,0 +1,29 @@ +name = "Qwen3-Next-80B-A3B-Thinking" +attachment = false +reasoning = true +tool_call = true +structured_output = true +temperature = true +knowledge = "2025-12" +release_date = "2026-01-28" +last_updated = "2026-02-04" +open_weights = true + +[cost] +input = 0.15 +output = 1.20 +reasoning = 1.20 +cache_read = 0.015 +cache_write = 0.18 + +[limit] +context = 128_000 +input = 120_000 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] + +[interleaved] +field = "reasoning_content" \ No newline at end of file From 2639e20a97118e4ebc438fb8e1111be1c878544f Mon Sep 17 00:00:00 2001 From: manascb1344 Date: Wed, 4 Feb 2026 20:17:14 +0530 Subject: [PATCH 4/6] feat(nebius): add new models from Z.ai, Moonshot AI, Meta, and NVIDIA - Add GLM-4.7 and GLM-4.7-FP8 (Z.ai) - Add Kimi-K2-Thinking (Moonshot AI) - Add Llama-Guard-3-8B, Meta-Llama-3.1-8B-Instruct (Base & Fast) (Meta) - Add Nemotron-Nano-V2-12b and NVIDIA-Nemotron-3-Nano-30B-A3B (NVIDIA) --- .../models/meta-llama/llama-guard-3-8b.toml | 25 
++++++++++++++++ .../meta-llama-3.1-8b-instruct-fast.toml | 25 ++++++++++++++++ .../meta-llama-3.1-8b-instruct.toml | 25 ++++++++++++++++ .../models/moonshotai/kimi-k2-thinking.toml | 29 +++++++++++++++++++ .../models/nvidia/nemotron-nano-v2-12b.toml | 25 ++++++++++++++++ .../nvidia-nemotron-3-nano-30b-a3b.toml | 25 ++++++++++++++++ .../nebius/models/zai-org/glm-4.7-fp8.toml | 25 ++++++++++++++++ providers/nebius/models/zai-org/glm-4.7.toml | 22 ++++++++++++++ 8 files changed, 201 insertions(+) create mode 100644 providers/nebius/models/meta-llama/llama-guard-3-8b.toml create mode 100644 providers/nebius/models/meta-llama/meta-llama-3.1-8b-instruct-fast.toml create mode 100644 providers/nebius/models/meta-llama/meta-llama-3.1-8b-instruct.toml create mode 100644 providers/nebius/models/moonshotai/kimi-k2-thinking.toml create mode 100644 providers/nebius/models/nvidia/nemotron-nano-v2-12b.toml create mode 100644 providers/nebius/models/nvidia/nvidia-nemotron-3-nano-30b-a3b.toml create mode 100644 providers/nebius/models/zai-org/glm-4.7-fp8.toml create mode 100644 providers/nebius/models/zai-org/glm-4.7.toml diff --git a/providers/nebius/models/meta-llama/llama-guard-3-8b.toml b/providers/nebius/models/meta-llama/llama-guard-3-8b.toml new file mode 100644 index 000000000..c072b096f --- /dev/null +++ b/providers/nebius/models/meta-llama/llama-guard-3-8b.toml @@ -0,0 +1,25 @@ +name = "Llama-Guard-3-8B" +attachment = false +reasoning = false +tool_call = false +structured_output = true +temperature = false +knowledge = "2024-04" +release_date = "2024-04-18" +last_updated = "2026-02-04" +open_weights = true + +[cost] +input = 0.02 +output = 0.06 +cache_read = 0.002 +cache_write = 0.025 + +[limit] +context = 8_192 +input = 8_000 +output = 1_024 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/nebius/models/meta-llama/meta-llama-3.1-8b-instruct-fast.toml 
b/providers/nebius/models/meta-llama/meta-llama-3.1-8b-instruct-fast.toml new file mode 100644 index 000000000..ac2c13268 --- /dev/null +++ b/providers/nebius/models/meta-llama/meta-llama-3.1-8b-instruct-fast.toml @@ -0,0 +1,25 @@ +name = "Meta-Llama-3.1-8B-Instruct (Fast)" +attachment = false +reasoning = false +tool_call = true +structured_output = true +temperature = true +knowledge = "2023-12" +release_date = "2024-07-23" +last_updated = "2026-02-04" +open_weights = true + +[cost] +input = 0.03 +output = 0.09 +cache_read = 0.003 +cache_write = 0.03 + +[limit] +context = 128_000 +input = 120_000 +output = 4_096 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/nebius/models/meta-llama/meta-llama-3.1-8b-instruct.toml b/providers/nebius/models/meta-llama/meta-llama-3.1-8b-instruct.toml new file mode 100644 index 000000000..ddb722a68 --- /dev/null +++ b/providers/nebius/models/meta-llama/meta-llama-3.1-8b-instruct.toml @@ -0,0 +1,25 @@ +name = "Meta-Llama-3.1-8B-Instruct" +attachment = false +reasoning = false +tool_call = true +structured_output = true +temperature = true +knowledge = "2023-12" +release_date = "2024-07-23" +last_updated = "2026-02-04" +open_weights = true + +[cost] +input = 0.02 +output = 0.06 +cache_read = 0.002 +cache_write = 0.025 + +[limit] +context = 128_000 +input = 120_000 +output = 4_096 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/nebius/models/moonshotai/kimi-k2-thinking.toml b/providers/nebius/models/moonshotai/kimi-k2-thinking.toml new file mode 100644 index 000000000..a4af04c79 --- /dev/null +++ b/providers/nebius/models/moonshotai/kimi-k2-thinking.toml @@ -0,0 +1,29 @@ +name = "Kimi-K2-Thinking" +attachment = false +reasoning = true +tool_call = true +structured_output = true +temperature = true +knowledge = "2025-10" +release_date = "2026-01-05" +last_updated = "2026-02-04" +open_weights = true + +[cost] +input = 0.60 
+output = 2.50 +reasoning = 2.50 +cache_read = 0.06 +cache_write = 0.75 + +[limit] +context = 128_000 +input = 120_000 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] + +[interleaved] +field = "reasoning_content" \ No newline at end of file diff --git a/providers/nebius/models/nvidia/nemotron-nano-v2-12b.toml b/providers/nebius/models/nvidia/nemotron-nano-v2-12b.toml new file mode 100644 index 000000000..c11729d6b --- /dev/null +++ b/providers/nebius/models/nvidia/nemotron-nano-v2-12b.toml @@ -0,0 +1,25 @@ +name = "Nemotron-Nano-V2-12b" +attachment = false +reasoning = false +tool_call = true +structured_output = true +temperature = true +knowledge = "2025-01" +release_date = "2025-03-15" +last_updated = "2026-02-04" +open_weights = true + +[cost] +input = 0.07 +output = 0.20 +cache_read = 0.007 +cache_write = 0.08 + +[limit] +context = 32_000 +input = 30_000 +output = 4_096 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/nebius/models/nvidia/nvidia-nemotron-3-nano-30b-a3b.toml b/providers/nebius/models/nvidia/nvidia-nemotron-3-nano-30b-a3b.toml new file mode 100644 index 000000000..c29a4b957 --- /dev/null +++ b/providers/nebius/models/nvidia/nvidia-nemotron-3-nano-30b-a3b.toml @@ -0,0 +1,25 @@ +name = "Nemotron-3-Nano-30B-A3B" +attachment = false +reasoning = false +tool_call = true +structured_output = true +temperature = true +knowledge = "2025-05" +release_date = "2025-08-10" +last_updated = "2026-02-04" +open_weights = true + +[cost] +input = 0.06 +output = 0.24 +cache_read = 0.006 +cache_write = 0.075 + +[limit] +context = 32_000 +input = 30_000 +output = 4_096 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/nebius/models/zai-org/glm-4.7-fp8.toml b/providers/nebius/models/zai-org/glm-4.7-fp8.toml new file mode 100644 index 000000000..856616689 --- /dev/null +++ b/providers/nebius/models/zai-org/glm-4.7-fp8.toml @@ -0,0 +1,25 @@ 
+name = "GLM-4.7 (FP8)" +attachment = false +reasoning = false +tool_call = true +structured_output = true +temperature = true +knowledge = "2025-12" +release_date = "2026-01-15" +last_updated = "2026-02-04" +open_weights = false + +[cost] +input = 0.40 +output = 2.00 +cache_read = 0.04 +cache_write = 0.50 + +[limit] +context = 128_000 +input = 124_000 +output = 4_096 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/nebius/models/zai-org/glm-4.7.toml b/providers/nebius/models/zai-org/glm-4.7.toml new file mode 100644 index 000000000..115d44535 --- /dev/null +++ b/providers/nebius/models/zai-org/glm-4.7.toml @@ -0,0 +1,22 @@ +name = "GLM 4.7" +family = "glm" +release_date = "2025-01-01" +last_updated = "2026-02-04" +attachment = false +reasoning = true +temperature = true +knowledge = "2025-01" +tool_call = true +open_weights = false + +[cost] +input = 0.40 +output = 2.00 + +[limit] +context = 131072 +output = 8192 + +[modalities] +input = ["text"] +output = ["text"] \ No newline at end of file From 578df73ffbfc93d8d7634d6d6c61d442d29c8b7d Mon Sep 17 00:00:00 2001 From: manascb1344 Date: Wed, 4 Feb 2026 20:17:51 +0530 Subject: [PATCH 5/6] feat(nebius): update Z.ai, OpenAI, Moonshot AI, and NousResearch models - Update GLM-4.5 and GLM-4.5-Air with new pricing - Update gpt-oss-120b and gpt-oss-20b with new pricing and features - Update Kimi-K2-Instruct with new pricing and multimodal support - Update Hermes-4-405B and Hermes-4-70B with new pricing --- .../models/NousResearch/hermes-4-405b.toml | 25 ++++++++++------- .../models/NousResearch/hermes-4-70b.toml | 25 ++++++++++------- .../models/moonshotai/kimi-k2-instruct.toml | 27 ++++++++++--------- .../nebius/models/openai/gpt-oss-120b.toml | 27 ++++++++++++------- .../nebius/models/openai/gpt-oss-20b.toml | 27 ++++++++++--------- .../nebius/models/zai-org/glm-4.5-air.toml | 23 +++++++++------- providers/nebius/models/zai-org/glm-4.5.toml | 23 +++++++++------- 7 
files changed, 105 insertions(+), 72 deletions(-) diff --git a/providers/nebius/models/NousResearch/hermes-4-405b.toml b/providers/nebius/models/NousResearch/hermes-4-405b.toml index aabe24096..728ad3080 100644 --- a/providers/nebius/models/NousResearch/hermes-4-405b.toml +++ b/providers/nebius/models/NousResearch/hermes-4-405b.toml @@ -1,22 +1,29 @@ -name = "Hermes-4 405B" -family = "hermes" -release_date = "2024-08-01" -last_updated = "2025-10-04" +name = "Hermes-4-405B" attachment = false reasoning = true -temperature = true -knowledge = "2024-07" tool_call = true -open_weights = false +structured_output = true +temperature = true +knowledge = "2025-11" +release_date = "2026-01-30" +last_updated = "2026-02-04" +open_weights = true [cost] input = 1.00 output = 3.00 +reasoning = 3.00 +cache_read = 0.10 +cache_write = 1.25 [limit] -context = 131072 -output = 8192 +context = 128_000 +input = 120_000 +output = 8_192 [modalities] input = ["text"] output = ["text"] + +[interleaved] +field = "reasoning_content" \ No newline at end of file diff --git a/providers/nebius/models/NousResearch/hermes-4-70b.toml b/providers/nebius/models/NousResearch/hermes-4-70b.toml index e242ac6c2..7080f0827 100644 --- a/providers/nebius/models/NousResearch/hermes-4-70b.toml +++ b/providers/nebius/models/NousResearch/hermes-4-70b.toml @@ -1,22 +1,29 @@ -name = "Hermes 4 70B" -family = "hermes" -release_date = "2024-08-01" -last_updated = "2025-10-04" +name = "Hermes-4-70B" attachment = false reasoning = true -temperature = true -knowledge = "2024-07" tool_call = true -open_weights = false +structured_output = true +temperature = true +knowledge = "2025-11" +release_date = "2026-01-30" +last_updated = "2026-02-04" +open_weights = true [cost] input = 0.13 output = 0.40 +reasoning = 0.40 +cache_read = 0.013 +cache_write = 0.16 [limit] -context = 131072 -output = 8192 +context = 128_000 +input = 120_000 +output = 8_192 [modalities] input = ["text"] output = ["text"] + +[interleaved] +field = 
"reasoning_content" \ No newline at end of file diff --git a/providers/nebius/models/moonshotai/kimi-k2-instruct.toml b/providers/nebius/models/moonshotai/kimi-k2-instruct.toml index 84b8a7da5..d4c144c9a 100644 --- a/providers/nebius/models/moonshotai/kimi-k2-instruct.toml +++ b/providers/nebius/models/moonshotai/kimi-k2-instruct.toml @@ -1,22 +1,25 @@ -name = "Kimi K2 Instruct" -family = "kimi" -release_date = "2025-01-01" -last_updated = "2025-10-04" -attachment = false -reasoning = true -temperature = true -knowledge = "2024-01" +name = "Kimi-K2-Instruct" +attachment = true +reasoning = false tool_call = true +structured_output = true +temperature = true +knowledge = "2025-10" +release_date = "2026-01-05" +last_updated = "2026-02-04" open_weights = false [cost] input = 0.50 output = 2.40 +cache_read = 0.05 +cache_write = 0.625 [limit] -context = 131072 -output = 8192 +context = 200_000 +input = 190_000 +output = 8_192 [modalities] -input = ["text"] -output = ["text"] +input = ["text", "image"] +output = ["text"] \ No newline at end of file diff --git a/providers/nebius/models/openai/gpt-oss-120b.toml b/providers/nebius/models/openai/gpt-oss-120b.toml index 0b28d7a9a..2e61d87f2 100644 --- a/providers/nebius/models/openai/gpt-oss-120b.toml +++ b/providers/nebius/models/openai/gpt-oss-120b.toml @@ -1,22 +1,29 @@ -name = "GPT OSS 120B" -family = "gpt-oss" -release_date = "2024-01-01" -last_updated = "2025-10-04" -attachment = true +name = "gpt-oss-120b" +attachment = false reasoning = true -temperature = true -knowledge = "2024-01" tool_call = true -open_weights = false +structured_output = true +temperature = true +knowledge = "2025-09" +release_date = "2026-01-10" +last_updated = "2026-02-04" +open_weights = true [cost] input = 0.15 output = 0.60 +reasoning = 0.60 +cache_read = 0.015 +cache_write = 0.18 [limit] -context = 131072 -output = 8192 +context = 128_000 +input = 124_000 +output = 8_192 [modalities] input = ["text"] output = ["text"] + +[interleaved] 
+field = "reasoning_content" \ No newline at end of file diff --git a/providers/nebius/models/openai/gpt-oss-20b.toml b/providers/nebius/models/openai/gpt-oss-20b.toml index 49dd525d1..d881b3da6 100644 --- a/providers/nebius/models/openai/gpt-oss-20b.toml +++ b/providers/nebius/models/openai/gpt-oss-20b.toml @@ -1,22 +1,25 @@ -name = "GPT OSS 20B" -family = "gpt-oss" -release_date = "2024-01-01" -last_updated = "2025-10-04" -attachment = true -reasoning = true -temperature = true -knowledge = "2024-01" +name = "gpt-oss-20b" +attachment = false +reasoning = false tool_call = true -open_weights = false +structured_output = true +temperature = true +knowledge = "2025-09" +release_date = "2026-01-10" +last_updated = "2026-02-04" +open_weights = true [cost] input = 0.05 output = 0.20 +cache_read = 0.005 +cache_write = 0.06 [limit] -context = 131072 -output = 8192 +context = 128_000 +input = 124_000 +output = 4_096 [modalities] input = ["text"] -output = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/nebius/models/zai-org/glm-4.5-air.toml b/providers/nebius/models/zai-org/glm-4.5-air.toml index 4cafdff07..f0eb1e418 100644 --- a/providers/nebius/models/zai-org/glm-4.5-air.toml +++ b/providers/nebius/models/zai-org/glm-4.5-air.toml @@ -1,22 +1,25 @@ -name = "GLM 4.5 Air" -family = "glm-air" -release_date = "2024-06-01" -last_updated = "2025-10-04" +name = "GLM-4.5-Air" attachment = false -reasoning = true -temperature = true -knowledge = "2024-05" +reasoning = false tool_call = true +structured_output = true +temperature = true +knowledge = "2025-06" +release_date = "2025-11-15" +last_updated = "2026-02-04" open_weights = false [cost] input = 0.20 output = 1.20 +cache_read = 0.02 +cache_write = 0.25 [limit] -context = 131072 -output = 8192 +context = 128_000 +input = 124_000 +output = 4_096 [modalities] input = ["text"] -output = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/nebius/models/zai-org/glm-4.5.toml 
b/providers/nebius/models/zai-org/glm-4.5.toml index cd27b825e..fbae120ad 100644 --- a/providers/nebius/models/zai-org/glm-4.5.toml +++ b/providers/nebius/models/zai-org/glm-4.5.toml @@ -1,22 +1,25 @@ -name = "GLM 4.5" -family = "glm" -release_date = "2024-06-01" -last_updated = "2025-10-04" +name = "GLM-4.5" attachment = false -reasoning = true -temperature = true -knowledge = "2024-05" +reasoning = false tool_call = true +structured_output = true +temperature = true +knowledge = "2025-06" +release_date = "2025-11-15" +last_updated = "2026-02-04" open_weights = false [cost] input = 0.60 output = 2.20 +cache_read = 0.06 +cache_write = 0.75 [limit] -context = 131072 -output = 8192 +context = 128_000 +input = 124_000 +output = 4_096 [modalities] input = ["text"] -output = ["text"] +output = ["text"] \ No newline at end of file From 42a79c717d2b9a5834118243ca9b38429c977033 Mon Sep 17 00:00:00 2001 From: manascb1344 Date: Wed, 4 Feb 2026 20:18:37 +0530 Subject: [PATCH 6/6] feat(nebius): update Meta-Llama, NVIDIA models and mark deprecated - Update Llama-3.3-70B-Instruct (Base & Fast) with new pricing - Mark Llama-3.1-405B-Instruct as deprecated (no longer available) - Update Llama-3.1-Nemotron-Ultra-253B-v1 with new pricing - Mark DeepSeek-V3 as deprecated (replaced by V3.2 and V3-0324) --- .../models/deepseek-ai/deepseek-v3.toml | 3 ++- .../llama-3.3-70b-instruct-base.toml | 25 ++++++++++-------- .../llama-3.3-70b-instruct-fast.toml | 23 +++++++++------- .../meta-llama/llama-3_1-405b-instruct.toml | 26 +++++++++++-------- .../llama-3_1-nemotron-ultra-253b-v1.toml | 25 ++++++++++-------- 5 files changed, 58 insertions(+), 44 deletions(-) diff --git a/providers/nebius/models/deepseek-ai/deepseek-v3.toml b/providers/nebius/models/deepseek-ai/deepseek-v3.toml index 4a638b4b6..802211e73 100644 --- a/providers/nebius/models/deepseek-ai/deepseek-v3.toml +++ b/providers/nebius/models/deepseek-ai/deepseek-v3.toml @@ -1,13 +1,14 @@ name = "DeepSeek V3" family = "deepseek" 
release_date = "2024-05-07" -last_updated = "2025-10-04" +last_updated = "2026-02-04" attachment = false reasoning = true temperature = true knowledge = "2024-04" tool_call = true open_weights = false +status = "deprecated" [cost] input = 0.50 diff --git a/providers/nebius/models/meta-llama/llama-3.3-70b-instruct-base.toml b/providers/nebius/models/meta-llama/llama-3.3-70b-instruct-base.toml index ea561f5f3..4644871f3 100644 --- a/providers/nebius/models/meta-llama/llama-3.3-70b-instruct-base.toml +++ b/providers/nebius/models/meta-llama/llama-3.3-70b-instruct-base.toml @@ -1,22 +1,25 @@ -name = "Llama-3.3-70B-Instruct (Base)" -family = "llama" -release_date = "2024-08-22" -last_updated = "2025-10-04" +name = "Llama-3.3-70B-Instruct" attachment = false -reasoning = true -temperature = true -knowledge = "2024-08" +reasoning = false tool_call = true -open_weights = false +structured_output = true +temperature = true +knowledge = "2025-08" +release_date = "2025-12-05" +last_updated = "2026-02-04" +open_weights = true [cost] input = 0.13 output = 0.40 +cache_read = 0.013 +cache_write = 0.16 [limit] -context = 131072 -output = 8192 +context = 128_000 +input = 120_000 +output = 8_192 [modalities] input = ["text"] -output = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/nebius/models/meta-llama/llama-3.3-70b-instruct-fast.toml b/providers/nebius/models/meta-llama/llama-3.3-70b-instruct-fast.toml index 5aabbdbad..baa5ca319 100644 --- a/providers/nebius/models/meta-llama/llama-3.3-70b-instruct-fast.toml +++ b/providers/nebius/models/meta-llama/llama-3.3-70b-instruct-fast.toml @@ -1,22 +1,25 @@ name = "Llama-3.3-70B-Instruct (Fast)" -family = "llama" -release_date = "2024-08-22" -last_updated = "2025-10-04" attachment = false -reasoning = true -temperature = true -knowledge = "2024-08" +reasoning = false tool_call = true -open_weights = false +structured_output = true +temperature = true +knowledge = "2025-08" +release_date = "2025-12-05" 
+last_updated = "2026-02-04" +open_weights = true [cost] input = 0.25 output = 0.75 +cache_read = 0.025 +cache_write = 0.31 [limit] -context = 131072 -output = 8192 +context = 128_000 +input = 120_000 +output = 8_192 [modalities] input = ["text"] -output = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/nebius/models/meta-llama/llama-3_1-405b-instruct.toml b/providers/nebius/models/meta-llama/llama-3_1-405b-instruct.toml index a334266e6..9309ace60 100644 --- a/providers/nebius/models/meta-llama/llama-3_1-405b-instruct.toml +++ b/providers/nebius/models/meta-llama/llama-3_1-405b-instruct.toml @@ -1,22 +1,26 @@ -name = "Llama 3.1 405B Instruct" -family = "llama" -release_date = "2024-07-23" -last_updated = "2025-10-04" +name = "Llama-3.1-405B-Instruct" attachment = false -reasoning = true -temperature = true -knowledge = "2024-03" +reasoning = false tool_call = true -open_weights = false +structured_output = true +temperature = true +knowledge = "2024-12" +release_date = "2025-01-20" +last_updated = "2026-02-04" +open_weights = true +status = "deprecated" [cost] input = 1.00 output = 3.00 +cache_read = 0.10 +cache_write = 1.25 [limit] -context = 131072 -output = 8192 +context = 131_072 +input = 124_000 +output = 8_192 [modalities] input = ["text"] -output = ["text"] +output = ["text"] \ No newline at end of file diff --git a/providers/nebius/models/nvidia/llama-3_1-nemotron-ultra-253b-v1.toml b/providers/nebius/models/nvidia/llama-3_1-nemotron-ultra-253b-v1.toml index bed39b6bb..422c3af1b 100644 --- a/providers/nebius/models/nvidia/llama-3_1-nemotron-ultra-253b-v1.toml +++ b/providers/nebius/models/nvidia/llama-3_1-nemotron-ultra-253b-v1.toml @@ -1,22 +1,25 @@ -name = "Llama 3.1 Nemotron Ultra 253B v1" -family = "llama" -release_date = "2024-07-01" -last_updated = "2025-10-04" +name = "Llama-3.1-Nemotron-Ultra-253B-v1" attachment = false -reasoning = true -temperature = true -knowledge = "2024-07" +reasoning = false tool_call = true 
-open_weights = false +structured_output = true +temperature = true +knowledge = "2024-12" +release_date = "2025-01-15" +last_updated = "2026-02-04" +open_weights = true [cost] input = 0.60 output = 1.80 +cache_read = 0.06 +cache_write = 0.75 [limit] -context = 131072 -output = 8192 +context = 128_000 +input = 120_000 +output = 4_096 [modalities] input = ["text"] -output = ["text"] +output = ["text"] \ No newline at end of file