diff --git a/agent-schema.json b/agent-schema.json index b665443d2..a1ce21233 100644 --- a/agent-schema.json +++ b/agent-schema.json @@ -182,7 +182,7 @@ "properties": { "provider": { "type": "string", - "description": "The underlying provider type. Defaults to \"openai\" when not set. Supported values: openai, anthropic, google, amazon-bedrock, dmr, and any built-in alias (requesty, openrouter, azure, xai, ollama, mistral, baseten, etc.).", + "description": "The underlying provider type. Defaults to \"openai\" when not set. Supported values: openai, anthropic, google, amazon-bedrock, dmr, and any built-in alias (requesty, openrouter, azure, xai, ollama, mistral, baseten, ovhcloud, etc.).", "examples": [ "openai", "anthropic", diff --git a/docs/_data/nav.yml b/docs/_data/nav.yml index 4ff3c653a..9187b9fde 100644 --- a/docs/_data/nav.yml +++ b/docs/_data/nav.yml @@ -147,6 +147,8 @@ url: /providers/nebius/ - title: Baseten url: /providers/baseten/ + - title: OVHcloud + url: /providers/ovhcloud/ - title: MiniMax url: /providers/minimax/ - title: OpenRouter diff --git a/docs/concepts/models/index.md b/docs/concepts/models/index.md index afb2bfde5..3e8e7a824 100644 --- a/docs/concepts/models/index.md +++ b/docs/concepts/models/index.md @@ -82,6 +82,7 @@ for details. | Nebius | `nebius` | Open-source and specialised models | `NEBIUS_API_KEY` | | MiniMax | `minimax` | MiniMax models | `MINIMAX_API_KEY` | | Baseten | `baseten` | DeepSeek, Kimi, GLM, Llama models | `BASETEN_API_KEY` | +| OVHcloud | `ovhcloud` | Qwen, Llama, Mistral, DeepSeek (EU-hosted) | `OVH_AI_ENDPOINTS_ACCESS_TOKEN` | | Requesty | `requesty` | Multi-provider gateway | `REQUESTY_API_KEY` | | OpenRouter | `openrouter` | Multi-provider gateway | `OPENROUTER_API_KEY` | | Azure OpenAI | `azure` | gpt-4o, gpt-5 on Azure | `AZURE_API_KEY` + `base_url` | diff --git a/docs/configuration/models/index.md b/docs/configuration/models/index.md index 2339fc1db..bf5839c8b 100644 --- a/docs/configuration/models/index.md +++ b/docs/configuration/models/index.md @@ -17,7 +17,7 @@ models: first_available: [list] # Optional: candidate model refs, tried in order by available credentials. # Mutually exclusive with other model settings. provider: string # Required unless using first_available. One of: openai, anthropic, google, amazon-bedrock, - # dmr, mistral, xai, nebius, minimax, baseten, requesty, openrouter, + # dmr, mistral, xai, nebius, minimax, baseten, ovhcloud, requesty, openrouter, # azure, ollama, github-copilot, or a named provider defined # under the top-level `providers:` section. model: string # Required: model identifier @@ -48,7 +48,7 @@ models: | Property | Type | Required | Description | | --------------------- | ---------- | -------- | ------------------------------------------------------------------------------------- | | `first_available` | array | ✗ | Candidate model references tried in order; selects the first whose credentials are configured. Mutually exclusive with other model settings. | -| `provider` | string | ✓/✗ | Required for regular model definitions; omitted for `first_available` selectors. Provider: `openai`, `anthropic`, `google`, `amazon-bedrock`, `dmr`, `mistral`, `xai`, `nebius`, `minimax`, `baseten`, `requesty`, `openrouter`, `azure`, `ollama`, `github-copilot`, or any [named provider]({{ '/providers/custom/' | relative_url }}). | +| `provider` | string | ✓/✗ | Required for regular model definitions; omitted for `first_available` selectors. Provider: `openai`, `anthropic`, `google`, `amazon-bedrock`, `dmr`, `mistral`, `xai`, `nebius`, `minimax`, `baseten`, `ovhcloud`, `requesty`, `openrouter`, `azure`, `ollama`, `github-copilot`, or any [named provider]({{ '/providers/custom/' | relative_url }}). | | `model` | string | ✓/✗ | Required for regular model definitions; omitted for `first_available` selectors. Model name (e.g., `gpt-4o`, `claude-sonnet-4-5`, `gemini-3.5-flash`) | | `temperature` | float | ✗ | Sampling randomness. Range is provider-dependent — typically `0.0–2.0` (Anthropic caps at `1.0`). `0.0` is deterministic. | | `max_tokens` | int | ✗ | Maximum response length in tokens | @@ -404,7 +404,7 @@ See the [Anthropic provider page]({{ '/providers/anthropic/#thinking-display' | ## Custom HTTP Headers For OpenAI-compatible providers (`openai`, `github-copilot`, `mistral`, `xai`, -`nebius`, `minimax`, `baseten`, `requesty`, `openrouter`, `ollama`, and any custom provider using the OpenAI API), +`nebius`, `minimax`, `baseten`, `ovhcloud`, `requesty`, `openrouter`, `ollama`, and any custom provider using the OpenAI API), `provider_opts.http_headers` adds arbitrary HTTP headers to every outgoing request: diff --git a/docs/providers/overview/index.md b/docs/providers/overview/index.md index 057f27374..088a60dce 100644 --- a/docs/providers/overview/index.md +++ b/docs/providers/overview/index.md @@ -66,6 +66,7 @@ docker-agent also includes built-in aliases for these providers: | Nebius | `nebius` | `NEBIUS_API_KEY` | | MiniMax | `minimax` | `MINIMAX_API_KEY` | | Baseten | `baseten` | `BASETEN_API_KEY` | +| OVHcloud | `ovhcloud` | `OVH_AI_ENDPOINTS_ACCESS_TOKEN` | | Requesty | `requesty` | `REQUESTY_API_KEY` | | OpenRouter | `openrouter` | `OPENROUTER_API_KEY` | | Azure OpenAI | `azure` | `AZURE_API_KEY` + `base_url` | diff --git a/docs/providers/ovhcloud/index.md b/docs/providers/ovhcloud/index.md new file mode 100644 index 000000000..7ff5b6292 --- /dev/null +++ b/docs/providers/ovhcloud/index.md @@ -0,0 +1,108 @@ +--- +title: "OVHcloud" +description: "Use OVHcloud AI Endpoints models with docker-agent." +permalink: /providers/ovhcloud/ +--- + +# OVHcloud + +_Use OVHcloud AI Endpoints models with docker-agent._ + +## Overview + +[OVHcloud AI Endpoints](https://endpoints.ai.cloud.ovh.net/) serves open-weight +models through an OpenAI-compatible API, hosted in the EU. docker-agent includes +built-in support for OVHcloud as an alias provider. + +## Setup + +1. Create an access token from the + [OVHcloud AI Endpoints portal](https://endpoints.ai.cloud.ovh.net/). +2. Set the environment variable: + + ```bash + export OVH_AI_ENDPOINTS_ACCESS_TOKEN=your-access-token + ``` + +## Usage + +### Inline Syntax + +```yaml +agents: + root: + model: ovhcloud/Qwen3.5-397B-A17B + description: Assistant using OVHcloud + instruction: You are a helpful assistant. +``` + +### Named Model + +```yaml +models: + ovhcloud_model: + provider: ovhcloud + model: Qwen3.5-397B-A17B + temperature: 0.7 + max_tokens: 8192 + +agents: + root: + model: ovhcloud_model + description: Assistant using OVHcloud + instruction: You are a helpful assistant. +``` + +## Available Models + +OVHcloud hosts a rotating catalogue of open-weight models. Check the +[AI Endpoints catalogue](https://endpoints.ai.cloud.ovh.net/) for current model +IDs, context limits, and free-tier availability. + +| Model | Description | +| --- | --- | +| `Qwen3.5-397B-A17B` | Large Qwen3.5 MoE — strong general, coding, and reasoning | +| `Qwen3-32B` | Mid-size Qwen3 — fast, tool-calling, reasoning | +| `Qwen3.6-27B` | Compact Qwen3.6 — fast and efficient | +| `Qwen3.5-9B` | Small Qwen3.5 — lightweight, free-tier friendly | +| `Qwen3-Coder-30B-A3B-Instruct` | Qwen3 Coder MoE — optimised for code generation | +| `Meta-Llama-3_3-70B-Instruct` | Llama 3.3 70B — reliable general-purpose chat | +| `Mistral-Small-3.2-24B-Instruct-2506` | Compact, fast, tool-calling | + +> Model IDs are case-sensitive and must be passed exactly as the catalogue lists +> them. + +## How It Works + +OVHcloud is implemented as a built-in alias in docker-agent: + +- **API Type:** OpenAI-compatible (`openai_chatcompletions`) +- **Base URL:** `https://oai.endpoints.kepler.ai.cloud.ovh.net/v1` +- **Token Variable:** `OVH_AI_ENDPOINTS_ACCESS_TOKEN` + +docker-agent automatically coalesces consecutive system messages into one for +OVHcloud, because some OVHcloud models return an empty stream when a request +carries more than one system message. + +## Free tier + +OVHcloud offers rate-limited free access to several models. Under heavy +rate-limiting the endpoint may return an empty response; docker-agent surfaces +this as a warning rather than failing. For sustained use, an access token with a +paid plan avoids the free-tier request-rate cap. + +## Example: Code Assistant + +```yaml +agents: + coder: + model: ovhcloud/Qwen3.5-397B-A17B + description: Code assistant using Qwen3.5 + instruction: | + You are an expert programmer. + Write clean, well-documented code and follow language best practices. + toolsets: + - type: filesystem + - type: shell + - type: think +``` diff --git a/examples/README.md b/examples/README.md index d4cc66fd0..d067f5da1 100644 --- a/examples/README.md +++ b/examples/README.md @@ -194,6 +194,7 @@ remote MCP endpoints. | [`model_env_substitution.yaml`](model_env_substitution.yaml) | `${env.VAR}` substitution in a model's `model` / `base_url`. | | [`nebius.yaml`](nebius.yaml) | Nebius cloud provider. | | [`baseten.yaml`](baseten.yaml) | Baseten cloud provider. | +| [`ovhcloud.yaml`](ovhcloud.yaml) | OVHcloud AI Endpoints provider. | | [`grok.yaml`](grok.yaml) | xAI Grok model. | | [`github-copilot.yaml`](github-copilot.yaml) | GitHub Copilot models via OAuth device-flow. | | [`fallback_models.yaml`](fallback_models.yaml) | Automatic fallback to a secondary model when the primary fails. | diff --git a/examples/ovhcloud.yaml b/examples/ovhcloud.yaml new file mode 100644 index 000000000..ec010fe65 --- /dev/null +++ b/examples/ovhcloud.yaml @@ -0,0 +1,17 @@ +# yaml-language-server: $schema=../agent-schema.json + +models: + ovhcloud_model: + provider: ovhcloud + model: Qwen3.5-397B-A17B + +agents: + root: + model: ovhcloud_model + description: Assistant using OVHcloud AI Endpoints + instruction: | + You are a helpful assistant. + toolsets: + - type: filesystem + - type: shell + - type: think diff --git a/pkg/config/auto.go b/pkg/config/auto.go index fe40fd125..55f308bd8 100644 --- a/pkg/config/auto.go +++ b/pkg/config/auto.go @@ -45,6 +45,7 @@ var cloudProviders = []providerConfig{ {"mistral", []string{"MISTRAL_API_KEY"}, "MISTRAL_API_KEY"}, {"openrouter", []string{"OPENROUTER_API_KEY"}, "OPENROUTER_API_KEY"}, {"baseten", []string{"BASETEN_API_KEY"}, "BASETEN_API_KEY"}, + {"ovhcloud", []string{"OVH_AI_ENDPOINTS_ACCESS_TOKEN"}, "OVH_AI_ENDPOINTS_ACCESS_TOKEN"}, {"amazon-bedrock", []string{ "AWS_BEARER_TOKEN_BEDROCK", "AWS_ACCESS_KEY_ID", @@ -109,6 +110,7 @@ var DefaultModels = map[string]string{ "mistral": "mistral-small-latest", "openrouter": "meta-llama/llama-3.3-70b-instruct", "baseten": "deepseek-ai/DeepSeek-V3.1", + "ovhcloud": "Qwen3.5-397B-A17B", "amazon-bedrock": "global.anthropic.claude-sonnet-4-5-20250929-v1:0", "opencode-go": "deepseek-v4-flash", "opencode-zen": "deepseek-v4-flash-free", diff --git a/pkg/config/auto_test.go b/pkg/config/auto_test.go index 8cc34c207..2fc1b39fb 100644 --- a/pkg/config/auto_test.go +++ b/pkg/config/auto_test.go @@ -62,6 +62,13 @@ func TestAvailableProviders_NoGateway(t *testing.T) { }, expectedProvider: "baseten", }, + { + name: "ovhcloud access token present", + envVars: map[string]string{ + "OVH_AI_ENDPOINTS_ACCESS_TOKEN": "test-token", + }, + expectedProvider: "ovhcloud", + }, { name: "no api keys - defaults to dmr", envVars: map[string]string{}, @@ -233,6 +240,15 @@ func TestAutoModelConfig(t *testing.T) { expectedModel: "deepseek-ai/DeepSeek-V3.1", expectedMaxTokens: 32000, }, + { + name: "ovhcloud provider", + envVars: map[string]string{ + "OVH_AI_ENDPOINTS_ACCESS_TOKEN": "test-token", + }, + expectedProvider: "ovhcloud", + expectedModel: "Qwen3.5-397B-A17B", + expectedMaxTokens: 32000, + }, { name: "dmr provider (no api keys)", envVars: map[string]string{}, @@ -315,7 +331,7 @@ func TestDefaultModels(t *testing.T) { t.Parallel() // Test that DefaultModels map has all expected providers - expectedProviders := []string{"openai", "anthropic", "google", "dmr", "mistral", "openrouter", "baseten", "amazon-bedrock", "opencode-zen", "opencode-go"} + expectedProviders := []string{"openai", "anthropic", "google", "dmr", "mistral", "openrouter", "baseten", "ovhcloud", "amazon-bedrock", "opencode-zen", "opencode-go"} for _, provider := range expectedProviders { t.Run(provider, func(t *testing.T) { @@ -333,6 +349,7 @@ func TestDefaultModels(t *testing.T) { assert.Equal(t, "mistral-small-latest", DefaultModels["mistral"]) assert.Equal(t, "meta-llama/llama-3.3-70b-instruct", DefaultModels["openrouter"]) assert.Equal(t, "deepseek-ai/DeepSeek-V3.1", DefaultModels["baseten"]) + assert.Equal(t, "Qwen3.5-397B-A17B", DefaultModels["ovhcloud"]) assert.Equal(t, "global.anthropic.claude-sonnet-4-5-20250929-v1:0", DefaultModels["amazon-bedrock"]) assert.Equal(t, "deepseek-v4-flash", DefaultModels["opencode-go"]) assert.Equal(t, "deepseek-v4-flash-free", DefaultModels["opencode-zen"]) @@ -342,7 +359,7 @@ func TestAutoModelConfig_IntegrationWithDefaultModels(t *testing.T) { t.Parallel() // Verify that AutoModelConfig always returns a model from DefaultModels - providers := []string{"openai", "anthropic", "google", "mistral", "openrouter", "baseten", "opencode-zen"} + providers := []string{"openai", "anthropic", "google", "mistral", "openrouter", "baseten", "ovhcloud", "opencode-zen"} for _, provider := range providers { t.Run(provider, func(t *testing.T) { @@ -364,6 +381,8 @@ func TestAutoModelConfig_IntegrationWithDefaultModels(t *testing.T) { envVars["OPENROUTER_API_KEY"] = "test-key" case "baseten": envVars["BASETEN_API_KEY"] = "test-key" + case "ovhcloud": + envVars["OVH_AI_ENDPOINTS_ACCESS_TOKEN"] = "test-token" case "opencode-zen": envVars["OPENCODE_API_KEY"] = "test-key" } @@ -469,6 +488,22 @@ func TestAvailableProviders_PrecedenceOrder(t *testing.T) { providers = AvailableProviders(t.Context(), "", env) assert.Equal(t, "baseten", providers[0]) + // baseten wins over ovhcloud + env = environment.NewMapEnvProvider(map[string]string{ + "BASETEN_API_KEY": "test-key", + "OVH_AI_ENDPOINTS_ACCESS_TOKEN": "test-token", + }) + providers = AvailableProviders(t.Context(), "", env) + assert.Equal(t, "baseten", providers[0]) + + // ovhcloud wins over amazon-bedrock + env = environment.NewMapEnvProvider(map[string]string{ + "OVH_AI_ENDPOINTS_ACCESS_TOKEN": "test-token", + "AWS_ACCESS_KEY_ID": "test-key", + }) + providers = AvailableProviders(t.Context(), "", env) + assert.Equal(t, "ovhcloud", providers[0]) + // Only OPENCODE_API_KEY set - opencode-zen should win (higher priority than opencode-go) env = environment.NewMapEnvProvider(map[string]string{ "OPENCODE_API_KEY": "test-key", diff --git a/pkg/config/examples_test.go b/pkg/config/examples_test.go index 14c7947ec..223f80d87 100644 --- a/pkg/config/examples_test.go +++ b/pkg/config/examples_test.go @@ -21,6 +21,7 @@ import ( var modelsDevAbsentProviders = map[string]bool{ "dmr": true, // Docker Model Runner (local, not in catalog) "opencode-zen": true, // not yet registered in models.dev + "ovhcloud": true, // OVHcloud AI Endpoints (not yet in models.dev) } func collectExamples(t *testing.T) []string { diff --git a/pkg/model/provider/aliases.go b/pkg/model/provider/aliases.go index af783f754..f9d7933d3 100644 --- a/pkg/model/provider/aliases.go +++ b/pkg/model/provider/aliases.go @@ -73,6 +73,11 @@ var Aliases = map[string]Alias{ BaseURL: "https://inference.baseten.co/v1", TokenEnvVar: "BASETEN_API_KEY", }, + "ovhcloud": { + APIType: "openai", + BaseURL: "https://oai.endpoints.kepler.ai.cloud.ovh.net/v1", + TokenEnvVar: "OVH_AI_ENDPOINTS_ACCESS_TOKEN", + }, "github-copilot": { APIType: "openai", BaseURL: "https://api.githubcopilot.com", diff --git a/pkg/model/provider/aliases_test.go b/pkg/model/provider/aliases_test.go index bb355cc82..12d5c951f 100644 --- a/pkg/model/provider/aliases_test.go +++ b/pkg/model/provider/aliases_test.go @@ -57,6 +57,20 @@ func TestBasetenAlias(t *testing.T) { assert.True(t, IsCatalogProvider("baseten")) } +func TestOVHcloudAlias(t *testing.T) { + t.Parallel() + + alias, ok := LookupAlias("ovhcloud") + require.True(t, ok) + assert.Equal(t, Alias{ + APIType: "openai", + BaseURL: "https://oai.endpoints.kepler.ai.cloud.ovh.net/v1", + TokenEnvVar: "OVH_AI_ENDPOINTS_ACCESS_TOKEN", + }, alias) + assert.True(t, IsKnownProvider("ovhcloud")) + assert.True(t, IsCatalogProvider("ovhcloud")) +} + func TestEachAlias(t *testing.T) { t.Parallel() diff --git a/pkg/model/provider/openai/client.go b/pkg/model/provider/openai/client.go index df90eb740..ab6235fbd 100644 --- a/pkg/model/provider/openai/client.go +++ b/pkg/model/provider/openai/client.go @@ -221,7 +221,8 @@ func (c *Client) convertMessages(ctx context.Context, messages []chat.Message) [ // some such backends silently return an empty stream when a request carries // more than one system message. The DMR client already applies this merge // for the same reason. Apply it to explicit openai_chatcompletions configs - // and Baseten's built-in OpenAI-compatible endpoint. + // and built-in OpenAI-compatible endpoints (Baseten, OVHcloud) that do not + // set api_type in ProviderOpts. if shouldMergeConsecutiveMessages(&c.ModelConfig) { return oaistream.MergeConsecutiveMessages(converted) } @@ -232,7 +233,10 @@ func shouldMergeConsecutiveMessages(cfg *latest.ModelConfig) bool { if getAPIType(cfg) == "openai_chatcompletions" { return true } - return cfg != nil && cfg.Provider == "baseten" + if cfg == nil { + return false + } + return cfg.Provider == "baseten" || cfg.Provider == "ovhcloud" } // CreateChatCompletionStream creates a streaming chat completion request diff --git a/pkg/model/provider/openai/system_message_merge_test.go b/pkg/model/provider/openai/system_message_merge_test.go index 1b37a6606..046c5d458 100644 --- a/pkg/model/provider/openai/system_message_merge_test.go +++ b/pkg/model/provider/openai/system_message_merge_test.go @@ -45,6 +45,16 @@ func TestBaseten_MergesConsecutiveSystemMessages(t *testing.T) { }) } +func TestOVHcloud_MergesConsecutiveSystemMessages(t *testing.T) { + t.Parallel() + + assertMergesConsecutiveMessages(t, &latest.ModelConfig{ + Provider: "ovhcloud", + Model: "Qwen3.5-397B-A17B", + TokenKey: "MY_TOKEN", + }) +} + func assertMergesConsecutiveMessages(t *testing.T, cfg *latest.ModelConfig) { t.Helper()