diff --git a/frontend/public/icons/adapter-icons/NvidiaBuild.png b/frontend/public/icons/adapter-icons/NvidiaBuild.png new file mode 100644 index 0000000000..de449c13a4 Binary files /dev/null and b/frontend/public/icons/adapter-icons/NvidiaBuild.png differ diff --git a/frontend/public/icons/adapter-icons/OpenRouter.png b/frontend/public/icons/adapter-icons/OpenRouter.png new file mode 100644 index 0000000000..acaabd3da9 Binary files /dev/null and b/frontend/public/icons/adapter-icons/OpenRouter.png differ diff --git a/unstract/sdk1/src/unstract/sdk1/adapters/base1.py b/unstract/sdk1/src/unstract/sdk1/adapters/base1.py index da154f36af..ebd93e3dbd 100644 --- a/unstract/sdk1/src/unstract/sdk1/adapters/base1.py +++ b/unstract/sdk1/src/unstract/sdk1/adapters/base1.py @@ -353,6 +353,9 @@ def validate_model(adapter_metadata: dict[str, "Any"]) -> str: _OPENAI_PROVIDER_PREFIX = "openai/" _CUSTOM_OPENAI_PROVIDER_PREFIX = "custom_openai/" _OPENAI_REASONING_MODEL_PATTERN = re.compile(r"^(o1|o3|o4|gpt-5)(?:[-/]|$)") +# Keyless gateways still need a non-empty key; the OpenAI SDK rejects a +# null/blank one before any request reaches the endpoint. +_NO_AUTH_API_KEY = "no-auth" def _is_openai_reasoning_model(model: str) -> bool: @@ -494,6 +497,86 @@ def validate_model(adapter_metadata: dict[str, "Any"]) -> str: return f"{_CUSTOM_OPENAI_PROVIDER_PREFIX}{model}" +# Shared validation for branded adapters that reuse the OpenAI-compatible wire +# protocol with a fixed default endpoint. +def _validate_branded_openai_compatible( + adapter_metadata: dict[str, "Any"], default_api_base: str +) -> dict[str, "Any"]: + # Endpoint stays overridable so a provider URL change needs no release. + api_base = adapter_metadata.get("api_base") + if not (isinstance(api_base, str) and api_base.strip()): + api_base = default_api_base + adapter_metadata = {**adapter_metadata, "api_base": api_base} + return OpenAICompatibleLLMParameters.validate(adapter_metadata) + + +_NVIDIA_BUILD_API_BASE = "https://integrate.api.nvidia.com/v1" +_OPENROUTER_API_BASE = "https://openrouter.ai/api/v1" +_OPENROUTER_PROVIDER_PREFIX = "openrouter/" + + +class NvidiaBuildLLMParameters(OpenAICompatibleLLMParameters): + """OpenAI-compatible adapter for NVIDIA's hosted models (build.nvidia.com).""" + + # Required str so a directly-constructed instance stays valid. + api_base: str = _NVIDIA_BUILD_API_BASE + + @staticmethod + def validate(adapter_metadata: dict[str, "Any"]) -> dict[str, "Any"]: + return _validate_branded_openai_compatible( + adapter_metadata, _NVIDIA_BUILD_API_BASE + ) + + +class OpenRouterLLMParameters(BaseChatCompletionParameters): + """Adapter for OpenRouter (openrouter.ai). + + Routed through LiteLLM's native `openrouter/` provider so per-token costs + resolve and reasoning params map without provider-specific workarounds. + """ + + api_key: str + api_base: str = _OPENROUTER_API_BASE + reasoning_effort: str | None = None + + @staticmethod + def validate(adapter_metadata: dict[str, "Any"]) -> dict[str, "Any"]: + adapter_metadata = dict(adapter_metadata) + api_base = adapter_metadata.get("api_base") + if not (isinstance(api_base, str) and api_base.strip()): + adapter_metadata["api_base"] = _OPENROUTER_API_BASE + adapter_metadata["model"] = OpenRouterLLMParameters.validate_model( + adapter_metadata + ) + # Reasoning models reject a non-default temperature; drop it and send + # reasoning_effort only when reasoning is enabled. On a re-validation + # pass `enable_reasoning` is absent (it isn't serialized), so recover the + # state from a surviving reasoning_effort to keep reload idempotent — + # unless the user explicitly opted out with `enable_reasoning: false`. + enable_reasoning = adapter_metadata.get("enable_reasoning", False) + if ( + not enable_reasoning + and "enable_reasoning" not in adapter_metadata + and adapter_metadata.get("reasoning_effort") is not None + ): + enable_reasoning = True + adapter_metadata.pop("enable_reasoning", None) + if enable_reasoning: + adapter_metadata["temperature"] = None + else: + adapter_metadata.pop("reasoning_effort", None) + return OpenRouterLLMParameters(**adapter_metadata).model_dump() + + @staticmethod + def validate_model(adapter_metadata: dict[str, "Any"]) -> str: + model = str(adapter_metadata.get("model", "")).strip() + if not model: + raise ValueError("model is required for the OpenRouter adapter.") + if model.startswith(_OPENROUTER_PROVIDER_PREFIX): + return model + return f"{_OPENROUTER_PROVIDER_PREFIX}{model}" + + class AzureOpenAILLMParameters(BaseChatCompletionParameters): """See https://docs.litellm.ai/docs/providers/azure/#completion---using-azure_ad_token-api_base-api_version.""" @@ -1296,6 +1379,8 @@ class OpenAIEmbeddingParameters(BaseEmbeddingParameters): api_base: str | None = None embed_batch_size: int | None = 10 dimensions: int | None = None # For text-embedding-3-* models + # Strict endpoints reject the null LiteLLM sends when this is unset. + encoding_format: str | None = None @staticmethod def validate(adapter_metadata: dict[str, "Any"]) -> dict[str, "Any"]: @@ -1311,6 +1396,77 @@ def validate_model(adapter_metadata: dict[str, "Any"]) -> str: return model +# custom_openai has no embedding support, so these route through LiteLLM's +# native nvidia_nim provider; its default api_base is pinned for the schema. +_NVIDIA_NIM_PROVIDER_PREFIX = "nvidia_nim/" + + +class NvidiaBuildEmbeddingParameters(OpenAIEmbeddingParameters): + """OpenAI-compatible embeddings via NVIDIA's hosted endpoint (build.nvidia.com).""" + + # Overridable default endpoint. + api_base: str = _NVIDIA_BUILD_API_BASE + + @staticmethod + def validate(adapter_metadata: dict[str, "Any"]) -> dict[str, "Any"]: + adapter_metadata = dict(adapter_metadata) + # Endpoint stays overridable so a provider URL change needs no release. + api_base = adapter_metadata.get("api_base") + if not (isinstance(api_base, str) and api_base.strip()): + adapter_metadata["api_base"] = _NVIDIA_BUILD_API_BASE + # Strict endpoints reject the null LiteLLM sends; pin a real value. + adapter_metadata.setdefault("encoding_format", "float") + adapter_metadata["model"] = NvidiaBuildEmbeddingParameters.validate_model( + adapter_metadata + ) + return OpenAIEmbeddingParameters(**adapter_metadata).model_dump() + + @staticmethod + def validate_model(adapter_metadata: dict[str, "Any"]) -> str: + model = str(adapter_metadata.get("model", "")).strip() + if not model: + raise ValueError("model is required for the NVIDIA Build embedding adapter.") + if model.startswith(_NVIDIA_NIM_PROVIDER_PREFIX): + return model + return f"{_NVIDIA_NIM_PROVIDER_PREFIX}{model}" + + +class OpenAICompatibleEmbeddingParameters(OpenAIEmbeddingParameters): + """Embeddings for any OpenAI-compatible server (vLLM, self-hosted, etc.). + + Routes through the `openai/` provider with a user-supplied `api_base`; + cost stays unresolved since the endpoint is arbitrary. + """ + + # Some gateways are keyless; the endpoint is always required. + api_key: str | None = None + api_base: str + + @staticmethod + def validate(adapter_metadata: dict[str, "Any"]) -> dict[str, "Any"]: + adapter_metadata = dict(adapter_metadata) + api_key = adapter_metadata.get("api_key") + if not (isinstance(api_key, str) and api_key.strip()): + adapter_metadata["api_key"] = _NO_AUTH_API_KEY + # Strict endpoints reject the null LiteLLM sends; pin a real value. + adapter_metadata.setdefault("encoding_format", "float") + adapter_metadata["model"] = OpenAICompatibleEmbeddingParameters.validate_model( + adapter_metadata + ) + return OpenAICompatibleEmbeddingParameters(**adapter_metadata).model_dump() + + @staticmethod + def validate_model(adapter_metadata: dict[str, "Any"]) -> str: + model = str(adapter_metadata.get("model", "")).strip() + if not model: + raise ValueError( + "model is required for the OpenAI Compatible embedding adapter." + ) + if model.startswith(_OPENAI_PROVIDER_PREFIX): + return model + return f"{_OPENAI_PROVIDER_PREFIX}{model}" + + class AzureOpenAIEmbeddingParameters(BaseEmbeddingParameters): """See https://docs.litellm.ai/docs/providers/azure.""" diff --git a/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/__init__.py b/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/__init__.py index 3f7de6e916..18f240f04e 100644 --- a/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/__init__.py +++ b/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/__init__.py @@ -4,8 +4,12 @@ from unstract.sdk1.adapters.embedding1.azure_openai import AzureOpenAIEmbeddingAdapter from unstract.sdk1.adapters.embedding1.bedrock import AWSBedrockEmbeddingAdapter from unstract.sdk1.adapters.embedding1.gemini import GeminiEmbeddingAdapter +from unstract.sdk1.adapters.embedding1.nvidia_build import NvidiaBuildEmbeddingAdapter from unstract.sdk1.adapters.embedding1.ollama import OllamaEmbeddingAdapter from unstract.sdk1.adapters.embedding1.openai import OpenAIEmbeddingAdapter +from unstract.sdk1.adapters.embedding1.openai_compatible import ( + OpenAICompatibleEmbeddingAdapter, +) from unstract.sdk1.adapters.embedding1.vertexai import VertexAIEmbeddingAdapter from unstract.sdk1.adapters.enums import AdapterTypes @@ -18,7 +22,9 @@ "AzureOpenAIEmbeddingAdapter", "AWSBedrockEmbeddingAdapter", "GeminiEmbeddingAdapter", + "NvidiaBuildEmbeddingAdapter", "OpenAIEmbeddingAdapter", + "OpenAICompatibleEmbeddingAdapter", "VertexAIEmbeddingAdapter", "OllamaEmbeddingAdapter", ] diff --git a/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/nvidia_build.py b/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/nvidia_build.py new file mode 100644 index 0000000000..d65d57ac51 --- /dev/null +++ b/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/nvidia_build.py @@ -0,0 +1,45 @@ +from typing import Any + +from unstract.sdk1.adapters.base1 import BaseAdapter, NvidiaBuildEmbeddingParameters +from unstract.sdk1.adapters.enums import AdapterTypes + +DESCRIPTION = ( + "Adapter for NVIDIA's OpenAI-compatible embedding models (build.nvidia.com). " + "Supply a model name and your NVIDIA API key; the endpoint is preconfigured." +) + + +class NvidiaBuildEmbeddingAdapter(NvidiaBuildEmbeddingParameters, BaseAdapter): + @staticmethod + def get_id() -> str: + return "nvidiabuild|2afcdf59-5323-4086-97fb-d9a0432e7795" + + @staticmethod + def get_metadata() -> dict[str, Any]: + return { + "name": "NVIDIA Build", + "version": "1.0.0", + "adapter": NvidiaBuildEmbeddingAdapter, + "description": DESCRIPTION, + "is_active": True, + } + + @staticmethod + def get_name() -> str: + return "NVIDIA Build" + + @staticmethod + def get_description() -> str: + return DESCRIPTION + + @staticmethod + def get_provider() -> str: + return "nvidia_build" + + @staticmethod + def get_icon() -> str: + return "/icons/adapter-icons/NvidiaBuild.png" + + @staticmethod + def get_adapter_type() -> AdapterTypes: + return AdapterTypes.EMBEDDING diff --git a/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/openai_compatible.py b/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/openai_compatible.py new file mode 100644 index 0000000000..12fb1f2f6f --- /dev/null +++ b/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/openai_compatible.py @@ -0,0 +1,49 @@ +from typing import Any + +from unstract.sdk1.adapters.base1 import ( + BaseAdapter, + OpenAICompatibleEmbeddingParameters, +) +from unstract.sdk1.adapters.enums import AdapterTypes + +DESCRIPTION = ( + "Embedding adapter for servers that implement the OpenAI Embeddings API " + "(vLLM, self-hosted gateways, and third-party providers). " + "Use OpenAI for the official OpenAI service." +) + + +class OpenAICompatibleEmbeddingAdapter(OpenAICompatibleEmbeddingParameters, BaseAdapter): + @staticmethod + def get_id() -> str: + return "openaicompatible|65573de7-2ea5-4631-bb49-492717972455" + + @staticmethod + def get_metadata() -> dict[str, Any]: + return { + "name": "OpenAI Compatible", + "version": "1.0.0", + "adapter": OpenAICompatibleEmbeddingAdapter, + "description": DESCRIPTION, + "is_active": True, + } + + @staticmethod + def get_name() -> str: + return "OpenAI Compatible" + + @staticmethod + def get_description() -> str: + return DESCRIPTION + + @staticmethod + def get_provider() -> str: + return "custom_openai" + + @staticmethod + def get_icon() -> str: + return "/icons/adapter-icons/OpenAICompatible.png" + + @staticmethod + def get_adapter_type() -> AdapterTypes: + return AdapterTypes.EMBEDDING diff --git a/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/azure.json b/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/azure.json index f1ca8d2230..b080a3d2f2 100644 --- a/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/azure.json +++ b/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/azure.json @@ -54,13 +54,6 @@ "title": "Dimensions", "description": "Output embedding dimensions. Only supported by text-embedding-3-* models. Leave empty for default dimensions." }, - "embed_batch_size": { - "type": "number", - "minimum": 0, - "multipleOf": 1, - "title": "Embedding Batch Size", - "default": 5 - }, "max_retries": { "type": "number", "minimum": 0, diff --git a/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/custom_openai.json b/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/custom_openai.json new file mode 100644 index 0000000000..bde1665229 --- /dev/null +++ b/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/custom_openai.json @@ -0,0 +1,53 @@ +{ + "title": "OpenAI Compatible Embedding", + "type": "object", + "required": [ + "adapter_name", + "model", + "api_base" + ], + "properties": { + "adapter_name": { + "type": "string", + "title": "Name", + "default": "", + "description": "Provide a unique name for this adapter instance. Example: compatible-emb-1" + }, + "model": { + "type": "string", + "title": "Model", + "description": "The embedding model name expected by your OpenAI-compatible endpoint. Examples: text-embedding-3-small, BAAI/bge-m3, nomic-embed-text" + }, + "api_key": { + "type": [ + "string", + "null" + ], + "title": "API Key", + "format": "password", + "description": "API key for your OpenAI-compatible endpoint. Leave empty if the endpoint does not require one." + }, + "api_base": { + "type": "string", + "format": "uri", + "title": "API Base", + "description": "Base URL for the OpenAI-compatible embeddings endpoint. Examples: https://gateway.example.com/v1, https://llm.example.net/openai/v1" + }, + "max_retries": { + "type": "number", + "minimum": 0, + "multipleOf": 1, + "title": "Max Retries", + "default": 3, + "description": "The maximum number of times to retry a request if it fails." + }, + "timeout": { + "type": "number", + "minimum": 0, + "multipleOf": 1, + "title": "Timeout", + "default": 240, + "description": "Timeout in seconds" + } + } +} diff --git a/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/gemini.json b/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/gemini.json index e614867eb7..5a5096716b 100644 --- a/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/gemini.json +++ b/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/gemini.json @@ -25,13 +25,6 @@ "default": "", "format": "password" }, - "embed_batch_size": { - "type": "number", - "minimum": 1, - "multipleOf": 1, - "title": "Embed Batch Size", - "description": "Number of texts to embed in a single batch. Leave empty to use the system default." - }, "timeout": { "type": "number", "minimum": 0, diff --git a/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/nvidia_build.json b/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/nvidia_build.json new file mode 100644 index 0000000000..fa2b14247b --- /dev/null +++ b/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/nvidia_build.json @@ -0,0 +1,51 @@ +{ + "title": "NVIDIA Build Embedding", + "type": "object", + "required": [ + "adapter_name", + "api_key", + "model" + ], + "properties": { + "adapter_name": { + "type": "string", + "title": "Name", + "default": "", + "description": "Provide a unique name for this adapter instance. Example: nvidia-build-emb-1" + }, + "model": { + "type": "string", + "title": "Model", + "description": "The embedding model name as listed on [build.nvidia.com/models](https://build.nvidia.com/models). Examples: nvidia/nv-embedqa-e5-v5, nvidia/llama-3.2-nv-embedqa-1b-v2" + }, + "api_key": { + "type": "string", + "title": "API Key", + "format": "password", + "description": "Your NVIDIA API key from [build.nvidia.com](https://build.nvidia.com)." + }, + "api_base": { + "type": "string", + "format": "uri", + "title": "API Base", + "default": "https://integrate.api.nvidia.com/v1", + "description": "NVIDIA Build endpoint. Pre-filled with the default; change only if NVIDIA moves the base URL." + }, + "max_retries": { + "type": "number", + "minimum": 0, + "multipleOf": 1, + "title": "Max Retries", + "default": 3, + "description": "The maximum number of times to retry a request if it fails." + }, + "timeout": { + "type": "number", + "minimum": 0, + "multipleOf": 1, + "title": "Timeout", + "default": 240, + "description": "Timeout in seconds" + } + } +} diff --git a/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/ollama.json b/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/ollama.json index 8dd9bfa1c3..c241e5e711 100644 --- a/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/ollama.json +++ b/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/ollama.json @@ -25,13 +25,6 @@ "default": "", "description": "Provide the base URL where Ollama server is running. Example: `http://docker.host.internal:11434` or `http://localhost:11434`" }, - "embed_batch_size": { - "type": "number", - "minimum": 0, - "multipleOf": 1, - "title": "Embed Batch Size", - "default": 10 - }, "max_retries": { "type": "number", "minimum": 0, diff --git a/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/openai.json b/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/openai.json index 3ad21d3564..30e2724032 100644 --- a/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/openai.json +++ b/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/openai.json @@ -37,13 +37,6 @@ "title": "Dimensions", "description": "Output embedding dimensions. Only supported by text-embedding-3-* models. Leave empty for default dimensions (1536 for small, 3072 for large)." }, - "embed_batch_size": { - "type": "number", - "minimum": 0, - "multipleOf": 1, - "title": "Embed Batch Size", - "default": 10 - }, "max_retries": { "type": "number", "minimum": 0, diff --git a/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/vertexai.json b/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/vertexai.json index 6aa48e883f..5dec854624 100644 --- a/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/vertexai.json +++ b/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/vertexai.json @@ -38,13 +38,6 @@ "default": "global", "description": "The Google Cloud region for the Vertex AI endpoint (e.g., us-central1, global). If left empty, defaults to us-central1." }, - "embed_batch_size": { - "type": "number", - "minimum": 0, - "multipleOf": 1, - "title": "Embedding Batch Size", - "default": 10 - }, "embed_mode": { "type": "string", "title": "Embed Mode", diff --git a/unstract/sdk1/src/unstract/sdk1/adapters/llm1/__init__.py b/unstract/sdk1/src/unstract/sdk1/adapters/llm1/__init__.py index 1da3590f51..2d935e218f 100644 --- a/unstract/sdk1/src/unstract/sdk1/adapters/llm1/__init__.py +++ b/unstract/sdk1/src/unstract/sdk1/adapters/llm1/__init__.py @@ -6,9 +6,11 @@ from unstract.sdk1.adapters.llm1.anyscale import AnyscaleLLMAdapter from unstract.sdk1.adapters.llm1.azure_openai import AzureOpenAILLMAdapter from unstract.sdk1.adapters.llm1.bedrock import AWSBedrockLLMAdapter +from unstract.sdk1.adapters.llm1.nvidia_build import NvidiaBuildLLMAdapter from unstract.sdk1.adapters.llm1.ollama import OllamaLLMAdapter from unstract.sdk1.adapters.llm1.openai import OpenAILLMAdapter from unstract.sdk1.adapters.llm1.openai_compatible import OpenAICompatibleLLMAdapter +from unstract.sdk1.adapters.llm1.openrouter import OpenRouterLLMAdapter from unstract.sdk1.adapters.llm1.vertexai import VertexAILLMAdapter adapters: dict[str, dict[str, Any]] = {} @@ -21,8 +23,10 @@ "AnyscaleLLMAdapter", "AWSBedrockLLMAdapter", "AzureOpenAILLMAdapter", + "NvidiaBuildLLMAdapter", "OllamaLLMAdapter", "OpenAILLMAdapter", "OpenAICompatibleLLMAdapter", + "OpenRouterLLMAdapter", "VertexAILLMAdapter", ] diff --git a/unstract/sdk1/src/unstract/sdk1/adapters/llm1/nvidia_build.py b/unstract/sdk1/src/unstract/sdk1/adapters/llm1/nvidia_build.py new file mode 100644 index 0000000000..f601a9d532 --- /dev/null +++ b/unstract/sdk1/src/unstract/sdk1/adapters/llm1/nvidia_build.py @@ -0,0 +1,45 @@ +from typing import Any + +from unstract.sdk1.adapters.base1 import BaseAdapter, NvidiaBuildLLMParameters +from unstract.sdk1.adapters.enums import AdapterTypes + +DESCRIPTION = ( + "Adapter for NVIDIA's OpenAI-compatible hosted models (build.nvidia.com). " + "Supply a model name and your NVIDIA API key; the endpoint is preconfigured." +) + + +class NvidiaBuildLLMAdapter(NvidiaBuildLLMParameters, BaseAdapter): + @staticmethod + def get_id() -> str: + return "nvidiabuild|240d142d-68dd-4b6f-9716-80afd5c661cc" + + @staticmethod + def get_metadata() -> dict[str, Any]: + return { + "name": "NVIDIA Build", + "version": "1.0.0", + "adapter": NvidiaBuildLLMAdapter, + "description": DESCRIPTION, + "is_active": True, + } + + @staticmethod + def get_name() -> str: + return "NVIDIA Build" + + @staticmethod + def get_description() -> str: + return DESCRIPTION + + @staticmethod + def get_provider() -> str: + return "nvidia_build" + + @staticmethod + def get_icon() -> str: + return "/icons/adapter-icons/NvidiaBuild.png" + + @staticmethod + def get_adapter_type() -> AdapterTypes: + return AdapterTypes.LLM diff --git a/unstract/sdk1/src/unstract/sdk1/adapters/llm1/openrouter.py b/unstract/sdk1/src/unstract/sdk1/adapters/llm1/openrouter.py new file mode 100644 index 0000000000..9ed260ed86 --- /dev/null +++ b/unstract/sdk1/src/unstract/sdk1/adapters/llm1/openrouter.py @@ -0,0 +1,45 @@ +from typing import Any + +from unstract.sdk1.adapters.base1 import BaseAdapter, OpenRouterLLMParameters +from unstract.sdk1.adapters.enums import AdapterTypes + +DESCRIPTION = ( + "Adapter for OpenRouter's OpenAI-compatible API (openrouter.ai). " + "Supply a model name and your OpenRouter API key; the endpoint is preconfigured." +) + + +class OpenRouterLLMAdapter(OpenRouterLLMParameters, BaseAdapter): + @staticmethod + def get_id() -> str: + return "openrouter|17756452-5dca-4e10-9cbf-d9bc16505458" + + @staticmethod + def get_metadata() -> dict[str, Any]: + return { + "name": "OpenRouter", + "version": "1.0.0", + "adapter": OpenRouterLLMAdapter, + "description": DESCRIPTION, + "is_active": True, + } + + @staticmethod + def get_name() -> str: + return "OpenRouter" + + @staticmethod + def get_description() -> str: + return DESCRIPTION + + @staticmethod + def get_provider() -> str: + return "openrouter" + + @staticmethod + def get_icon() -> str: + return "/icons/adapter-icons/OpenRouter.png" + + @staticmethod + def get_adapter_type() -> AdapterTypes: + return AdapterTypes.LLM diff --git a/unstract/sdk1/src/unstract/sdk1/adapters/llm1/static/nvidia_build.json b/unstract/sdk1/src/unstract/sdk1/adapters/llm1/static/nvidia_build.json new file mode 100644 index 0000000000..2277516f22 --- /dev/null +++ b/unstract/sdk1/src/unstract/sdk1/adapters/llm1/static/nvidia_build.json @@ -0,0 +1,112 @@ +{ + "title": "NVIDIA Build", + "type": "object", + "required": [ + "adapter_name", + "api_key", + "model" + ], + "properties": { + "adapter_name": { + "type": "string", + "title": "Name", + "default": "", + "description": "Provide a unique name for this adapter instance. Example: nvidia-build-1" + }, + "api_key": { + "type": "string", + "title": "API Key", + "format": "password", + "description": "Your NVIDIA API key from [build.nvidia.com](https://build.nvidia.com)." + }, + "model": { + "type": "string", + "title": "Model", + "description": "The model name as listed on [build.nvidia.com/models](https://build.nvidia.com/models). Examples: nvidia/nemotron-mini-4b-instruct, meta/llama-3.1-70b-instruct" + }, + "api_base": { + "type": "string", + "format": "url", + "title": "API Base", + "default": "https://integrate.api.nvidia.com/v1", + "description": "NVIDIA Build endpoint. Pre-filled with the default; change only if NVIDIA moves the base URL." + }, + "max_tokens": { + "type": "number", + "minimum": 0, + "multipleOf": 1, + "title": "Maximum Output Tokens", + "default": 4096, + "description": "Maximum number of output tokens to limit LLM replies. Leave it empty to use the provider default. Sent as `max_completion_tokens` when Enable Reasoning is on." + }, + "max_retries": { + "type": "number", + "minimum": 0, + "multipleOf": 1, + "title": "Max Retries", + "default": 5, + "description": "The maximum number of times to retry a request if it fails." + }, + "timeout": { + "type": "number", + "minimum": 0, + "multipleOf": 1, + "title": "Timeout", + "default": 900, + "description": "Timeout in seconds." + }, + "enable_reasoning": { + "type": "boolean", + "title": "Enable Reasoning", + "default": false, + "description": "Toggle on for reasoning models to drop `temperature` and send `max_completion_tokens` and `reasoning_effort` via `extra_body` so the upstream API accepts them." + } + }, + "allOf": [ + { + "if": { + "properties": { + "enable_reasoning": { + "const": true + } + }, + "required": [ + "enable_reasoning" + ] + }, + "then": { + "properties": { + "reasoning_effort": { + "type": "string", + "enum": [ + "low", + "medium", + "high" + ], + "default": "medium", + "title": "Reasoning Effort", + "description": "Sets the reasoning strength when Enable Reasoning is on." + } + }, + "required": [ + "reasoning_effort" + ] + } + }, + { + "if": { + "properties": { + "enable_reasoning": { + "const": false + } + }, + "required": [ + "enable_reasoning" + ] + }, + "then": { + "properties": {} + } + } + ] +} diff --git a/unstract/sdk1/src/unstract/sdk1/adapters/llm1/static/openrouter.json b/unstract/sdk1/src/unstract/sdk1/adapters/llm1/static/openrouter.json new file mode 100644 index 0000000000..a7eede115d --- /dev/null +++ b/unstract/sdk1/src/unstract/sdk1/adapters/llm1/static/openrouter.json @@ -0,0 +1,112 @@ +{ + "title": "OpenRouter", + "type": "object", + "required": [ + "adapter_name", + "api_key", + "model" + ], + "properties": { + "adapter_name": { + "type": "string", + "title": "Name", + "default": "", + "description": "Provide a unique name for this adapter instance. Example: openrouter-1" + }, + "api_key": { + "type": "string", + "title": "API Key", + "format": "password", + "description": "Your OpenRouter API key from [openrouter.ai/keys](https://openrouter.ai/keys)." + }, + "model": { + "type": "string", + "title": "Model", + "description": "The model slug as listed on [openrouter.ai/models](https://openrouter.ai/models). Examples: openai/gpt-4o, anthropic/claude-3.5-sonnet, meta-llama/llama-3.1-70b-instruct" + }, + "api_base": { + "type": "string", + "format": "url", + "title": "API Base", + "default": "https://openrouter.ai/api/v1", + "description": "OpenRouter endpoint. Pre-filled with the default; change only if OpenRouter moves the base URL." + }, + "max_tokens": { + "type": "number", + "minimum": 0, + "multipleOf": 1, + "title": "Maximum Output Tokens", + "default": 4096, + "description": "Maximum number of output tokens to limit LLM replies. Leave it empty to use the provider default." + }, + "max_retries": { + "type": "number", + "minimum": 0, + "multipleOf": 1, + "title": "Max Retries", + "default": 5, + "description": "The maximum number of times to retry a request if it fails." + }, + "timeout": { + "type": "number", + "minimum": 0, + "multipleOf": 1, + "title": "Timeout", + "default": 900, + "description": "Timeout in seconds." + }, + "enable_reasoning": { + "type": "boolean", + "title": "Enable Reasoning", + "default": false, + "description": "Toggle on for reasoning models to send `reasoning_effort` to OpenRouter. See [OpenRouter reasoning docs](https://openrouter.ai/docs/use-cases/reasoning-tokens)." + } + }, + "allOf": [ + { + "if": { + "properties": { + "enable_reasoning": { + "const": true + } + }, + "required": [ + "enable_reasoning" + ] + }, + "then": { + "properties": { + "reasoning_effort": { + "type": "string", + "enum": [ + "low", + "medium", + "high" + ], + "default": "medium", + "title": "Reasoning Effort", + "description": "Sets the reasoning strength when Enable Reasoning is on." + } + }, + "required": [ + "reasoning_effort" + ] + } + }, + { + "if": { + "properties": { + "enable_reasoning": { + "const": false + } + }, + "required": [ + "enable_reasoning" + ] + }, + "then": { + "properties": {} + } + } + ] +} diff --git a/unstract/sdk1/src/unstract/sdk1/embedding.py b/unstract/sdk1/src/unstract/sdk1/embedding.py index 109f2cd6a1..1e02944f83 100644 --- a/unstract/sdk1/src/unstract/sdk1/embedding.py +++ b/unstract/sdk1/src/unstract/sdk1/embedding.py @@ -29,6 +29,10 @@ litellm.drop_params = True +# Asymmetric embedding models need an input_type (query|passage); other +# providers reject the field, so it's sent only for this prefix. +_NVIDIA_NIM_MODEL_PREFIX = "nvidia_nim/" + class Embedding: """Unified embedding interface powered by LiteLLM. @@ -103,6 +107,8 @@ def __init__( self.platform_kwargs: dict[str, object] = kwargs self.kwargs: dict[str, object] = self.adapter.validate(self._adapter_metadata) self._cost_model: str | None = self.kwargs.pop("cost_model", None) + # Client-side batching hint, not an API field — keep it off the wire. + self.kwargs.pop("embed_batch_size", None) except (ValidationError, ValueError) as e: raise SdkError("Invalid embedding adapter metadata: " + str(e)) from e @@ -119,13 +125,19 @@ def _get_adapter_info(self) -> str: return f"{self._adapter_name} ({name})" return name - def get_embedding(self, text: str) -> list[float]: + def _prepare_call(self, input_type: str | None) -> tuple[str, dict, int | None]: + """Split model/retries out of kwargs and inject input_type when applicable.""" + kwargs = self.kwargs.copy() + model = kwargs.pop("model") + max_retries = pop_litellm_retry_kwargs(kwargs, self._get_adapter_info()) + if input_type and str(model).startswith(_NVIDIA_NIM_MODEL_PREFIX): + kwargs["input_type"] = input_type + return model, kwargs, max_retries + + def get_embedding(self, text: str, input_type: str = "query") -> list[float]: """Return embedding vector for query string.""" try: - kwargs = self.kwargs.copy() - model = kwargs.pop("model") - max_retries = pop_litellm_retry_kwargs(kwargs, self._get_adapter_info()) - + model, kwargs, max_retries = self._prepare_call(input_type) resp = call_with_retry( lambda: litellm.embedding(model=model, input=[text], **kwargs), max_retries=max_retries, @@ -136,13 +148,12 @@ def get_embedding(self, text: str) -> list[float]: except Exception as e: raise parse_litellm_err(e, self._get_adapter_info()) from e - def get_embeddings(self, texts: list[str]) -> list[list[float]]: + def get_embeddings( + self, texts: list[str], input_type: str = "passage" + ) -> list[list[float]]: """Return embedding vectors for list of query strings.""" try: - kwargs = self.kwargs.copy() - model = kwargs.pop("model") - max_retries = pop_litellm_retry_kwargs(kwargs, self._get_adapter_info()) - + model, kwargs, max_retries = self._prepare_call(input_type) resp = call_with_retry( lambda: litellm.embedding(model=model, input=texts, **kwargs), max_retries=max_retries, @@ -153,13 +164,10 @@ def get_embeddings(self, texts: list[str]) -> list[list[float]]: except Exception as e: raise parse_litellm_err(e, self._get_adapter_info()) from e - async def get_aembedding(self, text: str) -> list[float]: + async def get_aembedding(self, text: str, input_type: str = "query") -> list[float]: """Return async embedding vector for query string.""" try: - kwargs = self.kwargs.copy() - model = kwargs.pop("model") - max_retries = pop_litellm_retry_kwargs(kwargs, self._get_adapter_info()) - + model, kwargs, max_retries = self._prepare_call(input_type) resp = await acall_with_retry( lambda: litellm.aembedding(model=model, input=[text], **kwargs), max_retries=max_retries, @@ -170,13 +178,12 @@ async def get_aembedding(self, text: str) -> list[float]: except Exception as e: raise parse_litellm_err(e, self._get_adapter_info()) from e - async def get_aembeddings(self, texts: list[str]) -> list[list[float]]: + async def get_aembeddings( + self, texts: list[str], input_type: str = "passage" + ) -> list[list[float]]: """Return async embedding vectors for list of query strings.""" try: - kwargs = self.kwargs.copy() - model = kwargs.pop("model") - max_retries = pop_litellm_retry_kwargs(kwargs, self._get_adapter_info()) - + model, kwargs, max_retries = self._prepare_call(input_type) resp = await acall_with_retry( lambda: litellm.aembedding(model=model, input=texts, **kwargs), max_retries=max_retries, @@ -256,25 +263,25 @@ def __init__( ) def _get_query_embedding(self, query: str) -> list[float]: - return self._embedding_instance.get_embedding(query) + return self._embedding_instance.get_embedding(query, input_type="query") def _get_text_embedding(self, text: str) -> list[float]: - return self._embedding_instance.get_embedding(text) + return self._embedding_instance.get_embedding(text, input_type="passage") def _get_text_embeddings(self, texts: list[str]) -> list[list[float]]: - return self._embedding_instance.get_embeddings(texts) + return self._embedding_instance.get_embeddings(texts, input_type="passage") def get_query_embedding(self, query: str) -> list[float]: return self._get_query_embedding(query) async def _aget_query_embedding(self, query: str) -> list[float]: - return await self._embedding_instance.get_aembedding(query) + return await self._embedding_instance.get_aembedding(query, input_type="query") async def _aget_text_embedding(self, text: str) -> list[float]: - return await self._embedding_instance.get_aembedding(text) + return await self._embedding_instance.get_aembedding(text, input_type="passage") async def _aget_text_embeddings(self, texts: list[str]) -> list[list[float]]: - return await self._embedding_instance.get_aembeddings(texts) + return await self._embedding_instance.get_aembeddings(texts, input_type="passage") async def get_aquery_embedding(self, query: str) -> list[float]: return await self._aget_query_embedding(query) diff --git a/unstract/sdk1/tests/test_branded_openai_adapters.py b/unstract/sdk1/tests/test_branded_openai_adapters.py new file mode 100644 index 0000000000..c876698847 --- /dev/null +++ b/unstract/sdk1/tests/test_branded_openai_adapters.py @@ -0,0 +1,311 @@ +import json + +import pytest +from unstract.sdk1.adapters.base1 import ( + NvidiaBuildEmbeddingParameters, + NvidiaBuildLLMParameters, + OpenAICompatibleEmbeddingParameters, + OpenRouterLLMParameters, +) +from unstract.sdk1.adapters.constants import Common +from unstract.sdk1.adapters.embedding1 import adapters as embedding_adapters +from unstract.sdk1.adapters.embedding1.nvidia_build import NvidiaBuildEmbeddingAdapter +from unstract.sdk1.adapters.embedding1.openai_compatible import ( + OpenAICompatibleEmbeddingAdapter, +) +from unstract.sdk1.adapters.llm1 import adapters as llm_adapters +from unstract.sdk1.adapters.llm1.nvidia_build import NvidiaBuildLLMAdapter +from unstract.sdk1.adapters.llm1.openrouter import OpenRouterLLMAdapter + +_NVIDIA_BUILD_API_BASE = "https://integrate.api.nvidia.com/v1" +_OPENROUTER_API_BASE = "https://openrouter.ai/api/v1" + + +# --- Branded LLM adapters ------------------------------------------------- + + +@pytest.mark.parametrize( + "adapter", + [NvidiaBuildLLMAdapter, OpenRouterLLMAdapter], +) +def test_branded_llm_adapter_is_registered(adapter: type) -> None: + adapter_id = adapter.get_id() + assert adapter_id in llm_adapters + assert llm_adapters[adapter_id][Common.MODULE] is adapter + + +def test_nvidia_llm_prefixes_model_via_custom_openai() -> None: + validated = NvidiaBuildLLMParameters.validate({"model": "some-model", "api_key": "k"}) + + assert validated["model"] == "custom_openai/some-model" + assert validated["api_base"] == _NVIDIA_BUILD_API_BASE + + +def test_openrouter_llm_routes_via_native_openrouter_provider() -> None: + from litellm import get_llm_provider + + validated = OpenRouterLLMParameters.validate( + {"model": "openai/gpt-4o", "api_key": "k"} + ) + + assert validated["model"] == "openrouter/openai/gpt-4o" + assert validated["api_base"] == _OPENROUTER_API_BASE + # Native routing is what lets LiteLLM resolve OpenRouter pricing. + assert get_llm_provider(validated["model"])[1] == "openrouter" + + +def test_openrouter_model_prefix_is_idempotent() -> None: + once = OpenRouterLLMParameters.validate({"model": "openai/gpt-4o", "api_key": "k"}) + twice = OpenRouterLLMParameters.validate(dict(once)) + + assert twice["model"] == once["model"] == "openrouter/openai/gpt-4o" + + +def test_openrouter_forwards_reasoning_effort_only_when_enabled() -> None: + on = OpenRouterLLMParameters.validate( + { + "model": "openai/gpt-5", + "api_key": "k", + "enable_reasoning": True, + "reasoning_effort": "high", + } + ) + assert on["reasoning_effort"] == "high" + # enable_reasoning is a UI-only toggle and must not leak to LiteLLM. + assert "enable_reasoning" not in on + # temperature dropped so OpenAI o-series (via OpenRouter) don't reject it. + assert on["temperature"] is None + + off = OpenRouterLLMParameters.validate( + { + "model": "openai/gpt-4o", + "api_key": "k", + "enable_reasoning": False, + "reasoning_effort": "high", + } + ) + assert off["reasoning_effort"] is None + + +def test_openrouter_reasoning_survives_revalidation() -> None: + once = OpenRouterLLMParameters.validate( + { + "model": "openai/gpt-5", + "api_key": "k", + "enable_reasoning": True, + "reasoning_effort": "high", + } + ) + assert once["reasoning_effort"] == "high" + # Reloading a saved config (no enable_reasoning key) must keep reasoning on. + twice = OpenRouterLLMParameters.validate(dict(once)) + assert twice["reasoning_effort"] == "high" + assert twice["temperature"] is None + + +@pytest.mark.parametrize( + ("params", "default_base"), + [ + (NvidiaBuildLLMParameters, _NVIDIA_BUILD_API_BASE), + (OpenRouterLLMParameters, _OPENROUTER_API_BASE), + ], +) +def test_branded_llm_blank_api_base_falls_back_to_default( + params: type, default_base: str +) -> None: + validated = params.validate({"model": "m", "api_key": "k", "api_base": " "}) + + assert validated["api_base"] == default_base + + +@pytest.mark.parametrize( + "params", + [NvidiaBuildLLMParameters, OpenRouterLLMParameters], +) +def test_branded_llm_honours_api_base_override(params: type) -> None: + validated = params.validate( + {"model": "m", "api_key": "k", "api_base": "https://proxy.internal/v1"} + ) + + assert validated["api_base"] == "https://proxy.internal/v1" + + +@pytest.mark.parametrize( + ("adapter", "default_base"), + [ + (NvidiaBuildLLMAdapter, _NVIDIA_BUILD_API_BASE), + (OpenRouterLLMAdapter, _OPENROUTER_API_BASE), + ], +) +def test_branded_llm_schema_exposes_api_base_with_default( + adapter: type, default_base: str +) -> None: + schema = json.loads(adapter.get_json_schema()) + + assert schema["properties"]["api_base"]["default"] == default_base + assert "api_base" not in schema["required"] + assert "model" in schema["required"] + + +# --- Branded / generic embedding adapters --------------------------------- + + +def test_nvidia_embedding_registered_and_routes_via_nvidia_nim() -> None: + adapter_id = NvidiaBuildEmbeddingAdapter.get_id() + assert adapter_id in embedding_adapters + + validated = NvidiaBuildEmbeddingParameters.validate( + {"model": "nvidia/nv-embedqa-e5-v5", "api_key": "k"} + ) + assert validated["model"] == "nvidia_nim/nvidia/nv-embedqa-e5-v5" + assert validated["api_base"] == _NVIDIA_BUILD_API_BASE + + +def test_nvidia_embedding_defaults_encoding_format_to_float() -> None: + # NVIDIA rejects the null encoding_format LiteLLM sends when unset. + validated = NvidiaBuildEmbeddingParameters.validate( + {"model": "nvidia/nv-embedqa-e5-v5", "api_key": "k"} + ) + assert validated["encoding_format"] == "float" + + +def test_compatible_embedding_defaults_encoding_format_to_float() -> None: + validated = OpenAICompatibleEmbeddingParameters.validate( + {"model": "BAAI/bge-m3", "api_base": "https://gw.example/v1"} + ) + assert validated["encoding_format"] == "float" + + +def test_nvidia_embedding_honours_api_base_override() -> None: + validated = NvidiaBuildEmbeddingParameters.validate( + {"model": "m", "api_key": "k", "api_base": "https://proxy.internal/v1"} + ) + assert validated["api_base"] == "https://proxy.internal/v1" + + +def test_nvidia_embedding_model_prefix_is_idempotent() -> None: + once = NvidiaBuildEmbeddingParameters.validate({"model": "m", "api_key": "k"}) + twice = NvidiaBuildEmbeddingParameters.validate(dict(once)) + assert twice["model"] == once["model"] == "nvidia_nim/m" + + +def test_compatible_embedding_registered_and_routes_via_openai() -> None: + adapter_id = OpenAICompatibleEmbeddingAdapter.get_id() + assert adapter_id in embedding_adapters + assert OpenAICompatibleEmbeddingAdapter.get_provider() == "custom_openai" + + validated = OpenAICompatibleEmbeddingParameters.validate( + {"model": "BAAI/bge-m3", "api_base": "https://gw.example/v1"} + ) + assert validated["model"] == "openai/BAAI/bge-m3" + assert validated["api_base"] == "https://gw.example/v1" + + +def test_compatible_embedding_blank_api_key_uses_placeholder() -> None: + # Keyless gateways still need a non-empty key or the OpenAI SDK rejects it. + validated = OpenAICompatibleEmbeddingParameters.validate( + {"model": "m", "api_base": "https://gw.example/v1", "api_key": " "} + ) + assert isinstance(validated["api_key"], str) + assert validated["api_key"].strip() + + +def test_compatible_embedding_requires_api_base() -> None: + with pytest.raises(Exception): # noqa: B017 - pydantic ValidationError + OpenAICompatibleEmbeddingParameters.validate({"model": "m"}) + + +def test_compatible_embedding_schema_loadable() -> None: + schema = json.loads(OpenAICompatibleEmbeddingAdapter.get_json_schema()) + assert schema["title"] == "OpenAI Compatible Embedding" + assert "api_base" in schema["required"] + assert "model" in schema["required"] + + +@pytest.mark.parametrize( + "adapter", + [NvidiaBuildEmbeddingAdapter, OpenAICompatibleEmbeddingAdapter], +) +def test_embedding_schema_drops_embed_batch_size(adapter: type) -> None: + # embed_batch_size is an inert llama-index hint; it must not be shown. + schema = json.loads(adapter.get_json_schema()) + assert "embed_batch_size" not in schema["properties"] + + +def test_embedding_strips_embed_batch_size_before_litellm( + monkeypatch: pytest.MonkeyPatch, +) -> None: + # Non-API fields must not reach the provider; encoding_format must be sent. + import unstract.sdk1.embedding as emb_mod + + captured: dict = {} + + def fake_embedding(model: str, input: list, **kwargs: object) -> dict: # noqa: A002 + captured["model"] = model + captured.update(kwargs) + return {"data": [{"embedding": [0.0, 1.0, 2.0]}]} + + monkeypatch.setattr(emb_mod.litellm, "embedding", fake_embedding) + + emb_mod.Embedding( + adapter_id=NvidiaBuildEmbeddingAdapter.get_id(), + adapter_metadata={ + "adapter_name": "n", + "model": "nvidia/nv-embedqa-e5-v5", + "api_key": "k", + "embed_batch_size": 10, + }, + ) + + assert "embed_batch_size" not in captured + assert captured["encoding_format"] == "float" + assert captured["model"] == "nvidia_nim/nvidia/nv-embedqa-e5-v5" + # Query path must send input_type for asymmetric models. + assert captured["input_type"] == "query" + + +def _patch_capture_embedding(monkeypatch: pytest.MonkeyPatch) -> dict: + import unstract.sdk1.embedding as emb_mod + + captured: dict = {} + + def fake_embedding(model: str, input: list, **kwargs: object) -> dict: # noqa: A002 + captured["model"] = model + captured.update(kwargs) + return {"data": [{"embedding": [0.0, 1.0]}] * len(input)} + + monkeypatch.setattr(emb_mod.litellm, "embedding", fake_embedding) + return captured + + +def test_nvidia_embedding_batch_sends_passage_input_type( + monkeypatch: pytest.MonkeyPatch, +) -> None: + import unstract.sdk1.embedding as emb_mod + + captured = _patch_capture_embedding(monkeypatch) + emb = emb_mod.Embedding( + adapter_id=NvidiaBuildEmbeddingAdapter.get_id(), + adapter_metadata={"model": "nvidia/nv-embedqa-e5-v5", "api_key": "k"}, + ) + emb.get_embeddings(["a", "b"]) + assert captured["input_type"] == "passage" + + +def test_compatible_embedding_omits_input_type( + monkeypatch: pytest.MonkeyPatch, +) -> None: + # input_type is NVIDIA-only; non-nvidia_nim models must not receive it. + import unstract.sdk1.embedding as emb_mod + + captured = _patch_capture_embedding(monkeypatch) + emb_mod.Embedding( + adapter_id=OpenAICompatibleEmbeddingAdapter.get_id(), + adapter_metadata={ + "model": "BAAI/bge-m3", + "api_base": "https://gw.example/v1", + "api_key": "k", + }, + ) + assert "input_type" not in captured + assert captured["model"] == "openai/BAAI/bge-m3" diff --git a/unstract/sdk1/tests/test_gemini_embedding.py b/unstract/sdk1/tests/test_gemini_embedding.py index 2bf031e3ee..3bdc0f6d70 100644 --- a/unstract/sdk1/tests/test_gemini_embedding.py +++ b/unstract/sdk1/tests/test_gemini_embedding.py @@ -39,9 +39,10 @@ def test_json_schema_required_fields(self) -> None: schema = json.loads(GeminiEmbeddingAdapter.get_json_schema()) assert set(schema["required"]) == {"adapter_name", "api_key", "model"} - def test_json_schema_no_batch_size_default(self) -> None: + def test_json_schema_omits_batch_size(self) -> None: + # embed_batch_size is an inert client-side hint and is not exposed. schema = json.loads(GeminiEmbeddingAdapter.get_json_schema()) - assert "default" not in schema["properties"]["embed_batch_size"] + assert "embed_batch_size" not in schema["properties"] def test_json_schema_api_key_password_format(self) -> None: schema = json.loads(GeminiEmbeddingAdapter.get_json_schema())