From 5fb714d49cbf7e0552665a8585ca1c54f47eb6bd Mon Sep 17 00:00:00 2001 From: Lisa Cawley Date: Tue, 25 Mar 2025 09:24:13 -0700 Subject: [PATCH] Add Azure AI Studio inference API (#4020) (cherry picked from commit 0eab22cb8125693e2ceb97917666473a085fb4e0) --- output/openapi/elasticsearch-openapi.json | 160 + .../elasticsearch-serverless-openapi.json | 160 + output/schema/schema-serverless.json | 3491 +++-------------- output/schema/schema.json | 349 ++ output/typescript/types.ts | 33 + specification/_doc_ids/table.csv | 1 + .../inference.put_azureaistudio.json | 35 + .../PutAzureAiStudioRequest.ts | 164 + .../PutAzureAiStudioResponse.ts | 24 + .../PutAzureAiStudioRequestExample1.yaml | 14 + .../PutAzureAiStudioRequestExample2.yaml | 14 + 11 files changed, 1551 insertions(+), 2894 deletions(-) create mode 100644 specification/_json_spec/inference.put_azureaistudio.json create mode 100644 specification/inference/put_azureaistudio/PutAzureAiStudioRequest.ts create mode 100644 specification/inference/put_azureaistudio/PutAzureAiStudioResponse.ts create mode 100644 specification/inference/put_azureaistudio/examples/request/PutAzureAiStudioRequestExample1.yaml create mode 100644 specification/inference/put_azureaistudio/examples/request/PutAzureAiStudioRequestExample2.yaml diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 379b3c8348..2b1e590179 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -17844,6 +17844,92 @@ "x-state": "Added in 8.16.0" } }, + "/_inference/{task_type}/{azureaistudio_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create an Azure AI studio inference endpoint", + "description": "Create an inference endpoint to perform an inference task with the `azureaistudio` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "operationId": "inference-put-azureaistudio", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The type of the inference task that the model will perform.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_azureaistudio:AzureAiStudioTaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "azureaistudio_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, + "service": { + "$ref": "#/components/schemas/inference.put_azureaistudio:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_azureaistudio:AzureAiStudioServiceSettings" + }, + "task_settings": { + "$ref": 
"#/components/schemas/inference.put_azureaistudio:AzureAiStudioTaskSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + }, + "examples": { + "PutAzureAiStudioRequestExample1": { + "summary": "A text embedding task", + "description": "Run `PUT _inference/text_embedding/azure_ai_studio_embeddings` to create an inference endpoint that performs a text_embedding task. Note that you do not specify a model here, as it is defined already in the Azure AI Studio deployment.", + "value": "{\n \"service\": \"azureaistudio\",\n \"service_settings\": {\n \"api_key\": \"Azure-AI-Studio-API-key\",\n \"target\": \"Target-Uri\",\n \"provider\": \"openai\",\n \"endpoint_type\": \"token\"\n }\n}" + }, + "PutAzureAiStudioRequestExample2": { + "summary": "A completion task", + "description": "Run `PUT _inference/completion/azure_ai_studio_completion` to create an inference endpoint that performs a completion task.", + "value": "{\n \"service\": \"azureaistudio\",\n \"service_settings\": {\n \"api_key\": \"Azure-AI-Studio-API-key\",\n \"target\": \"Target-URI\",\n \"provider\": \"databricks\",\n \"endpoint_type\": \"realtime\"\n }\n}" + } + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + } + } + } + } + }, + "x-state": "Added in 8.14.0" + } + }, "/_inference/{task_type}/{azureopenai_inference_id}": { "put": { "tags": [ @@ -77311,6 +77397,80 @@ } } }, + "inference.put_azureaistudio:AzureAiStudioTaskType": { + "type": "string", + "enum": [ + "completion", + "text_embedding" + ] + }, + "inference.put_azureaistudio:ServiceType": { + "type": "string", + "enum": [ + "azureaistudio" + ] + }, + "inference.put_azureaistudio:AzureAiStudioServiceSettings": { + "type": "object", + "properties": { + "api_key": { + "externalDocs": { + "url": "https://ai.azure.com/" + }, + "description": "A valid API key of your Azure AI Studio model deployment.\nThis key can be found on the overview page for your deployment in the management section of your Azure AI Studio account.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "type": "string" + }, + "endpoint_type": { + "externalDocs": { + "url": "https://learn.microsoft.com/en-us/azure/ai-foundry/concepts/deployments-overview#billing-for-deploying-and-inferencing-llms-in-azure-ai-studio" + }, + "description": "The type of endpoint that is available for deployment through Azure AI Studio: `token` or `realtime`.\nThe `token` endpoint type is for \"pay as you go\" endpoints that are billed per token.\nThe `realtime` endpoint type is for \"real-time\" endpoints that are billed per hour of usage.", + "type": "string" + }, + "target": { + "description": "The target URL of your Azure AI Studio model deployment.\nThis can be found on the overview page for your deployment in the management section of your Azure AI Studio account.", + "type": "string" + }, + "provider": { + "description": "The model provider for your deployment.\nNote that some providers may support only certain task types.\nSupported providers include:\n\n* `cohere` - available for `text_embedding` and `completion` task types\n* `databricks` - available 
for `completion` task type only\n* `meta` - available for `completion` task type only\n* `microsoft_phi` - available for `completion` task type only\n* `mistral` - available for `completion` task type only\n* `openai` - available for `text_embedding` and `completion` task types", + "type": "string" + }, + "rate_limit": { + "$ref": "#/components/schemas/inference._types:RateLimitSetting" + } + }, + "required": [ + "api_key", + "endpoint_type", + "target", + "provider" + ] + }, + "inference.put_azureaistudio:AzureAiStudioTaskSettings": { + "type": "object", + "properties": { + "do_sample": { + "description": "For a `completion` task, instruct the inference process to perform sampling.\nIt has no effect unless `temperature` or `top_p` is specified.", + "type": "number" + }, + "max_new_tokens": { + "description": "For a `completion` task, provide a hint for the maximum number of output tokens to be generated.", + "type": "number" + }, + "temperature": { + "description": "For a `completion` task, control the apparent creativity of generated completions with a sampling temperature.\nIt must be a number in the range of 0.0 to 2.0.\nIt should not be used if `top_p` is specified.", + "type": "number" + }, + "top_p": { + "description": "For a `completion` task, make the model consider the results of the tokens with nucleus sampling probability.\nIt is an alternative value to `temperature` and must be a number in the range of 0.0 to 2.0.\nIt should not be used if `temperature` is specified.", + "type": "number" + }, + "user": { + "description": "For a `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.", + "type": "string" + } + } + }, "inference.put_azureopenai:AzureOpenAITaskType": { "type": "string", "enum": [ diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 8e63f206a6..6db845f83b 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -9670,6 +9670,92 @@ "x-state": "Added in 8.16.0" } }, + "/_inference/{task_type}/{azureaistudio_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create an Azure AI studio inference endpoint", + "description": "Create an inference endpoint to perform an inference task with the `azureaistudio` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "operationId": "inference-put-azureaistudio", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The type of the inference task that the model will perform.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_azureaistudio:AzureAiStudioTaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "azureaistudio_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + 
}, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, + "service": { + "$ref": "#/components/schemas/inference.put_azureaistudio:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_azureaistudio:AzureAiStudioServiceSettings" + }, + "task_settings": { + "$ref": "#/components/schemas/inference.put_azureaistudio:AzureAiStudioTaskSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + }, + "examples": { + "PutAzureAiStudioRequestExample1": { + "summary": "A text embedding task", + "description": "Run `PUT _inference/text_embedding/azure_ai_studio_embeddings` to create an inference endpoint that performs a text_embedding task. Note that you do not specify a model here, as it is defined already in the Azure AI Studio deployment.", + "value": "{\n \"service\": \"azureaistudio\",\n \"service_settings\": {\n \"api_key\": \"Azure-AI-Studio-API-key\",\n \"target\": \"Target-Uri\",\n \"provider\": \"openai\",\n \"endpoint_type\": \"token\"\n }\n}" + }, + "PutAzureAiStudioRequestExample2": { + "summary": "A completion task", + "description": "Run `PUT _inference/completion/azure_ai_studio_completion` to create an inference endpoint that performs a completion task.", + "value": "{\n \"service\": \"azureaistudio\",\n \"service_settings\": {\n \"api_key\": \"Azure-AI-Studio-API-key\",\n \"target\": \"Target-URI\",\n \"provider\": \"databricks\",\n \"endpoint_type\": \"realtime\"\n }\n}" + } + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + } + } + } + } + }, + "x-state": "Added in 8.14.0" + } + }, "/_inference/{task_type}/{azureopenai_inference_id}": { "put": { "tags": [ @@ -48507,6 +48593,80 @@ } } }, + "inference.put_azureaistudio:AzureAiStudioTaskType": { + "type": "string", + "enum": [ + "completion", + "text_embedding" + ] + }, + "inference.put_azureaistudio:ServiceType": { + "type": "string", + "enum": [ + "azureaistudio" + ] + }, + "inference.put_azureaistudio:AzureAiStudioServiceSettings": { + "type": "object", + "properties": { + "api_key": { + "externalDocs": { + "url": "https://ai.azure.com/" + }, + "description": "A valid API key of your Azure AI Studio model deployment.\nThis key can be found on the overview page for your deployment in the management section of your Azure AI Studio account.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "type": "string" + }, + "endpoint_type": { + "externalDocs": { + "url": "https://learn.microsoft.com/en-us/azure/ai-foundry/concepts/deployments-overview#billing-for-deploying-and-inferencing-llms-in-azure-ai-studio" + }, + "description": "The type of endpoint that is available for deployment through Azure AI Studio: `token` or `realtime`.\nThe `token` endpoint type is for \"pay as you go\" endpoints that are billed per token.\nThe `realtime` endpoint type is for \"real-time\" endpoints that are billed per hour of usage.", + "type": "string" + }, + "target": { 
+ "description": "The target URL of your Azure AI Studio model deployment.\nThis can be found on the overview page for your deployment in the management section of your Azure AI Studio account.", + "type": "string" + }, + "provider": { + "description": "The model provider for your deployment.\nNote that some providers may support only certain task types.\nSupported providers include:\n\n* `cohere` - available for `text_embedding` and `completion` task types\n* `databricks` - available for `completion` task type only\n* `meta` - available for `completion` task type only\n* `microsoft_phi` - available for `completion` task type only\n* `mistral` - available for `completion` task type only\n* `openai` - available for `text_embedding` and `completion` task types", + "type": "string" + }, + "rate_limit": { + "$ref": "#/components/schemas/inference._types:RateLimitSetting" + } + }, + "required": [ + "api_key", + "endpoint_type", + "target", + "provider" + ] + }, + "inference.put_azureaistudio:AzureAiStudioTaskSettings": { + "type": "object", + "properties": { + "do_sample": { + "description": "For a `completion` task, instruct the inference process to perform sampling.\nIt has no effect unless `temperature` or `top_p` is specified.", + "type": "number" + }, + "max_new_tokens": { + "description": "For a `completion` task, provide a hint for the maximum number of output tokens to be generated.", + "type": "number" + }, + "temperature": { + "description": "For a `completion` task, control the apparent creativity of generated completions with a sampling temperature.\nIt must be a number in the range of 0.0 to 2.0.\nIt should not be used if `top_p` is specified.", + "type": "number" + }, + "top_p": { + "description": "For a `completion` task, make the model consider the results of the tokens with nucleus sampling probability.\nIt is an alternative value to `temperature` and must be a number in the range of 0.0 to 2.0.\nIt should not be used if `temperature` is specified.", + "type": "number" + }, + "user": { + "description": "For a `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.", + "type": "string" + } + } + }, "inference.put_azureopenai:AzureOpenAITaskType": { "type": "string", "enum": [ diff --git a/output/schema/schema-serverless.json b/output/schema/schema-serverless.json index 1304b877b8..8820a53585 100644 --- a/output/schema/schema-serverless.json +++ b/output/schema/schema-serverless.json @@ -4598,26 +4598,105 @@ "visibility": "public" }, "stack": { -<<<<<<< HEAD "since": "8.16.0", -======= - "since": "8.14.0", ->>>>>>> 1e946eb24 (Add Azure OpenAI inference details (#4019)) "stability": "stable", "visibility": "public" } }, -<<<<<<< HEAD "description": "Create an AlibabaCloud AI Search inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `alibabacloud-ai-search` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", "docId": "inference-api-put-alibabacloud", "docUrl": 
"https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-alibabacloud-ai-search.html", "name": "inference.put_alibabacloud", -======= + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.put_alibabacloud" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.put_alibabacloud" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "PUT" + ], + "path": "/_inference/{task_type}/{alibabacloud_inference_id}" + } + ] + }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.14.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Create an Azure AI studio inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `azureaistudio` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "docId": "inference-api-put-azureaistudio", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-azure-ai-studio.html", + "name": "inference.put_azureaistudio", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.put_azureaistudio" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.put_azureaistudio" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "PUT" + ], + "path": "/_inference/{task_type}/{azureaistudio_inference_id}" + } + ] + }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.14.0", + "stability": "stable", + "visibility": "public" + } + }, "description": "Create an Azure OpenAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `azureopenai` service.\n\nThe list of chat completion models that you can choose from in your Azure OpenAI deployment include:\n\n* [GPT-4 and GPT-4 Turbo models](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-4-and-gpt-4-turbo-models)\n* [GPT-3.5](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-35)\n\nThe list of embeddings models that you can choose from in your deployment can be found in the [Azure models documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#embeddings).\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment 
status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", "docId": "inference-api-put-azureopenai", "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-azure-openai.html", "name": "inference.put_azureopenai", ->>>>>>> 1e946eb24 (Add Azure OpenAI inference details (#4019)) "privileges": { "cluster": [ "manage_inference" @@ -4625,11 +4704,7 @@ }, "request": { "name": "Request", -<<<<<<< HEAD - "namespace": "inference.put_alibabacloud" -======= "namespace": "inference.put_azureopenai" ->>>>>>> 1e946eb24 (Add Azure OpenAI inference details (#4019)) }, "requestBodyRequired": false, "requestMediaType": [ @@ -4637,11 +4712,7 @@ ], "response": { "name": "Response", -<<<<<<< HEAD - "namespace": "inference.put_alibabacloud" -======= "namespace": "inference.put_azureopenai" ->>>>>>> 1e946eb24 (Add Azure OpenAI inference details (#4019)) }, "responseMediaType": [ "application/json" @@ -4651,11 +4722,7 @@ "methods": [ "PUT" ], -<<<<<<< HEAD - "path": "/_inference/{task_type}/{alibabacloud_inference_id}" -======= "path": "/_inference/{task_type}/{azureopenai_inference_id}" ->>>>>>> 1e946eb24 (Add Azure OpenAI inference details (#4019)) } ] }, @@ -4711,8 +4778,6 @@ "visibility": "public" }, "stack": { -<<<<<<< HEAD -======= "since": "8.18.0", "stability": "stable", "visibility": "public" @@ -4803,7 +4868,6 @@ "visibility": "public" }, "stack": { ->>>>>>> f7c35e7b1 (Add Mistral inference details (#3997)) "since": "8.12.0", "stability": "stable", "visibility": "public" @@ -27139,9 +27203,6 @@ } }, { -<<<<<<< HEAD -======= -<<<<<<< HEAD "description": "The type of service supported for the specified task type. In this case, `alibabacloud-ai-search`.", "name": "service", "required": true, @@ -27282,59 +27343,26 @@ } }, { - "description": "The type of service supported for the specified task type. In this case, `amazonbedrock`.", -======= -<<<<<<< HEAD - "description": "The type of service supported for the specified task type. In this case, `amazonbedrock`.", -======= - "description": "The type of service supported for the specified task type. In this case, `azureopenai`.", ->>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) ->>>>>>> 1e946eb24 (Add Azure OpenAI inference details (#4019)) + "description": "The type of service supported for the specified task type. In this case, `azureaistudio`.", "name": "service", "required": true, "type": { "kind": "instance_of", "type": { "name": "ServiceType", -<<<<<<< HEAD - "namespace": "inference.put_amazonbedrock" -======= -<<<<<<< HEAD - "namespace": "inference.put_amazonbedrock" -======= - "namespace": "inference.put_azureopenai" ->>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) ->>>>>>> 1e946eb24 (Add Azure OpenAI inference details (#4019)) + "namespace": "inference.put_azureaistudio" } } }, { -<<<<<<< HEAD - "description": "Settings used to install the inference model. These settings are specific to the `amazonbedrock` service.", -======= -<<<<<<< HEAD - "description": "Settings used to install the inference model. These settings are specific to the `amazonbedrock` service.", -======= - "description": "Settings used to install the inference model. 
These settings are specific to the `azureopenai` service.",
->>>>>>> 52fce7a43 (Add Azure OpenAI details and examples)
->>>>>>> 1e946eb24 (Add Azure OpenAI inference details (#4019))
+      "description": "Settings used to install the inference model. These settings are specific to the `azureaistudio` service.",
       "name": "service_settings",
       "required": true,
       "type": {
         "kind": "instance_of",
         "type": {
-<<<<<<< HEAD
-          "name": "AmazonBedrockServiceSettings",
-          "namespace": "inference.put_amazonbedrock"
-=======
-<<<<<<< HEAD
-          "name": "AmazonBedrockServiceSettings",
-          "namespace": "inference.put_amazonbedrock"
-=======
-          "name": "AzureOpenAIServiceSettings",
-          "namespace": "inference.put_azureopenai"
->>>>>>> 52fce7a43 (Add Azure OpenAI details and examples)
->>>>>>> 1e946eb24 (Add Azure OpenAI inference details (#4019))
+          "name": "AzureAiStudioServiceSettings",
+          "namespace": "inference.put_azureaistudio"
         }
       }
     },
@@ -27345,54 +27373,24 @@
       "type": {
         "kind": "instance_of",
         "type": {
-<<<<<<< HEAD
-          "name": "AmazonBedrockTaskSettings",
-          "namespace": "inference.put_amazonbedrock"
-=======
-<<<<<<< HEAD
-          "name": "AmazonBedrockTaskSettings",
-          "namespace": "inference.put_amazonbedrock"
-=======
-          "name": "AzureOpenAITaskSettings",
-          "namespace": "inference.put_azureopenai"
->>>>>>> 52fce7a43 (Add Azure OpenAI details and examples)
->>>>>>> 1e946eb24 (Add Azure OpenAI inference details (#4019))
+          "name": "AzureAiStudioTaskSettings",
+          "namespace": "inference.put_azureaistudio"
         }
      }
    }
  ]
},
-<<<<<<< HEAD
-=======
-<<<<<<< HEAD
->>>>>>> 1e946eb24 (Add Azure OpenAI inference details (#4019))
-  "description": "Create an Amazon Bedrock inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `amazonbedrock` service.\n\n>info\n> You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. 
If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", - "examples": { - "PutAmazonBedrockRequestExample1": { - "description": "Run `PUT _inference/text_embedding/amazon_bedrock_embeddings` to create an inference endpoint that performs a text embedding task.", - "summary": "A text embedding task", - "value": "{\n \"service\": \"amazonbedrock\",\n \"service_settings\": {\n \"access_key\": \"AWS-access-key\",\n \"secret_key\": \"AWS-secret-key\",\n \"region\": \"us-east-1\",\n \"provider\": \"amazontitan\",\n \"model\": \"amazon.titan-embed-text-v2:0\"\n }\n}" - }, - "PutAmazonBedrockRequestExample2": { - "description": "Run `PUT _inference/completion/openai-completion` to create an inference endpoint to perform a completion task type.", - "summary": "A completion task", - "value": "{\n \"service\": \"openai\",\n \"service_settings\": {\n \"api_key\": \"OpenAI-API-Key\",\n \"model_id\": \"gpt-3.5-turbo\"\n }\n}" -<<<<<<< HEAD -======= -======= - "description": "Create an Azure OpenAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `azureopenai` service.\n\nThe list of chat completion models that you can choose from in your Azure OpenAI deployment include:\n\n* [GPT-4 and GPT-4 Turbo models](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-4-and-gpt-4-turbo-models)\n* [GPT-3.5](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-35)\n\nThe list of embeddings models that you can choose from in your deployment can be found in the [Azure models documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#embeddings).\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "description": "Create an Azure AI studio inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `azureaistudio` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response 
and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", "examples": { - "PutAzureOpenAiRequestExample1": { - "description": "Run `PUT _inference/text_embedding/azure_openai_embeddings` to create an inference endpoint that performs a `text_embedding` task. You do not specify a model, as it is defined already in the Azure OpenAI deployment.", + "PutAzureAiStudioRequestExample1": { + "description": "Run `PUT _inference/text_embedding/azure_ai_studio_embeddings` to create an inference endpoint that performs a text_embedding task. Note that you do not specify a model here, as it is defined already in the Azure AI Studio deployment.", "summary": "A text embedding task", - "value": "{\n \"service\": \"azureopenai\",\n \"service_settings\": {\n \"api_key\": \"Api-Key\",\n \"resource_name\": \"Resource-name\",\n \"deployment_id\": \"Deployment-id\",\n \"api_version\": \"2024-02-01\"\n }\n}" + "value": "{\n \"service\": \"azureaistudio\",\n \"service_settings\": {\n \"api_key\": \"Azure-AI-Studio-API-key\",\n \"target\": \"Target-Uri\",\n \"provider\": \"openai\",\n \"endpoint_type\": \"token\"\n }\n}" }, - "PutAzureOpenAiRequestExample2": { - "description": "Run `PUT _inference/completion/azure_openai_completion` to create an inference endpoint that performs a `completion` task.", + "PutAzureAiStudioRequestExample2": { + "description": "Run `PUT _inference/completion/azure_ai_studio_completion` to create an inference endpoint that performs a completion task.", "summary": "A completion task", - "value": "{\n \"service\": \"azureopenai\",\n \"service_settings\": {\n \"api_key\": \"Api-Key\",\n \"resource_name\": \"Resource-name\",\n \"deployment_id\": \"Deployment-id\",\n \"api_version\": \"2024-02-01\"\n }\n}" ->>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) ->>>>>>> 1e946eb24 (Add Azure OpenAI inference details (#4019)) + "value": "{\n \"service\": \"azureaistudio\",\n \"service_settings\": {\n \"api_key\": \"Azure-AI-Studio-API-key\",\n \"target\": \"Target-URI\",\n \"provider\": \"databricks\",\n \"endpoint_type\": \"realtime\"\n }\n}" } }, "inherits": { @@ -27404,56 +27402,24 @@ "kind": "request", "name": { "name": "Request", -<<<<<<< HEAD -======= -<<<<<<< HEAD ->>>>>>> 1e946eb24 (Add Azure OpenAI inference details (#4019)) - "namespace": "inference.put_amazonbedrock" + "namespace": "inference.put_azureaistudio" }, "path": [ { "description": "The type of the inference task that the model will perform.", -<<<<<<< HEAD -======= -======= - "namespace": "inference.put_azureopenai" - }, - "path": [ - { - "description": "The type of the inference task that the model will perform.\nNOTE: The `chat_completion` task type only supports streaming and only through the _stream API.", ->>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) ->>>>>>> 1e946eb24 (Add Azure OpenAI inference details (#4019)) "name": "task_type", "required": true, "type": { "kind": "instance_of", "type": { -<<<<<<< HEAD - "name": "AmazonBedrockTaskType", - "namespace": "inference.put_amazonbedrock" -======= -<<<<<<< HEAD - "name": "AmazonBedrockTaskType", - "namespace": "inference.put_amazonbedrock" -======= - "name": "AzureOpenAITaskType", - "namespace": "inference.put_azureopenai" ->>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) ->>>>>>> 1e946eb24 (Add Azure OpenAI inference details (#4019)) + "name": "AzureAiStudioTaskType", + "namespace": 
"inference.put_azureaistudio" } } }, { "description": "The unique identifier of the inference endpoint.", -<<<<<<< HEAD - "name": "amazonbedrock_inference_id", -======= -<<<<<<< HEAD - "name": "amazonbedrock_inference_id", -======= - "name": "azureopenai_inference_id", ->>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) ->>>>>>> 1e946eb24 (Add Azure OpenAI inference details (#4019)) + "name": "azureaistudio_inference_id", "required": true, "type": { "kind": "instance_of", @@ -27465,15 +27431,7 @@ } ], "query": [], -<<<<<<< HEAD - "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L28-L84" -======= -<<<<<<< HEAD - "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L28-L84" -======= - "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L27-L88" ->>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) ->>>>>>> 1e946eb24 (Add Azure OpenAI inference details (#4019)) + "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L28-L81" }, { "body": { @@ -27489,13 +27447,9 @@ "kind": "response", "name": { "name": "Response", -<<<<<<< HEAD -======= -<<<<<<< HEAD ->>>>>>> 1e946eb24 (Add Azure OpenAI inference details (#4019)) - "namespace": "inference.put_amazonbedrock" + "namespace": "inference.put_azureaistudio" }, - "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockResponse.ts#L22-L24" + "specLocation": "inference/put_azureaistudio/PutAzureAiStudioResponse.ts#L22-L24" }, { "attachedBehaviors": [ @@ -27519,26 +27473,26 @@ } }, { - "description": "The type of service supported for the specified task type. In this case, `anthropic`.", + "description": "The type of service supported for the specified task type. In this case, `azureopenai`.", "name": "service", "required": true, "type": { "kind": "instance_of", "type": { "name": "ServiceType", - "namespace": "inference.put_anthropic" + "namespace": "inference.put_azureopenai" } } }, { - "description": "Settings used to install the inference model. These settings are specific to the `watsonxai` service.", + "description": "Settings used to install the inference model. 
These settings are specific to the `azureopenai` service.", "name": "service_settings", "required": true, "type": { "kind": "instance_of", "type": { - "name": "AnthropicServiceSettings", - "namespace": "inference.put_anthropic" + "name": "AzureOpenAIServiceSettings", + "namespace": "inference.put_azureopenai" } } }, @@ -27549,18 +27503,24 @@ "type": { "kind": "instance_of", "type": { - "name": "AnthropicTaskSettings", - "namespace": "inference.put_anthropic" + "name": "AzureOpenAITaskSettings", + "namespace": "inference.put_azureopenai" } } } ] }, - "description": "Create an Anthropic inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `anthropic` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "description": "Create an Azure OpenAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `azureopenai` service.\n\nThe list of chat completion models that you can choose from in your Azure OpenAI deployment include:\n\n* [GPT-4 and GPT-4 Turbo models](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-4-and-gpt-4-turbo-models)\n* [GPT-3.5](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-35)\n\nThe list of embeddings models that you can choose from in your deployment can be found in the [Azure models documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#embeddings).\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", "examples": { - "PutAnthropicRequestExample1": { - "description": "Run `PUT _inference/completion/anthropic_completion` to create an inference endpoint that performs a completion task.", - "value": "{\n \"service\": \"anthropic\",\n \"service_settings\": {\n \"api_key\": \"Anthropic-Api-Key\",\n \"model_id\": \"Model-ID\"\n },\n \"task_settings\": {\n \"max_tokens\": 1024\n }\n}" + "PutAzureOpenAiRequestExample1": { + "description": "Run `PUT _inference/text_embedding/azure_openai_embeddings` to create an inference endpoint that performs a `text_embedding` task. 
You do not specify a model, as it is defined already in the Azure OpenAI deployment.", + "summary": "A text embedding task", + "value": "{\n \"service\": \"azureopenai\",\n \"service_settings\": {\n \"api_key\": \"Api-Key\",\n \"resource_name\": \"Resource-name\",\n \"deployment_id\": \"Deployment-id\",\n \"api_version\": \"2024-02-01\"\n }\n}" + }, + "PutAzureOpenAiRequestExample2": { + "description": "Run `PUT _inference/completion/azure_openai_completion` to create an inference endpoint that performs a `completion` task.", + "summary": "A completion task", + "value": "{\n \"service\": \"azureopenai\",\n \"service_settings\": {\n \"api_key\": \"Api-Key\",\n \"resource_name\": \"Resource-name\",\n \"deployment_id\": \"Deployment-id\",\n \"api_version\": \"2024-02-01\"\n }\n}" } }, "inherits": { @@ -27572,24 +27532,24 @@ "kind": "request", "name": { "name": "Request", - "namespace": "inference.put_anthropic" + "namespace": "inference.put_azureopenai" }, "path": [ { - "description": "The task type.\nThe only valid task type for the model to perform is `completion`.", + "description": "The type of the inference task that the model will perform.\nNOTE: The `chat_completion` task type only supports streaming and only through the _stream API.", "name": "task_type", "required": true, "type": { "kind": "instance_of", "type": { - "name": "AnthropicTaskType", - "namespace": "inference.put_anthropic" + "name": "AzureOpenAITaskType", + "namespace": "inference.put_azureopenai" } } }, { "description": "The unique identifier of the inference endpoint.", - "name": "anthropic_inference_id", + "name": "azureopenai_inference_id", "required": true, "type": { "kind": "instance_of", @@ -27601,7 +27561,7 @@ } ], "query": [], - "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L28-L82" + "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L27-L88" }, { "body": { @@ -27617,9 +27577,9 @@ "kind": "response", "name": { "name": "Response", - "namespace": "inference.put_anthropic" + "namespace": "inference.put_azureopenai" }, - "specLocation": "inference/put_anthropic/PutAnthropicResponse.ts#L22-L24" + "specLocation": "inference/put_azureopenai/PutAzureOpenAiResponse.ts#L22-L24" }, { "attachedBehaviors": [ @@ -27643,54 +27603,37 @@ } }, { - "description": "The type of service supported for the specified task type. In this case, `azureaistudio`.", + "description": "The type of service supported for the specified task type. In this case, `hugging_face`.", "name": "service", "required": true, "type": { "kind": "instance_of", "type": { "name": "ServiceType", - "namespace": "inference.put_azureaistudio" + "namespace": "inference.put_hugging_face" } } }, { - "description": "Settings used to install the inference model. These settings are specific to the `openai` service.", + "description": "Settings used to install the inference model. 
These settings are specific to the `hugging_face` service.", "name": "service_settings", "required": true, "type": { "kind": "instance_of", "type": { - "name": "AzureAiStudioServiceSettings", - "namespace": "inference.put_azureaistudio" - } - } - }, - { - "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", - "name": "task_settings", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "AzureAiStudioTaskSettings", - "namespace": "inference.put_azureaistudio" + "name": "HuggingFaceServiceSettings", + "namespace": "inference.put_hugging_face" } } } ] }, - "description": "Create an Azure AI studio inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `azureaistudio` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "description": "Create a Hugging Face inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `hugging_face` service.\n\nYou must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.\nSelect the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section.\nCreate the endpoint and copy the URL after the endpoint initialization has been finished.\n\nThe following models are recommended for the Hugging Face service:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", "examples": { - "PutAzureAiStudioRequestExample1": { - "description": "Run `PUT _inference/text_embedding/azure_ai_studio_embeddings` to create an inference endpoint that performs a text_embedding task. 
Note that you do not specify a model here, as it is defined already in the Azure AI Studio deployment.", + "PutHuggingFaceRequestExample1": { + "description": "Run `PUT _inference/text_embedding/hugging-face-embeddings` to create an inference endpoint that performs a `text_embedding` task type.", "summary": "A text embedding task", - "value": "{\n \"service\": \"azureaistudio\",\n \"service_settings\": {\n \"api_key\": \"Azure-AI-Studio-API-key\",\n \"target\": \"Target-Uri\",\n \"provider\": \"openai\",\n \"endpoint_type\": \"token\"\n }\n}" - }, - "PutAzureAiStudioRequestExample2": { - "description": "Run `PUT _inference/completion/azure_ai_studio_completion` to create an inference endpoint that performs a completion task.", - "summary": "A completion task", - "value": "{\n \"service\": \"azureaistudio\",\n \"service_settings\": {\n \"api_key\": \"Azure-AI-Studio-API-key\",\n \"target\": \"Target-URI\",\n \"provider\": \"databricks\",\n \"endpoint_type\": \"realtime\"\n }\n}" + "value": "{\n \"service\": \"hugging_face\",\n \"service_settings\": {\n \"api_key\": \"hugging-face-access-token\", \n \"url\": \"url-endpoint\" \n }\n}" } }, "inherits": { @@ -27702,7 +27645,7 @@ "kind": "request", "name": { "name": "Request", - "namespace": "inference.put_azureaistudio" + "namespace": "inference.put_hugging_face" }, "path": [ { @@ -27712,14 +27655,14 @@ "type": { "kind": "instance_of", "type": { - "name": "AzureAiStudioTaskType", - "namespace": "inference.put_azureaistudio" + "name": "HuggingFaceTaskType", + "namespace": "inference.put_hugging_face" } } }, { "description": "The unique identifier of the inference endpoint.", - "name": "azureaistudio_inference_id", + "name": "huggingface_inference_id", "required": true, "type": { "kind": "instance_of", @@ -27731,7 +27674,7 @@ } ], "query": [], - "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L28-L81" + "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L27-L89" }, { "body": { @@ -27747,9 +27690,9 @@ "kind": "response", "name": { "name": "Response", - "namespace": "inference.put_azureaistudio" + "namespace": "inference.put_hugging_face" }, - "specLocation": "inference/put_azureaistudio/PutAzureAiStudioResponse.ts#L22-L24" + "specLocation": "inference/put_hugging_face/PutHuggingFaceResponse.ts#L22-L24" }, { "attachedBehaviors": [ @@ -27773,27 +27716,26 @@ } }, { -<<<<<<< HEAD - "description": "The type of service supported for the specified task type. In this case, `azureopenai`.", + "description": "The type of service supported for the specified task type. In this case, `jinaai`.", "name": "service", "required": true, "type": { "kind": "instance_of", "type": { "name": "ServiceType", - "namespace": "inference.put_azureopenai" + "namespace": "inference.put_jinaai" } } }, { - "description": "Settings used to install the inference model. These settings are specific to the `azureopenai` service.", + "description": "Settings used to install the inference model. 
These settings are specific to the `jinaai` service.",
       "name": "service_settings",
       "required": true,
       "type": {
         "kind": "instance_of",
         "type": {
-          "name": "AzureOpenAIServiceSettings",
-          "namespace": "inference.put_azureopenai"
+          "name": "JinaAIServiceSettings",
+          "namespace": "inference.put_jinaai"
         }
      }
    },
@@ -27804,24 +27746,24 @@
       "type": {
         "kind": "instance_of",
         "type": {
-          "name": "AzureOpenAITaskSettings",
-          "namespace": "inference.put_azureopenai"
+          "name": "JinaAITaskSettings",
+          "namespace": "inference.put_jinaai"
        }
      }
    }
  ]
},
-  "description": "Create an Azure OpenAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `azureopenai` service.\n\nThe list of chat completion models that you can choose from in your Azure OpenAI deployment include:\n\n* [GPT-4 and GPT-4 Turbo models](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-4-and-gpt-4-turbo-models)\n* [GPT-3.5](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-35)\n\nThe list of embeddings models that you can choose from in your deployment can be found in the [Azure models documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#embeddings).\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
+  "description": "Create a JinaAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `jinaai` service.\n\nTo review the available `rerank` models, refer to .\nTo review the available `text_embedding` models, refer to the .\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
   "examples": {
-    "PutAzureOpenAiRequestExample1": {
-      "description": "Run `PUT _inference/text_embedding/azure_openai_embeddings` to create an inference endpoint that performs a `text_embedding` task. 
You do not specify a model, as it is defined already in the Azure OpenAI deployment.", + "PutJinaAiRequestExample1": { + "description": "Run `PUT _inference/text_embedding/jinaai-embeddings` to create an inference endpoint for text embedding tasks using the JinaAI service.", "summary": "A text embedding task", - "value": "{\n \"service\": \"azureopenai\",\n \"service_settings\": {\n \"api_key\": \"Api-Key\",\n \"resource_name\": \"Resource-name\",\n \"deployment_id\": \"Deployment-id\",\n \"api_version\": \"2024-02-01\"\n }\n}" + "value": "{\n \"service\": \"jinaai\",\n \"service_settings\": {\n \"model_id\": \"jina-embeddings-v3\",\n \"api_key\": \"JinaAi-Api-key\"\n }\n}" }, - "PutAzureOpenAiRequestExample2": { - "description": "Run `PUT _inference/completion/azure_openai_completion` to create an inference endpoint that performs a `completion` task.", - "summary": "A completion task", - "value": "{\n \"service\": \"azureopenai\",\n \"service_settings\": {\n \"api_key\": \"Api-Key\",\n \"resource_name\": \"Resource-name\",\n \"deployment_id\": \"Deployment-id\",\n \"api_version\": \"2024-02-01\"\n }\n}" + "PutJinaAiRequestExample2": { + "description": "Run `PUT _inference/rerank/jinaai-rerank` to create an inference endpoint for rerank tasks using the JinaAI service.", + "summary": "A rerank task", + "value": "{\n \"service\": \"jinaai\",\n \"service_settings\": {\n \"api_key\": \"JinaAI-Api-key\",\n \"model_id\": \"jina-reranker-v2-base-multilingual\"\n },\n \"task_settings\": {\n \"top_n\": 10,\n \"return_documents\": true\n }\n}" } }, "inherits": { @@ -27833,24 +27775,24 @@ "kind": "request", "name": { "name": "Request", - "namespace": "inference.put_azureopenai" + "namespace": "inference.put_jinaai" }, "path": [ { - "description": "The type of the inference task that the model will perform.\nNOTE: The `chat_completion` task type only supports streaming and only through the _stream API.", + "description": "The type of the inference task that the model will perform.", "name": "task_type", "required": true, "type": { "kind": "instance_of", "type": { - "name": "AzureOpenAITaskType", - "namespace": "inference.put_azureopenai" + "name": "JinaAITaskType", + "namespace": "inference.put_jinaai" } } }, { "description": "The unique identifier of the inference endpoint.", - "name": "azureopenai_inference_id", + "name": "jinaai_inference_id", "required": true, "type": { "kind": "instance_of", @@ -27862,7 +27804,7 @@ } ], "query": [], - "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L27-L88" + "specLocation": "inference/put_jinaai/PutJinaAiRequest.ts#L28-L84" }, { "body": { @@ -27878,9 +27820,9 @@ "kind": "response", "name": { "name": "Response", - "namespace": "inference.put_azureopenai" + "namespace": "inference.put_jinaai" }, - "specLocation": "inference/put_azureopenai/PutAzureOpenAiResponse.ts#L22-L24" + "specLocation": "inference/put_jinaai/PutJinaAiResponse.ts#L22-L24" }, { "attachedBehaviors": [ @@ -27904,56 +27846,36 @@ } }, { -======= ->>>>>>> 1e946eb24 (Add Azure OpenAI inference details (#4019)) - "description": "The type of service supported for the specified task type. In this case, `cohere`.", + "description": "The type of service supported for the specified task type. 
In this case, `mistral`.", "name": "service", "required": true, "type": { "kind": "instance_of", "type": { "name": "ServiceType", - "namespace": "inference.put_cohere" + "namespace": "inference.put_mistral" } } }, { - "description": "Settings used to install the inference model.\nThese settings are specific to the `cohere` service.", + "description": "Settings used to install the inference model. These settings are specific to the `mistral` service.", "name": "service_settings", "required": true, "type": { "kind": "instance_of", "type": { - "name": "CohereServiceSettings", - "namespace": "inference.put_cohere" - } - } - }, - { - "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", - "name": "task_settings", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "CohereTaskSettings", - "namespace": "inference.put_cohere" + "name": "MistralServiceSettings", + "namespace": "inference.put_mistral" } } } ] }, - "description": "Create a Cohere inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `cohere` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "description": "Create a Mistral inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `mistral` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", "examples": { - "PutCohereRequestExample1": { - "description": "Run `PUT _inference/text_embedding/cohere-embeddings` to create an inference endpoint that performs a text embedding task.", - "summary": "A text embedding task", - "value": "{\n \"service\": \"cohere\",\n \"service_settings\": {\n \"api_key\": \"Cohere-Api-key\",\n \"model_id\": \"embed-english-light-v3.0\",\n \"embedding_type\": \"byte\"\n }\n}" - }, - "PutCohereRequestExample2": { - "description": "Run `PUT _inference/rerank/cohere-rerank` to create an inference endpoint that performs a rerank task.", - "summary": "A rerank task", - "value": "{\n \"service\": \"cohere\",\n \"service_settings\": {\n \"api_key\": \"Cohere-API-key\",\n \"model_id\": \"rerank-english-v3.0\"\n },\n \"task_settings\": {\n \"top_n\": 10,\n \"return_documents\": true\n }\n}" + "PutMistralRequestExample1": { + "description": "Run `PUT _inference/text_embedding/mistral-embeddings-test` to create a Mistral inference endpoint that performs a text embedding task.", + "value": "{\n \"service\": \"mistral\",\n \"service_settings\": {\n \"api_key\": 
\"Mistral-API-Key\",\n \"model\": \"mistral-embed\" \n }\n}" } }, "inherits": { @@ -27965,24 +27887,24 @@ "kind": "request", "name": { "name": "Request", - "namespace": "inference.put_cohere" + "namespace": "inference.put_mistral" }, "path": [ { - "description": "The type of the inference task that the model will perform.", + "description": "The task type.\nThe only valid task type for the model to perform is `text_embedding`.", "name": "task_type", "required": true, "type": { "kind": "instance_of", "type": { - "name": "CohereTaskType", - "namespace": "inference.put_cohere" + "name": "MistralTaskType", + "namespace": "inference.put_mistral" } } }, { "description": "The unique identifier of the inference endpoint.", - "name": "cohere_inference_id", + "name": "mistral_inference_id", "required": true, "type": { "kind": "instance_of", @@ -27994,7 +27916,7 @@ } ], "query": [], - "specLocation": "inference/put_cohere/PutCohereRequest.ts#L28-L82" + "specLocation": "inference/put_mistral/PutMistralRequest.ts#L28-L77" }, { "body": { @@ -28010,17 +27932,9 @@ "kind": "response", "name": { "name": "Response", - "namespace": "inference.put_cohere" - }, - "specLocation": "inference/put_cohere/PutCohereResponse.ts#L22-L24" -<<<<<<< HEAD -======= -======= - "namespace": "inference.put_azureopenai" + "namespace": "inference.put_mistral" }, - "specLocation": "inference/put_azureopenai/PutAzureOpenAiResponse.ts#L22-L24" ->>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) ->>>>>>> 1e946eb24 (Add Azure OpenAI inference details (#4019)) + "specLocation": "inference/put_mistral/PutMistralResponse.ts#L22-L24" }, { "attachedBehaviors": [ @@ -28030,32 +27944,70 @@ "kind": "properties", "properties": [ { - "description": "The type of service supported for the specified task type. In this case, `elastic`.", + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, + { + "description": "The type of service supported for the specified task type. In this case, `openai`.", "name": "service", "required": true, "type": { "kind": "instance_of", "type": { "name": "ServiceType", - "namespace": "inference.put_eis" + "namespace": "inference.put_openai" } } }, { - "description": "Settings used to install the inference model. These settings are specific to the `elastic` service.", + "description": "Settings used to install the inference model. 
These settings are specific to the `openai` service.", "name": "service_settings", "required": true, "type": { "kind": "instance_of", "type": { - "name": "EisServiceSettings", - "namespace": "inference.put_eis" + "name": "OpenAIServiceSettings", + "namespace": "inference.put_openai" + } + } + }, + { + "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "OpenAITaskSettings", + "namespace": "inference.put_openai" } } } ] }, - "description": "Create an Elastic Inference Service (EIS) inference endpoint.\n\nCreate an inference endpoint to perform an inference task through the Elastic Inference Service (EIS).", + "description": "Create an OpenAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `openai` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "examples": { + "PutOpenAiRequestExample1": { + "description": "Run `PUT _inference/text_embedding/openai-embeddings` to create an inference endpoint that performs a `text_embedding` task. The embeddings created by requests to this endpoint will have 128 dimensions.", + "summary": "A text embedding task", + "value": "{\n \"service\": \"openai\",\n \"service_settings\": {\n \"api_key\": \"OpenAI-API-Key\",\n \"model_id\": \"text-embedding-3-small\",\n \"dimensions\": 128\n }\n}" + }, + "PutOpenAiRequestExample2": { + "description": "Run `PUT _inference/completion/openai-completion` to create an inference endpoint to perform a completion task type.", + "summary": "A completion task", + "value": "{\n \"service\": \"openai\",\n \"service_settings\": {\n \"api_key\": \"OpenAI-API-Key\",\n \"model_id\": \"gpt-3.5-turbo\"\n }\n}" + } + }, "inherits": { "type": { "name": "RequestBase", @@ -28065,7 +28017,7 @@ "kind": "request", "name": { "name": "Request", - "namespace": "inference.put_eis" + "namespace": "inference.put_openai" }, "path": [ { @@ -28075,14 +28027,14 @@ "type": { "kind": "instance_of", "type": { - "name": "EisTaskType", - "namespace": "inference.put_eis" + "name": "OpenAITaskType", + "namespace": "inference.put_openai" } } }, { "description": "The unique identifier of the inference endpoint.", - "name": "eis_inference_id", + "name": "openai_inference_id", "required": true, "type": { "kind": "instance_of", @@ -28094,7 +28046,7 @@ } ], "query": [], - "specLocation": "inference/put_eis/PutEisRequest.ts#L24-L62" + "specLocation": "inference/put_openai/PutOpenAiRequest.ts#L28-L82" }, { "body": { @@ -28110,9 +28062,9 @@ "kind": "response", "name": { "name": "Response", - "namespace": "inference.put_eis" + "namespace": "inference.put_openai" }, - "specLocation": "inference/put_eis/PutEisResponse.ts#L22-L24" + "specLocation": "inference/put_openai/PutOpenAiResponse.ts#L22-L24" }, { "attachedBehaviors": [ @@ -28122,88 +28074,36 @@ "kind": "properties", "properties": [ { - "description": "The chunking configuration 
object.", - "extDocId": "inference-chunking", - "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", - "name": "chunking_settings", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "InferenceChunkingSettings", - "namespace": "inference._types" - } - } - }, - { - "description": "The type of service supported for the specified task type. In this case, `elasticsearch`.", + "description": "The type of service supported for the specified task type. In this case, `watsonxai`.", "name": "service", "required": true, "type": { "kind": "instance_of", "type": { "name": "ServiceType", - "namespace": "inference.put_elasticsearch" + "namespace": "inference.put_watsonx" } } }, { - "description": "Settings used to install the inference model. These settings are specific to the `elasticsearch` service.", + "description": "Settings used to install the inference model. These settings are specific to the `watsonxai` service.", "name": "service_settings", "required": true, "type": { "kind": "instance_of", "type": { - "name": "ElasticsearchServiceSettings", - "namespace": "inference.put_elasticsearch" - } - } - }, - { - "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", - "name": "task_settings", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "ElasticsearchTaskSettings", - "namespace": "inference.put_elasticsearch" + "name": "WatsonxServiceSettings", + "namespace": "inference.put_watsonx" } } } ] }, - "description": "Create an OpenAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `elasticsearch` service.\n\n> info\n> Your Elasticsearch deployment contains preconfigured ELSER and E5 inference endpoints, you only need to create the enpoints using the API if you want to customize the settings.\n\nIf you use the ELSER or the E5 model through the `elasticsearch` service, the API request will automatically download and deploy the model if it isn't downloaded yet.\n\n> info\n> You might see a 502 bad gateway error in the response when using the Kibana Console. This error usually just reflects a timeout, while the model downloads in the background. You can check the download progress in the Machine Learning UI. 
If using the Python client, you can set the timeout parameter to a higher value.\n\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "description": "Create a Watsonx inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `watsonxai` service.\nYou need an IBM Cloud Databases for Elasticsearch deployment to use the `watsonxai` inference service.\nYou can provision one through the IBM catalog, the Cloud Databases CLI plug-in, the Cloud Databases API, or Terraform.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", "examples": { - "PutElasticsearchRequestExample1": { - "description": "Run `PUT _inference/sparse_embedding/my-elser-model` to create an inference endpoint that performs a `sparse_embedding` task. The `model_id` must be the ID of one of the built-in ELSER models. The API will automatically download the ELSER model if it isn't already downloaded and then deploy the model.", - "summary": "ELSER sparse embedding task", - "value": "{\n \"service\": \"elasticsearch\",\n \"service_settings\": {\n \"adaptive_allocations\": { \n \"enabled\": true,\n \"min_number_of_allocations\": 1,\n \"max_number_of_allocations\": 4\n },\n \"num_threads\": 1,\n \"model_id\": \".elser_model_2\" \n }\n}" - }, - "PutElasticsearchRequestExample2": { - "description": "Run `PUT _inference/rerank/my-elastic-rerank` to create an inference endpoint that performs a rerank task using the built-in Elastic Rerank cross-encoder model. The `model_id` must be `.rerank-v1`, which is the ID of the built-in Elastic Rerank model. The API will automatically download the Elastic Rerank model if it isn't already downloaded and then deploy the model. Once deployed, the model can be used for semantic re-ranking with a `text_similarity_reranker` retriever.", - "summary": "Elastic rerank task", - "value": "{\n \"service\": \"elasticsearch\",\n \"service_settings\": {\n \"model_id\": \".rerank-v1\", \n \"num_threads\": 1,\n \"adaptive_allocations\": { \n \"enabled\": true,\n \"min_number_of_allocations\": 1,\n \"max_number_of_allocations\": 4\n }\n }\n}" - }, - "PutElasticsearchRequestExample3": { - "description": "Run `PUT _inference/text_embedding/my-e5-model` to create an inference endpoint that performs a `text_embedding` task. The `model_id` must be the ID of one of the built-in E5 models. 
The API will automatically download the E5 model if it isn't already downloaded and then deploy the model.", - "summary": "E5 text embedding task", - "value": "{\n \"service\": \"elasticsearch\",\n \"service_settings\": {\n \"num_allocations\": 1,\n \"num_threads\": 1,\n \"model_id\": \".multilingual-e5-small\" \n }\n}" - }, - "PutElasticsearchRequestExample4": { - "description": "Run `PUT _inference/text_embedding/my-msmarco-minilm-model` to create an inference endpoint that performs a `text_embedding` task with a model that was uploaded by Eland.", - "summary": "Eland text embedding task", - "value": "{\n \"service\": \"elasticsearch\",\n \"service_settings\": {\n \"num_allocations\": 1,\n \"num_threads\": 1,\n \"model_id\": \"msmarco-MiniLM-L12-cos-v5\" \n }\n}" - }, - "PutElasticsearchRequestExample5": { - "description": "Run `PUT _inference/text_embedding/my-e5-model` to create an inference endpoint that performs a `text_embedding` task and to configure adaptive allocations. The API request will automatically download the E5 model if it isn't already downloaded and then deploy the model.", - "summary": "Adaptive allocation", - "value": "{\n \"service\": \"elasticsearch\",\n \"service_settings\": {\n \"adaptive_allocations\": {\n \"enabled\": true,\n \"min_number_of_allocations\": 3,\n \"max_number_of_allocations\": 10\n },\n \"num_threads\": 1,\n \"model_id\": \".multilingual-e5-small\"\n }\n}" - }, - "PutElasticsearchRequestExample6": { - "description": "Run `PUT _inference/sparse_embedding/use_existing_deployment` to use an already existing model deployment when creating an inference endpoint.", - "summary": "Existing model deployment", - "value": "{\n \"service\": \"elasticsearch\",\n \"service_settings\": {\n \"deployment_id\": \".elser_model_2\"\n }\n}" + "InferenceRequestExample1": { + "description": "Run `PUT _inference/text_embedding/watsonx-embeddings` to create a Watsonx inference endpoint that performs a text embedding task.", + "value": "{\n \"service\": \"watsonxai\",\n \"service_settings\": {\n \"api_key\": \"Watsonx-API-Key\", \n \"url\": \"Watsonx-URL\", \n \"model_id\": \"ibm/slate-30m-english-rtrvr\",\n \"project_id\": \"IBM-Cloud-ID\", \n \"api_version\": \"2024-03-14\"\n }\n}" } }, "inherits": { @@ -28215,24 +28115,24 @@ "kind": "request", "name": { "name": "Request", - "namespace": "inference.put_elasticsearch" + "namespace": "inference.put_watsonx" }, "path": [ { - "description": "The type of the inference task that the model will perform.", + "description": "The task type.\nThe only valid task type for the model to perform is `text_embedding`.", "name": "task_type", "required": true, "type": { "kind": "instance_of", "type": { - "name": "ElasticsearchTaskType", - "namespace": "inference.put_elasticsearch" + "name": "WatsonxTaskType", + "namespace": "inference.put_watsonx" } } }, { - "description": "The unique identifier of the inference endpoint.\nThe must not match the `model_id`.", - "name": "elasticsearch_inference_id", + "description": "The unique identifier of the inference endpoint.", + "name": "watsonx_inference_id", "required": true, "type": { "kind": "instance_of", "type": { "name": "Id", "namespace": "_types" } } } ], "query": [], - "specLocation": "inference/put_elasticsearch/PutElasticsearchRequest.ts#L25-L86" + "specLocation": "inference/put_watsonx/PutWatsonxRequest.ts#L24-L70" }, { "body": { @@ -28257,18 +28157,12 @@ } } }, - "examples": { - "PutElasticsearchResponseExample1": { - "description": "A successful response from `PUT
_inference/sparse_embedding/use_existing_deployment`. It contains the model ID and the threads and allocations settings from the model deployment.\n", - "value": "{\n \"inference_id\": \"use_existing_deployment\",\n \"task_type\": \"sparse_embedding\",\n \"service\": \"elasticsearch\",\n \"service_settings\": {\n \"num_allocations\": 2,\n \"num_threads\": 1,\n \"model_id\": \".elser_model_2\",\n \"deployment_id\": \".elser_model_2\"\n },\n \"chunking_settings\": {\n \"strategy\": \"sentence\",\n \"max_chunk_size\": 250,\n \"sentence_overlap\": 1\n }\n}" - } - }, "kind": "response", "name": { "name": "Response", - "namespace": "inference.put_elasticsearch" + "namespace": "inference.put_watsonx" }, - "specLocation": "inference/put_elasticsearch/PutElasticsearchResponse.ts#L22-L24" + "specLocation": "inference/put_watsonx/PutWatsonxResponse.ts#L22-L24" }, { "attachedBehaviors": [ @@ -28278,60 +28172,64 @@ "kind": "properties", "properties": [ { - "description": "The chunking configuration object.", - "extDocId": "inference-chunking", - "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", - "name": "chunking_settings", - "required": false, + "description": "Query input.", + "name": "query", + "required": true, "type": { "kind": "instance_of", "type": { - "name": "InferenceChunkingSettings", - "namespace": "inference._types" + "name": "string", + "namespace": "_builtins" } } }, { - "description": "The type of service supported for the specified task type. In this case, `elser`.", - "name": "service", + "description": "The text on which you want to perform the inference task.\nIt can be a single string or an array.\n\n> info\n> Inference endpoints for the `completion` task type currently only support a single string as input.", + "name": "input", "required": true, "type": { - "kind": "instance_of", - "type": { - "name": "ServiceType", - "namespace": "inference.put_elser" - } + "items": [ + { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + }, + { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "kind": "union_of" } }, { - "description": "Settings used to install the inference model. These settings are specific to the `elser` service.", - "name": "service_settings", - "required": true, + "description": "Task settings for the individual inference request.\nThese settings are specific to the task type you specified and override the task settings specified when initializing the service.", + "name": "task_settings", + "required": false, "type": { "kind": "instance_of", "type": { - "name": "ElserServiceSettings", - "namespace": "inference.put_elser" + "name": "TaskSettings", + "namespace": "inference._types" } } } ] }, - "deprecation": { - "description": "The elser service is deprecated and will be removed in a future release. 
Use the Elasticsearch inference integration instead, with model_id included in the service_settings.", - "version": "8.16.0" - }, - "description": "Create an ELSER inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `elser` service.\nYou can also deploy ELSER by using the Elasticsearch inference integration.\n\n> info\n> Your Elasticsearch deployment contains a preconfigured ELSER inference endpoint, you only need to create the enpoint using the API if you want to customize the settings.\n\nThe API request will automatically download and deploy the ELSER model if it isn't already downloaded.\n\n> info\n> You might see a 502 bad gateway error in the response when using the Kibana Console. This error usually just reflects a timeout, while the model downloads in the background. You can check the download progress in the Machine Learning UI. If using the Python client, you can set the timeout parameter to a higher value.\n\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "description": "Perform reranking inference on the service", "examples": { - "PutElserRequestExample1": { - "description": "Run `PUT _inference/sparse_embedding/my-elser-model` to create an inference endpoint that performs a `sparse_embedding` task. The request will automatically download the ELSER model if it isn't already downloaded and then deploy the model.", - "summary": "A sparse embedding task", - "value": "{\n \"service\": \"elser\",\n \"service_settings\": {\n \"num_allocations\": 1,\n \"num_threads\": 1\n }\n}" - }, - "PutElserRequestExample2": { - "description": "Run `PUT _inference/sparse_embedding/my-elser-model` to create an inference endpoint that performs a `sparse_embedding` task with adaptive allocations.
When adaptive allocations are enabled, the number of allocations of the model is set automatically based on the current load.", - "summary": "Adaptive allocations", - "value": "{\n \"service\": \"elser\",\n \"service_settings\": {\n \"adaptive_allocations\": {\n \"enabled\": true,\n \"min_number_of_allocations\": 3,\n \"max_number_of_allocations\": 10\n },\n \"num_threads\": 1\n }\n}" + "RerankRequestExample1": { + "description": "Run `POST _inference/rerank/cohere_rerank` to perform reranking on the example input.", + "summary": "Rerank task", + "value": "{\n \"input\": [\"luke\", \"like\", \"leia\", \"chewy\",\"r2d2\", \"star\", \"wars\"],\n \"query\": \"star wars main character\"\n}" } }, "inherits": { @@ -28343,36 +28241,38 @@ "kind": "request", "name": { "name": "Request", - "namespace": "inference.put_elser" + "namespace": "inference.rerank" }, "path": [ { - "description": "The type of the inference task that the model will perform.", - "name": "task_type", + "description": "The unique identifier for the inference endpoint.", + "name": "inference_id", "required": true, "type": { "kind": "instance_of", "type": { - "name": "ElserTaskType", - "namespace": "inference.put_elser" + "name": "Id", + "namespace": "_types" } } - }, + } + ], + "query": [ { - "description": "The unique identifier of the inference endpoint.", - "name": "elser_inference_id", - "required": true, + "description": "The amount of time to wait for the inference request to complete.", + "name": "timeout", + "required": false, + "serverDefault": "30s", "type": { "kind": "instance_of", "type": { - "name": "Id", + "name": "Duration", "namespace": "_types" } } } ], - "query": [], - "specLocation": "inference/put_elser/PutElserRequest.ts#L25-L82" + "specLocation": "inference/rerank/RerankRequest.ts#L25-L72" }, { "body": { @@ -28380,23 +28280,24 @@ "value": { "kind": "instance_of", "type": { - "name": "InferenceEndpointInfo", + "name": "RerankedInferenceResult", "namespace": "inference._types" } } }, "examples": { - "PutElserResponseExample1": { - "description": "A successful response when creating an ELSER inference endpoint.", - "value": "{\n \"inference_id\": \"my-elser-model\",\n \"task_type\": \"sparse_embedding\",\n \"service\": \"elser\",\n \"service_settings\": {\n \"num_allocations\": 1,\n \"num_threads\": 1\n },\n \"task_settings\": {}\n}" + "RerankResponseExample1": { + "description": "A successful response from `POST _inference/rerank/cohere_rerank`.\n", + "summary": "Rerank task", + "value": "{\n \"rerank\": [\n {\n \"index\": \"2\",\n \"relevance_score\": \"0.011597361\",\n \"text\": \"leia\"\n },\n {\n \"index\": \"0\",\n \"relevance_score\": \"0.006338922\",\n \"text\": \"luke\"\n },\n {\n \"index\": \"5\",\n \"relevance_score\": \"0.0016166499\",\n \"text\": \"star\"\n },\n {\n \"index\": \"4\",\n \"relevance_score\": \"0.0011695103\",\n \"text\": \"r2d2\"\n },\n {\n \"index\": \"1\",\n \"relevance_score\": \"5.614787E-4\",\n \"text\": \"like\"\n },\n {\n \"index\": \"6\",\n \"relevance_score\": \"3.7850367E-4\",\n \"text\": \"wars\"\n },\n {\n \"index\": \"3\",\n \"relevance_score\": \"1.2508839E-5\",\n \"text\": \"chewy\"\n }\n ]\n}" } }, "kind": "response", "name": { "name": "Response", - "namespace": "inference.put_elser" + "namespace": "inference.rerank" }, - "specLocation": "inference/put_elser/PutElserResponse.ts#L22-L24" + "specLocation": "inference/rerank/RerankResponse.ts#L22-L24" }, { "attachedBehaviors": [ @@ -28406,51 +28307,52 @@ "kind": "properties", "properties": [ { - "description": "The 
chunking configuration object.", - "extDocId": "inference-chunking", - "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", - "name": "chunking_settings", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "InferenceChunkingSettings", - "namespace": "inference._types" - } - } - }, - { - "description": "The type of service supported for the specified task type. In this case, `googleaistudio`.", - "name": "service", + "description": "Inference input.\nEither a string or an array of strings.", + "name": "input", "required": true, "type": { - "kind": "instance_of", - "type": { - "name": "ServiceType", - "namespace": "inference.put_googleaistudio" - } + "items": [ + { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + }, + { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "kind": "union_of" } }, { - "description": "Settings used to install the inference model. These settings are specific to the `googleaistudio` service.", - "name": "service_settings", - "required": true, + "description": "Optional task settings", + "name": "task_settings", + "required": false, "type": { "kind": "instance_of", "type": { - "name": "GoogleAiStudioServiceSettings", - "namespace": "inference.put_googleaistudio" + "name": "TaskSettings", + "namespace": "inference._types" } } } ] }, - "description": "Create an Google AI Studio inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `googleaistudio` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "description": "Perform sparse embedding inference on the service", "examples": { - "PutGoogleAiStudioRequestExample1": { - "description": "Run `PUT _inference/completion/google_ai_studio_completion` to create an inference endpoint to perform a `completion` task type.", - "summary": "A completion task", - "value": "{\n \"service\": \"googleaistudio\",\n \"service_settings\": {\n \"api_key\": \"api-key\",\n \"model_id\": \"model-id\"\n }\n}" + "SparseEmbeddingRequestExample1": { + "description": "Run `POST _inference/sparse_embedding/my-elser-model` to perform sparse embedding on the example sentence.", + "summary": "Sparse embedding task", + "value": "{\n \"input\": \"The sky above the port was the color of television tuned to a dead channel.\"\n}" } }, "inherits": { @@ -28462,36 +28364,38 @@ "kind": "request", "name": { "name": "Request", - "namespace": "inference.put_googleaistudio" + "namespace": "inference.sparse_embedding" }, "path": [ { - "description": "The type of the inference task that the model will perform.", - "name": "task_type", + "description": "The inference Id", + "name": "inference_id", "required": true, "type": { "kind": "instance_of", "type": { - "name": "GoogleAiStudioTaskType", - "namespace": "inference.put_googleaistudio" + "name": "Id", + "namespace": "_types" } } - }, + } + 
], + "query": [ { - "description": "The unique identifier of the inference endpoint.", - "name": "googleaistudio_inference_id", - "required": true, + "description": "Specifies the amount of time to wait for the inference request to complete.", + "name": "timeout", + "required": false, + "serverDefault": "30s", "type": { "kind": "instance_of", "type": { - "name": "Id", + "name": "Duration", "namespace": "_types" } } } ], - "query": [], - "specLocation": "inference/put_googleaistudio/PutGoogleAiStudioRequest.ts#L27-L75" + "specLocation": "inference/sparse_embedding/SparseEmbeddingRequest.ts#L25-L63" }, { "body": { @@ -28499,17 +28403,24 @@ "value": { "kind": "instance_of", "type": { - "name": "InferenceEndpointInfo", + "name": "SparseEmbeddingInferenceResult", "namespace": "inference._types" } } }, + "examples": { + "SparseEmbeddingResponseExample1": { + "description": "An abbreviated response from `POST _inference/sparse_embedding/my-elser-model`.\n", + "summary": "Sparse embedding task", + "value": "{\n \"sparse_embedding\": [\n {\n \"port\": 2.1259406,\n \"sky\": 1.7073475,\n \"color\": 1.6922266,\n \"dead\": 1.6247464,\n \"television\": 1.3525393,\n \"above\": 1.2425821,\n \"tuned\": 1.1440028,\n \"colors\": 1.1218185,\n \"tv\": 1.0111054,\n \"ports\": 1.0067928,\n \"poem\": 1.0042328,\n \"channel\": 0.99471164,\n \"tune\": 0.96235967,\n \"scene\": 0.9020516\n }\n ]\n}" + } + }, "kind": "response", "name": { "name": "Response", - "namespace": "inference.put_googleaistudio" + "namespace": "inference.sparse_embedding" }, - "specLocation": "inference/put_googleaistudio/PutGoogleAiStudioResponse.ts#L22-L24" + "specLocation": "inference/sparse_embedding/SparseEmbeddingResponse.ts#L22-L24" }, { "attachedBehaviors": [ @@ -28519,68 +28430,52 @@ "kind": "properties", "properties": [ { - "description": "The chunking configuration object.", - "extDocId": "inference-chunking", - "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", - "name": "chunking_settings", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "InferenceChunkingSettings", - "namespace": "inference._types" - } - } - }, - { - "description": "The type of service supported for the specified task type. In this case, `googlevertexai`.", - "name": "service", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "ServiceType", - "namespace": "inference.put_googlevertexai" - } - } - }, - { - "description": "Settings used to install the inference model. 
These settings are specific to the `googlevertexai` service.", - "name": "service_settings", + "description": "Inference input.\nEither a string or an array of strings.", + "name": "input", "required": true, "type": { - "kind": "instance_of", - "type": { - "name": "GoogleVertexAIServiceSettings", - "namespace": "inference.put_googlevertexai" - } + "items": [ + { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + }, + { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "kind": "union_of" } }, { - "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", + "description": "Optional task settings", "name": "task_settings", "required": false, "type": { "kind": "instance_of", "type": { - "name": "GoogleVertexAITaskSettings", - "namespace": "inference.put_googlevertexai" + "name": "TaskSettings", + "namespace": "inference._types" } } } ] }, - "description": "Create a Google Vertex AI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `googlevertexai` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "description": "Perform text embedding inference on the service", "examples": { - "PutGoogleVertexAiRequestExample1": { - "description": "Run `PUT _inference/text_embedding/google_vertex_ai_embeddings` to create an inference endpoint to perform a `text_embedding` task type.", - "summary": "A text embedding task", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"model_id\": \"model-id\",\n \"location\": \"location\",\n \"project_id\": \"project-id\"\n }\n}" - }, - "PutGoogleVertexAiRequestExample2": { - "description": "Run `PUT _inference/rerank/google_vertex_ai_rerank` to create an inference endpoint to perform a `rerank` task type.", - "summary": "A rerank task", - "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"project_id\": \"project-id\"\n }\n}" + "TextEmbeddingRequestExample1": { + "description": "Run `POST _inference/text_embedding/my-cohere-endpoint` to perform text embedding on the example sentence using the Cohere integration.", + "summary": "Text embedding task", + "value": "{\n \"input\": \"The sky above the port was the color of television tuned to a dead channel.\",\n \"task_settings\": {\n \"input_type\": \"ingest\"\n }\n}" } }, "inherits": { @@ -28592,36 +28487,38 @@ "kind": "request", "name": { "name": "Request", - "namespace": "inference.put_googlevertexai" + "namespace": "inference.text_embedding" }, "path": [ { - "description": "The type of the inference task that the model will perform.", - "name": "task_type", + "description": "The inference Id", + "name": "inference_id", "required": true, "type": { "kind": "instance_of", "type": { - "name": "GoogleVertexAITaskType", -
"namespace": "inference.put_googlevertexai" + "name": "Id", + "namespace": "_types" } } - }, + } + ], + "query": [ { - "description": "The unique identifier of the inference endpoint.", - "name": "googlevertexai_inference_id", - "required": true, + "description": "Specifies the amount of time to wait for the inference request to complete.", + "name": "timeout", + "required": false, + "serverDefault": "30s", "type": { "kind": "instance_of", "type": { - "name": "Id", + "name": "Duration", "namespace": "_types" } } } ], - "query": [], - "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L28-L81" + "specLocation": "inference/text_embedding/TextEmbeddingRequest.ts#L25-L63" }, { "body": { @@ -28629,78 +28526,133 @@ "value": { "kind": "instance_of", "type": { - "name": "InferenceEndpointInfo", + "name": "TextEmbeddingInferenceResult", "namespace": "inference._types" } } }, + "examples": { + "TextEmbeddingResponseExample1": { + "description": "An abbreviated response from `POST _inference/text_embedding/my-cohere-endpoint`.\n", + "summary": "Text embedding task", + "value": "{\n \"text_embedding\": [\n {\n \"embedding\": [\n {\n 0.018569946,\n -0.036895752,\n 0.01486969,\n -0.0045204163,\n -0.04385376,\n 0.0075950623,\n 0.04260254,\n -0.004005432,\n 0.007865906,\n 0.030792236,\n -0.050476074,\n 0.011795044,\n -0.011642456,\n -0.010070801\n }\n ]\n }\n ]\n}" + } + }, "kind": "response", "name": { "name": "Response", - "namespace": "inference.put_googlevertexai" + "namespace": "inference.text_embedding" }, - "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiResponse.ts#L22-L24" + "specLocation": "inference/text_embedding/TextEmbeddingResponse.ts#L22-L24" }, { "attachedBehaviors": [ "CommonQueryParameters" ], + "body": { + "kind": "no_body" + }, + "description": "Get cluster info.\nGet basic build, version, and cluster information.", + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "kind": "request", + "name": { + "name": "Request", + "namespace": "_global.info" + }, + "path": [], + "query": [], + "specLocation": "_global/info/RootNodeInfoRequest.ts#L22-L39" + }, + { "body": { "kind": "properties", "properties": [ { - "description": "The chunking configuration object.", - "extDocId": "inference-chunking", - "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", - "name": "chunking_settings", - "required": false, + "description": "The responding cluster's name.", + "name": "cluster_name", + "required": true, "type": { "kind": "instance_of", "type": { - "name": "InferenceChunkingSettings", - "namespace": "inference._types" + "name": "Name", + "namespace": "_types" } } }, { -<<<<<<< HEAD ->>>>>>> ef980f023 (Add Alibaba Cloud inference API (#4021)) -======= ->>>>>>> d5b1a529a (Add Azure OpenAI inference details (#4019)) ->>>>>>> 1e946eb24 (Add Azure OpenAI inference details (#4019)) - "description": "The type of service supported for the specified task type. In this case, `hugging_face`.", - "name": "service", + "name": "cluster_uuid", "required": true, "type": { "kind": "instance_of", "type": { - "name": "ServiceType", - "namespace": "inference.put_hugging_face" + "name": "Uuid", + "namespace": "_types" } } }, { - "description": "Settings used to install the inference model. 
These settings are specific to the `hugging_face` service.", - "name": "service_settings", + "description": "The responding node's name.", + "name": "name", "required": true, "type": { "kind": "instance_of", "type": { - "name": "HuggingFaceServiceSettings", - "namespace": "inference.put_hugging_face" + "name": "Name", + "namespace": "_types" + } + } + }, + { + "name": "tagline", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The running version of Elasticsearch.", + "name": "version", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ElasticsearchVersionInfo", + "namespace": "_types" } } } ] }, - "description": "Create a Hugging Face inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `hugging_face` service.\n\nYou must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.\nSelect the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section.\nCreate the endpoint and copy the URL after the endpoint initialization has been finished.\n\nThe following models are recommended for the Hugging Face service:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", "examples": { - "PutHuggingFaceRequestExample1": { - "description": "Run `PUT _inference/text_embedding/hugging-face-embeddings` to create an inference endpoint that performs a `text_embedding` task type.", - "summary": "A text embedding task", - "value": "{\n \"service\": \"hugging_face\",\n \"service_settings\": {\n \"api_key\": \"hugging-face-access-token\", \n \"url\": \"url-endpoint\" \n }\n}" + "RootNodeInfoResponseExample1": { + "description": "A successful response from `GET /`.", + "value": "{\n \"name\": \"instance-0000000000\",\n \"cluster_name\": \"my_test_cluster\",\n \"cluster_uuid\": \"5QaxoN0pRZuOmWSxstBBwQ\",\n \"version\": {\n \"build_date\": \"2024-02-01T13:07:13.727175297Z\",\n \"minimum_wire_compatibility_version\": \"7.17.0\",\n \"build_hash\": \"6185ba65d27469afabc9bc951cded6c17c21e3f3\",\n \"number\": \"8.12.1\",\n \"lucene_version\": \"9.9.2\",\n \"minimum_index_compatibility_version\": \"7.0.0\",\n \"build_flavor\": \"default\",\n \"build_snapshot\": false,\n \"build_type\": \"docker\"\n },\n \"tagline\": \"You Know, for Search\"\n}" } }, + "kind": "response", + "name": { + "name": "Response", + "namespace": "_global.info" + }, + "specLocation": "_global/info/RootNodeInfoResponse.ts#L23-L40" + }, + { + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "no_body" + }, + "description": "Delete pipelines.\nDelete one or more ingest pipelines.", "inherits": { "type": { "name": "RequestBase", "namespace": "_types" } }, "kind": "request", "name": { "name": "Request", @@ -28710,1032
+28662,12 @@ "kind": "request", "name": { "name": "Request", - "namespace": "inference.put_hugging_face" + "namespace": "ingest.delete_pipeline" }, "path": [ { - "description": "The type of the inference task that the model will perform.", - "name": "task_type", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "HuggingFaceTaskType", - "namespace": "inference.put_hugging_face" - } - } - }, - { - "description": "The unique identifier of the inference endpoint.", - "name": "huggingface_inference_id", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "Id", - "namespace": "_types" - } - } - } - ], - "query": [], - "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L27-L89" - }, - { - "body": { - "kind": "value", - "value": { - "kind": "instance_of", - "type": { - "name": "InferenceEndpointInfo", - "namespace": "inference._types" - } - } - }, - "kind": "response", - "name": { - "name": "Response", - "namespace": "inference.put_hugging_face" - }, - "specLocation": "inference/put_hugging_face/PutHuggingFaceResponse.ts#L22-L24" - }, - { - "attachedBehaviors": [ - "CommonQueryParameters" - ], - "body": { - "kind": "properties", - "properties": [ - { - "description": "The chunking configuration object.", - "extDocId": "inference-chunking", - "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", - "name": "chunking_settings", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "InferenceChunkingSettings", - "namespace": "inference._types" - } - } - }, - { -<<<<<<< HEAD -======= - "description": "The type of service supported for the specified task type. In this case, `jinaai`.", - "name": "service", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "ServiceType", - "namespace": "inference.put_jinaai" - } - } - }, - { - "description": "Settings used to install the inference model. 
These settings are specific to the `jinaai` service.", - "name": "service_settings", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "JinaAIServiceSettings", - "namespace": "inference.put_jinaai" - } - } - }, - { - "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", - "name": "task_settings", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "JinaAITaskSettings", - "namespace": "inference.put_jinaai" - } - } - } - ] - }, - "description": "Create an JinaAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `jinaai` service.\n\nTo review the available `rerank` models, refer to .\nTo review the available `text_embedding` models, refer to the .\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", - "examples": { - "PutJinaAiRequestExample1": { - "description": "Run `PUT _inference/text_embedding/jinaai-embeddings` to create an inference endpoint for text embedding tasks using the JinaAI service.", - "summary": "A text embedding task", - "value": "{\n \"service\": \"jinaai\",\n \"service_settings\": {\n \"model_id\": \"jina-embeddings-v3\",\n \"api_key\": \"JinaAi-Api-key\"\n }\n}" - }, - "PutJinaAiRequestExample2": { - "description": "Run `PUT _inference/rerank/jinaai-rerank` to create an inference endpoint for rerank tasks using the JinaAI service.", - "summary": "A rerank task", - "value": "{\n \"service\": \"jinaai\",\n \"service_settings\": {\n \"api_key\": \"JinaAI-Api-key\",\n \"model_id\": \"jina-reranker-v2-base-multilingual\"\n },\n \"task_settings\": {\n \"top_n\": 10,\n \"return_documents\": true\n }\n}" - } - }, - "inherits": { - "type": { - "name": "RequestBase", - "namespace": "_types" - } - }, - "kind": "request", - "name": { - "name": "Request", - "namespace": "inference.put_jinaai" - }, - "path": [ - { - "description": "The type of the inference task that the model will perform.", - "name": "task_type", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "JinaAITaskType", - "namespace": "inference.put_jinaai" - } - } - }, - { - "description": "The unique identifier of the inference endpoint.", - "name": "jinaai_inference_id", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "Id", - "namespace": "_types" - } - } - } - ], - "query": [], - "specLocation": "inference/put_jinaai/PutJinaAiRequest.ts#L28-L84" - }, - { - "body": { - "kind": "value", - "value": { - "kind": "instance_of", - "type": { - "name": "InferenceEndpointInfo", - "namespace": "inference._types" - } - } - }, - "kind": "response", - "name": { - "name": "Response", - "namespace": "inference.put_jinaai" - }, - "specLocation": "inference/put_jinaai/PutJinaAiResponse.ts#L22-L24" - }, - { - "attachedBehaviors": [ - "CommonQueryParameters" - ], - "body": { - "kind": "properties", - "properties": [ - { - "description": "The chunking configuration object.", - "extDocId": "inference-chunking", - 
"extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", - "name": "chunking_settings", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "InferenceChunkingSettings", - "namespace": "inference._types" - } - } - }, - { - "description": "The type of service supported for the specified task type. In this case, `mistral`.", - "name": "service", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "ServiceType", - "namespace": "inference.put_mistral" - } - } - }, - { - "description": "Settings used to install the inference model. These settings are specific to the `mistral` service.", - "name": "service_settings", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "MistralServiceSettings", - "namespace": "inference.put_mistral" - } - } - } - ] - }, - "description": "Create a Mistral inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `mistral` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", - "examples": { - "PutMistralRequestExample1": { - "description": "Run `PUT _inference/text_embedding/mistral-embeddings-test` to create a Mistral inference endpoint that performs a text embedding task.", - "value": "{\n \"service\": \"mistral\",\n \"service_settings\": {\n \"api_key\": \"Mistral-API-Key\",\n \"model\": \"mistral-embed\" \n }\n}" - } - }, - "inherits": { - "type": { - "name": "RequestBase", - "namespace": "_types" - } - }, - "kind": "request", - "name": { - "name": "Request", - "namespace": "inference.put_mistral" - }, - "path": [ - { - "description": "The task type.\nThe only valid task type for the model to perform is `text_embedding`.", - "name": "task_type", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "MistralTaskType", - "namespace": "inference.put_mistral" - } - } - }, - { - "description": "The unique identifier of the inference endpoint.", - "name": "mistral_inference_id", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "Id", - "namespace": "_types" - } - } - } - ], - "query": [], - "specLocation": "inference/put_mistral/PutMistralRequest.ts#L28-L77" - }, - { - "body": { - "kind": "value", - "value": { - "kind": "instance_of", - "type": { - "name": "InferenceEndpointInfo", - "namespace": "inference._types" - } - } - }, - "kind": "response", - "name": { - "name": "Response", - "namespace": "inference.put_mistral" - }, - "specLocation": "inference/put_mistral/PutMistralResponse.ts#L22-L24" - }, - { - "attachedBehaviors": [ - "CommonQueryParameters" - ], - "body": { - "kind": "properties", - "properties": [ - { - "description": "The chunking configuration object.", - "extDocId": "inference-chunking", - "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", - "name": "chunking_settings", - "required": false, - "type": { - "kind": "instance_of", - "type": 
{ - "name": "InferenceChunkingSettings", - "namespace": "inference._types" - } - } - }, - { ->>>>>>> f7c35e7b1 (Add Mistral inference details (#3997)) - "description": "The type of service supported for the specified task type. In this case, `openai`.", - "name": "service", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "ServiceType", - "namespace": "inference.put_openai" - } - } - }, - { - "description": "Settings used to install the inference model. These settings are specific to the `openai` service.", - "name": "service_settings", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "OpenAIServiceSettings", - "namespace": "inference.put_openai" - } - } - }, - { - "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", - "name": "task_settings", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "OpenAITaskSettings", - "namespace": "inference.put_openai" - } - } - } - ] - }, - "description": "Create an OpenAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `openai` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", - "examples": { - "PutOpenAiRequestExample1": { - "description": "Run `PUT _inference/text_embedding/openai-embeddings` to create an inference endpoint that performs a `text_embedding` task. 
The embeddings created by requests to this endpoint will have 128 dimensions.", - "summary": "A text embedding task", - "value": "{\n \"service\": \"openai\",\n \"service_settings\": {\n \"api_key\": \"OpenAI-API-Key\",\n \"model_id\": \"text-embedding-3-small\",\n \"dimensions\": 128\n }\n}" - }, - "PutOpenAiRequestExample2": { - "description": "Run `PUT _inference/completion/openai-completion` to create an inference endpoint to perform a completion task type.", - "summary": "A completion task", - "value": "{\n \"service\": \"openai\",\n \"service_settings\": {\n \"api_key\": \"OpenAI-API-Key\",\n \"model_id\": \"gpt-3.5-turbo\"\n }\n}" - } - }, - "inherits": { - "type": { - "name": "RequestBase", - "namespace": "_types" - } - }, - "kind": "request", - "name": { - "name": "Request", - "namespace": "inference.put_openai" - }, - "path": [ - { - "description": "The type of the inference task that the model will perform.\nNOTE: The `chat_completion` task type only supports streaming and only through the _stream API.", - "name": "task_type", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "OpenAITaskType", - "namespace": "inference.put_openai" - } - } - }, - { - "description": "The unique identifier of the inference endpoint.", - "name": "openai_inference_id", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "Id", - "namespace": "_types" - } - } - } - ], - "query": [], - "specLocation": "inference/put_openai/PutOpenAiRequest.ts#L28-L82" - }, - { - "body": { - "kind": "value", - "value": { - "kind": "instance_of", - "type": { - "name": "InferenceEndpointInfo", - "namespace": "inference._types" - } - } - }, - "kind": "response", - "name": { - "name": "Response", - "namespace": "inference.put_openai" - }, - "specLocation": "inference/put_openai/PutOpenAiResponse.ts#L22-L24" - }, - { - "attachedBehaviors": [ - "CommonQueryParameters" - ], - "body": { - "kind": "properties", - "properties": [ - { - "description": "The type of service supported for the specified task type. In this case, `watsonxai`.", - "name": "service", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "ServiceType", - "namespace": "inference.put_watsonx" - } - } - }, - { - "description": "Settings used to install the inference model. 
These settings are specific to the `watsonxai` service.", - "name": "service_settings", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "WatsonxServiceSettings", - "namespace": "inference.put_watsonx" - } - } - } - ] - }, - "description": "Create a Watsonx inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `watsonxai` service.\nYou need an IBM Cloud Databases for Elasticsearch deployment to use the `watsonxai` inference service.\nYou can provision one through the IBM catalog, the Cloud Databases CLI plug-in, the Cloud Databases API, or Terraform.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", - "examples": { - "InferenceRequestExample1": { - "description": "Run `PUT _inference/text_embedding/watsonx-embeddings` to create an Watonsx inference endpoint that performs a text embedding task.", - "value": "{\n \"service\": \"watsonxai\",\n \"service_settings\": {\n \"api_key\": \"Watsonx-API-Key\", \n \"url\": \"Wastonx-URL\", \n \"model_id\": \"ibm/slate-30m-english-rtrvr\",\n \"project_id\": \"IBM-Cloud-ID\", \n \"api_version\": \"2024-03-14\"\n }\n}" - } - }, - "inherits": { - "type": { - "name": "RequestBase", - "namespace": "_types" - } - }, - "kind": "request", - "name": { - "name": "Request", - "namespace": "inference.put_watsonx" - }, - "path": [ - { - "description": "The task type.\nThe only valid task type for the model to perform is `text_embedding`.", - "name": "task_type", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "WatsonxTaskType", - "namespace": "inference.put_watsonx" - } - } - }, - { - "description": "The unique identifier of the inference endpoint.", - "name": "watsonx_inference_id", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "Id", - "namespace": "_types" - } - } - } - ], - "query": [], - "specLocation": "inference/put_watsonx/PutWatsonxRequest.ts#L24-L70" - }, - { - "body": { - "kind": "value", - "value": { - "kind": "instance_of", - "type": { - "name": "InferenceEndpointInfo", - "namespace": "inference._types" - } - } - }, - "kind": "response", - "name": { - "name": "Response", - "namespace": "inference.put_watsonx" - }, - "specLocation": "inference/put_watsonx/PutWatsonxResponse.ts#L22-L24" - }, - { - "attachedBehaviors": [ - "CommonQueryParameters" - ], - "body": { - "kind": "properties", - "properties": [ - { - "description": "Query input.", - "name": "query", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The text on which you want to perform the inference task.\nIt can be a single string or an array.\n\n> info\n> Inference endpoints for the `completion` task type currently only support a single string as input.", - "name": "input", - "required": true, - "type": { - "items": [ - { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - }, - { - "kind": "array_of", - "value": { - 
"kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - } - ], - "kind": "union_of" - } - }, - { - "description": "Task settings for the individual inference request.\nThese settings are specific to the task type you specified and override the task settings specified when initializing the service.", - "name": "task_settings", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "TaskSettings", - "namespace": "inference._types" - } - } - } - ] - }, - "description": "Perform rereanking inference on the service", - "examples": { - "RerankRequestExample1": { - "description": "Run `POST _inference/rerank/cohere_rerank` to perform reranking on the example input.", - "summary": "Rerank task", - "value": "{\n \"input\": [\"luke\", \"like\", \"leia\", \"chewy\",\"r2d2\", \"star\", \"wars\"],\n \"query\": \"star wars main character\"\n}" - } - }, - "inherits": { - "type": { - "name": "RequestBase", - "namespace": "_types" - } - }, - "kind": "request", - "name": { - "name": "Request", - "namespace": "inference.rerank" - }, - "path": [ - { - "description": "The unique identifier for the inference endpoint.", - "name": "inference_id", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "Id", - "namespace": "_types" - } - } - } - ], - "query": [ - { - "description": "The amount of time to wait for the inference request to complete.", - "name": "timeout", - "required": false, - "serverDefault": "30s", - "type": { - "kind": "instance_of", - "type": { - "name": "Duration", - "namespace": "_types" - } - } - } - ], - "specLocation": "inference/rerank/RerankRequest.ts#L25-L72" - }, - { - "body": { - "kind": "value", - "value": { - "kind": "instance_of", - "type": { - "name": "RerankedInferenceResult", - "namespace": "inference._types" - } - } - }, - "examples": { - "RerankResponseExample1": { - "description": "A successful response from `POST _inference/rerank/cohere_rerank`.\n", - "summary": "Rerank task", - "value": "{\n \"rerank\": [\n {\n \"index\": \"2\",\n \"relevance_score\": \"0.011597361\",\n \"text\": \"leia\"\n },\n {\n \"index\": \"0\",\n \"relevance_score\": \"0.006338922\",\n \"text\": \"luke\"\n },\n {\n \"index\": \"5\",\n \"relevance_score\": \"0.0016166499\",\n \"text\": \"star\"\n },\n {\n \"index\": \"4\",\n \"relevance_score\": \"0.0011695103\",\n \"text\": \"r2d2\"\n },\n {\n \"index\": \"1\",\n \"relevance_score\": \"5.614787E-4\",\n \"text\": \"like\"\n },\n {\n \"index\": \"6\",\n \"relevance_score\": \"3.7850367E-4\",\n \"text\": \"wars\"\n },\n {\n \"index\": \"3\",\n \"relevance_score\": \"1.2508839E-5\",\n \"text\": \"chewy\"\n }\n ]\n}" - } - }, - "kind": "response", - "name": { - "name": "Response", - "namespace": "inference.rerank" - }, - "specLocation": "inference/rerank/RerankResponse.ts#L22-L24" - }, - { - "attachedBehaviors": [ - "CommonQueryParameters" - ], - "body": { - "kind": "properties", - "properties": [ - { - "description": "Inference input.\nEither a string or an array of strings.", - "name": "input", - "required": true, - "type": { - "items": [ - { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - }, - { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - } - ], - "kind": "union_of" - } - }, - { - "description": "Optional task settings", - "name": "task_settings", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "TaskSettings", - 
"namespace": "inference._types" - } - } - } - ] - }, - "description": "Perform sparse embedding inference on the service", - "examples": { - "SparseEmbeddingRequestExample1": { - "description": "Run `POST _inference/sparse_embedding/my-elser-model` to perform sparse embedding on the example sentence.", - "summary": "Sparse embedding task", - "value": "{\n \"input\": \"The sky above the port was the color of television tuned to a dead channel.\"\n}" - } - }, - "inherits": { - "type": { - "name": "RequestBase", - "namespace": "_types" - } - }, - "kind": "request", - "name": { - "name": "Request", - "namespace": "inference.sparse_embedding" - }, - "path": [ - { - "description": "The inference Id", - "name": "inference_id", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "Id", - "namespace": "_types" - } - } - } - ], - "query": [ - { - "description": "Specifies the amount of time to wait for the inference request to complete.", - "name": "timeout", - "required": false, - "serverDefault": "30s", - "type": { - "kind": "instance_of", - "type": { - "name": "Duration", - "namespace": "_types" - } - } - } - ], - "specLocation": "inference/sparse_embedding/SparseEmbeddingRequest.ts#L25-L63" - }, - { - "body": { - "kind": "value", - "value": { - "kind": "instance_of", - "type": { - "name": "SparseEmbeddingInferenceResult", - "namespace": "inference._types" - } - } - }, - "examples": { - "SparseEmbeddingResponseExample1": { - "description": "An abbreviated response from `POST _inference/sparse_embedding/my-elser-model`.\n", - "summary": "Sparse embedding task", - "value": "{\n \"sparse_embedding\": [\n {\n \"port\": 2.1259406,\n \"sky\": 1.7073475,\n \"color\": 1.6922266,\n \"dead\": 1.6247464,\n \"television\": 1.3525393,\n \"above\": 1.2425821,\n \"tuned\": 1.1440028,\n \"colors\": 1.1218185,\n \"tv\": 1.0111054,\n \"ports\": 1.0067928,\n \"poem\": 1.0042328,\n \"channel\": 0.99471164,\n \"tune\": 0.96235967,\n \"scene\": 0.9020516\n }\n ]\n}" - } - }, - "kind": "response", - "name": { - "name": "Response", - "namespace": "inference.sparse_embedding" - }, - "specLocation": "inference/sparse_embedding/SparseEmbeddingResponse.ts#L22-L24" - }, - { - "attachedBehaviors": [ - "CommonQueryParameters" - ], - "body": { - "kind": "properties", - "properties": [ - { - "description": "Inference input.\nEither a string or an array of strings.", - "name": "input", - "required": true, - "type": { - "items": [ - { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - }, - { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - } - ], - "kind": "union_of" - } - }, - { - "description": "Optional task settings", - "name": "task_settings", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "TaskSettings", - "namespace": "inference._types" - } - } - } - ] - }, - "description": "Perform text embedding inference on the service", - "examples": { - "TextEmbeddingRequestExample1": { - "description": "Run `POST _inference/text_embedding/my-cohere-endpoint` to perform text embedding on the example sentence using the Cohere integration,", - "summary": "Text embedding task", - "value": "{\n \"input\": \"The sky above the port was the color of television tuned to a dead channel.\",\n \"task_settings\": {\n \"input_type\": \"ingest\"\n }\n}" - } - }, - "inherits": { - "type": { - "name": "RequestBase", - "namespace": "_types" - } - }, - "kind": "request", - 
"name": { - "name": "Request", - "namespace": "inference.text_embedding" - }, - "path": [ - { - "description": "The inference Id", - "name": "inference_id", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "Id", - "namespace": "_types" - } - } - } - ], - "query": [ - { - "description": "Specifies the amount of time to wait for the inference request to complete.", - "name": "timeout", - "required": false, - "serverDefault": "30s", - "type": { - "kind": "instance_of", - "type": { - "name": "Duration", - "namespace": "_types" - } - } - } - ], - "specLocation": "inference/text_embedding/TextEmbeddingRequest.ts#L25-L63" - }, - { - "body": { - "kind": "value", - "value": { - "kind": "instance_of", - "type": { - "name": "TextEmbeddingInferenceResult", - "namespace": "inference._types" - } - } - }, - "examples": { - "TextEmbeddingResponseExample1": { - "description": "An abbreviated response from `POST _inference/text_embedding/my-cohere-endpoint`.\n", - "summary": "Text embedding task", - "value": "{\n \"text_embedding\": [\n {\n \"embedding\": [\n {\n 0.018569946,\n -0.036895752,\n 0.01486969,\n -0.0045204163,\n -0.04385376,\n 0.0075950623,\n 0.04260254,\n -0.004005432,\n 0.007865906,\n 0.030792236,\n -0.050476074,\n 0.011795044,\n -0.011642456,\n -0.010070801\n }\n ]\n }\n ]\n}" - } - }, - "kind": "response", - "name": { - "name": "Response", - "namespace": "inference.text_embedding" - }, - "specLocation": "inference/text_embedding/TextEmbeddingResponse.ts#L22-L24" - }, - { - "attachedBehaviors": [ - "CommonQueryParameters" - ], - "body": { - "kind": "no_body" - }, - "description": "Get cluster info.\nGet basic build, version, and cluster information.", - "inherits": { - "type": { - "name": "RequestBase", - "namespace": "_types" - } - }, - "kind": "request", - "name": { - "name": "Request", - "namespace": "_global.info" - }, - "path": [], - "query": [], - "specLocation": "_global/info/RootNodeInfoRequest.ts#L22-L39" - }, - { - "body": { - "kind": "properties", - "properties": [ - { - "description": "The responding cluster's name.", - "name": "cluster_name", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "Name", - "namespace": "_types" - } - } - }, - { - "name": "cluster_uuid", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "Uuid", - "namespace": "_types" - } - } - }, - { - "description": "The responding node's name.", - "name": "name", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "Name", - "namespace": "_types" - } - } - }, - { - "name": "tagline", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The running version of Elasticsearch.", - "name": "version", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "ElasticsearchVersionInfo", - "namespace": "_types" - } - } - } - ] - }, - "examples": { - "RootNodeInfoResponseExample1": { - "description": "A successful response from `GET /`s.", - "value": "{\n \"name\": \"instance-0000000000\",\n \"cluster_name\": \"my_test_cluster\",\n \"cluster_uuid\": \"5QaxoN0pRZuOmWSxstBBwQ\",\n \"version\": {\n \"build_date\": \"2024-02-01T13:07:13.727175297Z\",\n \"minimum_wire_compatibility_version\": \"7.17.0\",\n \"build_hash\": \"6185ba65d27469afabc9bc951cded6c17c21e3f3\",\n \"number\": \"8.12.1\",\n \"lucene_version\": \"9.9.2\",\n \"minimum_index_compatibility_version\": \"7.0.0\",\n \"build_flavor\": 
\"default\",\n \"build_snapshot\": false,\n \"build_type\": \"docker\"\n },\n \"tagline\": \"You Know, for Search\"\n}" - } - }, - "kind": "response", - "name": { - "name": "Response", - "namespace": "_global.info" - }, - "specLocation": "_global/info/RootNodeInfoResponse.ts#L23-L40" - }, - { - "attachedBehaviors": [ - "CommonQueryParameters" - ], - "body": { - "kind": "no_body" - }, - "description": "Delete pipelines.\nDelete one or more ingest pipelines.", - "inherits": { - "type": { - "name": "RequestBase", - "namespace": "_types" - } - }, - "kind": "request", - "name": { - "name": "Request", - "namespace": "ingest.delete_pipeline" - }, - "path": [ - { - "description": "Pipeline ID or wildcard expression of pipeline IDs used to limit the request.\nTo delete all ingest pipelines in a cluster, use a value of `*`.", - "name": "id", + "description": "Pipeline ID or wildcard expression of pipeline IDs used to limit the request.\nTo delete all ingest pipelines in a cluster, use a value of `*`.", + "name": "id", "required": true, "type": { "kind": "instance_of", @@ -102339,12 +101271,9 @@ "kind": "enum", "members": [ { -<<<<<<< HEAD -======= "name": "completion" }, { -<<<<<<< HEAD "name": "rerank" }, { @@ -102373,63 +101302,6 @@ }, "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L89-L91" }, - { - "kind": "enum", - "members": [ - { - "name": "completion" - }, - { -======= ->>>>>>> 1e946eb24 (Add Azure OpenAI inference details (#4019)) - "name": "text_embedding" - } - ], - "name": { - "name": "AmazonBedrockTaskType", - "namespace": "inference.put_amazonbedrock" - }, - "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L86-L89" - }, - { - "kind": "enum", - "members": [ - { - "name": "amazonbedrock" - } - ], - "name": { - "name": "ServiceType", - "namespace": "inference.put_amazonbedrock" - }, - "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L91-L93" - }, - { - "kind": "enum", - "members": [ - { - "name": "completion" - } - ], - "name": { - "name": "AnthropicTaskType", - "namespace": "inference.put_anthropic" - }, - "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L84-L86" - }, - { - "kind": "enum", - "members": [ - { - "name": "anthropic" - } - ], - "name": { - "name": "ServiceType", - "namespace": "inference.put_anthropic" - }, - "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L88-L90" - }, { "kind": "enum", "members": [ @@ -102466,151 +101338,6 @@ "name": "completion" }, { -<<<<<<< HEAD - "name": "text_embedding" - } - ], - "name": { - "name": "AzureOpenAITaskType", - "namespace": "inference.put_azureopenai" - }, - "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L90-L93" - }, - { - "kind": "enum", - "members": [ - { - "name": "azureopenai" - } - ], - "name": { - "name": "ServiceType", - "namespace": "inference.put_azureopenai" - }, - "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L95-L97" - }, - { - "kind": "enum", - "members": [ - { - "name": "completion" - }, - { -======= ->>>>>>> 1e946eb24 (Add Azure OpenAI inference details (#4019)) - "name": "rerank" - }, - { - "name": "text_embedding" - } - ], - "name": { - "name": "CohereTaskType", - "namespace": "inference.put_cohere" - }, - "specLocation": "inference/put_cohere/PutCohereRequest.ts#L84-L88" - }, - { - "kind": "enum", - "members": [ - { - "name": "byte" - }, - { - "name": "float" - }, - { - "name": "int8" - } - ], - "name": { - "name": "EmbeddingType", - "namespace": "inference.put_cohere" - }, - 
"specLocation": "inference/put_cohere/PutCohereRequest.ts#L94-L98" - }, - { - "kind": "enum", - "members": [ - { - "name": "classification" - }, - { - "name": "clustering" - }, - { - "name": "ingest" - }, - { - "name": "search" - } - ], - "name": { - "name": "InputType", - "namespace": "inference.put_cohere" - }, - "specLocation": "inference/put_cohere/PutCohereRequest.ts#L100-L105" - }, - { - "kind": "enum", - "members": [ - { - "name": "cohere" - } - ], - "name": { - "name": "ServiceType", - "namespace": "inference.put_cohere" - }, - "specLocation": "inference/put_cohere/PutCohereRequest.ts#L90-L92" - }, - { - "kind": "enum", - "members": [ - { - "name": "cosine" - }, - { - "name": "dot_product" - }, - { - "name": "l2_norm" - } - ], - "name": { - "name": "SimilarityType", - "namespace": "inference.put_cohere" - }, - "specLocation": "inference/put_cohere/PutCohereRequest.ts#L107-L111" - }, - { - "kind": "enum", - "members": [ - { - "name": "END" - }, - { - "name": "NONE" - }, - { - "name": "START" - } - ], - "name": { - "name": "TruncateType", - "namespace": "inference.put_cohere" - }, - "specLocation": "inference/put_cohere/PutCohereRequest.ts#L113-L117" - }, - { - "kind": "enum", - "members": [ - { -<<<<<<< HEAD -======= - "name": "completion" - }, - { "name": "text_embedding" } ], @@ -102637,154 +101364,6 @@ "kind": "enum", "members": [ { ->>>>>>> 1e946eb24 (Add Azure OpenAI inference details (#4019)) - "name": "chat_completion" - } - ], - "name": { - "name": "EisTaskType", - "namespace": "inference.put_eis" - }, - "specLocation": "inference/put_eis/PutEisRequest.ts#L64-L66" - }, - { - "kind": "enum", - "members": [ - { - "name": "elastic" - } - ], - "name": { - "name": "ServiceType", - "namespace": "inference.put_eis" - }, - "specLocation": "inference/put_eis/PutEisRequest.ts#L68-L70" - }, - { - "kind": "enum", - "members": [ - { - "name": "rerank" - }, - { - "name": "sparse_embedding" - }, - { - "name": "text_embedding" - } - ], - "name": { - "name": "ElasticsearchTaskType", - "namespace": "inference.put_elasticsearch" - }, - "specLocation": "inference/put_elasticsearch/PutElasticsearchRequest.ts#L88-L92" - }, - { - "kind": "enum", - "members": [ - { - "name": "elasticsearch" - } - ], - "name": { - "name": "ServiceType", - "namespace": "inference.put_elasticsearch" - }, - "specLocation": "inference/put_elasticsearch/PutElasticsearchRequest.ts#L94-L96" - }, - { - "kind": "enum", - "members": [ - { - "name": "sparse_embedding" - } - ], - "name": { - "name": "ElserTaskType", - "namespace": "inference.put_elser" - }, - "specLocation": "inference/put_elser/PutElserRequest.ts#L84-L86" - }, - { - "kind": "enum", - "members": [ - { - "name": "elser" - } - ], - "name": { - "name": "ServiceType", - "namespace": "inference.put_elser" - }, - "specLocation": "inference/put_elser/PutElserRequest.ts#L88-L90" - }, - { - "kind": "enum", - "members": [ - { - "name": "completion" - }, - { - "name": "text_embedding" - } - ], - "name": { - "name": "GoogleAiStudioTaskType", - "namespace": "inference.put_googleaistudio" - }, - "specLocation": "inference/put_googleaistudio/PutGoogleAiStudioRequest.ts#L77-L80" - }, - { - "kind": "enum", - "members": [ - { - "name": "googleaistudio" - } - ], - "name": { - "name": "ServiceType", - "namespace": "inference.put_googleaistudio" - }, - "specLocation": "inference/put_googleaistudio/PutGoogleAiStudioRequest.ts#L82-L84" - }, - { - "kind": "enum", - "members": [ - { - "name": "rerank" - }, - { - "name": "text_embedding" - } - ], - "name": { - "name": 
"GoogleVertexAITaskType", - "namespace": "inference.put_googlevertexai" - }, - "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L83-L86" - }, - { - "kind": "enum", - "members": [ - { - "name": "googlevertexai" - } - ], - "name": { - "name": "ServiceType", - "namespace": "inference.put_googlevertexai" - }, - "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L88-L90" - }, - { - "kind": "enum", - "members": [ - { -<<<<<<< HEAD ->>>>>>> ef980f023 (Add Alibaba Cloud inference API (#4021)) -======= ->>>>>>> d5b1a529a (Add Azure OpenAI inference details (#4019)) ->>>>>>> 1e946eb24 (Add Azure OpenAI inference details (#4019)) "name": "text_embedding" } ], @@ -102811,8 +101390,6 @@ "kind": "enum", "members": [ { -<<<<<<< HEAD -======= "name": "rerank" }, { @@ -102909,7 +101486,6 @@ "kind": "enum", "members": [ { ->>>>>>> f7c35e7b1 (Add Mistral inference details (#3997)) "name": "chat_completion" }, { @@ -123315,414 +121891,23 @@ "type": { "kind": "instance_of", "type": { - "name": "string", - "namespace": "_builtins" - } - } - } - ], - "specLocation": "inference/_types/Services.ts#L60-L89" - }, - { - "kind": "interface", - "name": { -<<<<<<< HEAD -======= -<<<<<<< HEAD - "name": "AlibabaCloudServiceSettings", - "namespace": "inference.put_alibabacloud" - }, - "properties": [ - { - "description": "A valid API key for the AlibabaCloud AI Search API.", - "name": "api_key", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The name of the host address used for the inference task.\nYou can find the host address in the API keys section of the documentation.", - "extDocId": "alibabacloud-api-keys", - "extDocUrl": "https://opensearch.console.aliyun.com/cn-shanghai/rag/api-key", - "name": "host", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "This setting helps to minimize the number of rate limit errors returned from AlibabaCloud AI Search.\nBy default, the `alibabacloud-ai-search` service sets the number of requests allowed per minute to `1000`.", - "name": "rate_limit", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "RateLimitSetting", - "namespace": "inference._types" - } - } - }, - { - "description": "The name of the model service to use for the inference task.\nThe following service IDs are available for the `completion` task:\n\n* `ops-qwen-turbo`\n* `qwen-turbo`\n* `qwen-plus`\n* `qwen-max รท qwen-max-longcontext`\n\nThe following service ID is available for the `rerank` task:\n\n* `ops-bge-reranker-larger`\n\nThe following service ID is available for the `sparse_embedding` task:\n\n* `ops-text-sparse-embedding-001`\n\nThe following service IDs are available for the `text_embedding` task:\n\n`ops-text-embedding-001`\n`ops-text-embedding-zh-001`\n`ops-text-embedding-en-001`\n`ops-text-embedding-002`", - "name": "service_id", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The name of the workspace used for the inference task.", - "name": "workspace", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - } - ], - "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L93-L138" - }, - { - "kind": "interface", - "name": { - "name": 
"RateLimitSetting", - "namespace": "inference._types" - }, - "properties": [ - { - "description": "The number of requests allowed per minute.", - "name": "requests_per_minute", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "integer", - "namespace": "_types" - } - } - } - ], - "specLocation": "inference/_types/Services.ts#L95-L100" - }, - { - "kind": "interface", - "name": { - "name": "AlibabaCloudTaskSettings", - "namespace": "inference.put_alibabacloud" - }, - "properties": [ - { - "description": "For a `sparse_embedding` or `text_embedding` task, specify the type of input passed to the model.\nValid values are:\n\n* `ingest` for storing document embeddings in a vector database.\n* `search` for storing embeddings of search queries run against a vector database to find relevant documents.", - "name": "input_type", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "For a `sparse_embedding` task, it affects whether the token name will be returned in the response.\nIt defaults to `false`, which means only the token ID will be returned in the response.", - "name": "return_token", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "boolean", - "namespace": "_builtins" - } - } - } - ], - "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L140-L154" - }, - { - "kind": "interface", - "name": { -======= -<<<<<<< HEAD ->>>>>>> 1e946eb24 (Add Azure OpenAI inference details (#4019)) - "name": "AmazonBedrockServiceSettings", - "namespace": "inference.put_amazonbedrock" - }, - "properties": [ - { - "description": "A valid AWS access key that has permissions to use Amazon Bedrock and access to models for inference requests.", - "name": "access_key", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The base model ID or an ARN to a custom model based on a foundational model.\nThe base model IDs can be found in the Amazon Bedrock documentation.\nNote that the model ID must be available for the provider chosen and your IAM user must have access to the model.", - "extDocId": "amazonbedrock-models", - "extDocUrl": "https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html", - "name": "model", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The model provider for your deployment.\nNote that some providers may support only certain task types.\nSupported providers include:\n\n* `amazontitan` - available for `text_embedding` and `completion` task types\n* `anthropic` - available for `completion` task type only\n* `ai21labs` - available for `completion` task type only\n* `cohere` - available for `text_embedding` and `completion` task types\n* `meta` - available for `completion` task type only\n* `mistral` - available for `completion` task type only", - "name": "provider", -<<<<<<< HEAD -======= -======= - "name": "AzureOpenAIServiceSettings", - "namespace": "inference.put_azureopenai" - }, - "properties": [ - { - "description": "A valid API key for your Azure OpenAI account.\nYou must specify either `api_key` or `entra_id`.\nIf you do not provide either or you provide both, you will receive an error when you try to create your model.\n\nIMPORTANT: You need to provide the API key only once, during the inference model 
creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", - "extDocId": "azureopenai-auth", - "extDocUrl": "https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#authentication", - "name": "api_key", ->>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) ->>>>>>> 1e946eb24 (Add Azure OpenAI inference details (#4019)) - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { -<<<<<<< HEAD -======= -<<<<<<< HEAD ->>>>>>> 1e946eb24 (Add Azure OpenAI inference details (#4019)) - "description": "The region that your model or ARN is deployed in.\nThe list of available regions per model can be found in the Amazon Bedrock documentation.", - "extDocId": "amazonbedrock-models", - "extDocUrl": "https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html", - "name": "region", -<<<<<<< HEAD -======= -======= - "description": "The Azure API version ID to use.\nIt is recommended to use the latest supported non-preview version.", - "name": "api_version", ->>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) ->>>>>>> 1e946eb24 (Add Azure OpenAI inference details (#4019)) - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { -<<<<<<< HEAD - "description": "This setting helps to minimize the number of rate limit errors returned from Watsonx.\nBy default, the `watsonxai` service sets the number of requests allowed per minute to 120.", -======= -<<<<<<< HEAD - "description": "This setting helps to minimize the number of rate limit errors returned from Watsonx.\nBy default, the `watsonxai` service sets the number of requests allowed per minute to 120.", -======= - "description": "The deployment name of your deployed models.\nYour Azure OpenAI deployments can be found through the Azure OpenAI Studio portal that is linked to your subscription.", - "extDocId": "azureopenai", - "extDocUrl": "https://oai.azure.com/", - "name": "deployment_id", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "A valid Microsoft Entra token.\nYou must specify either `api_key` or `entra_id`.\nIf you do not provide either or you provide both, you will receive an error when you try to create your model.", - "extDocId": "azureopenai-auth", - "extDocUrl": "https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#authentication", - "name": "entra_id", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "This setting helps to minimize the number of rate limit errors returned from Azure.\nThe `azureopenai` service sets a default number of requests allowed per minute depending on the task type.\nFor `text_embedding`, it is set to `1440`.\nFor `completion`, it is set to `120`.", - "extDocId": "azureopenai-quota-limits", - "extDocUrl": "https://learn.microsoft.com/en-us/azure/ai-services/openai/quotas-limits", ->>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) ->>>>>>> 1e946eb24 (Add Azure OpenAI inference details (#4019)) - "name": "rate_limit", - "required": false, - "type": { - "kind": "instance_of", - "type": { - 
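Assuming the `azureopenai` service wires the fields above together in the usual way, a minimal request sketch looks like this; the resource, deployment, and API version values are placeholders, and exactly one of `api_key` or `entra_id` must be supplied.

PUT _inference/text_embedding/azure_openai_embeddings
{
  "service": "azureopenai",
  "service_settings": {
    "api_key": "Azure-OpenAI-API-key",
    "resource_name": "Resource-name",
    "deployment_id": "Deployment-id",
    "api_version": "2024-02-01"
  }
}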
"name": "RateLimitSetting", - "namespace": "inference._types" - } - } - }, - { -<<<<<<< HEAD -======= -<<<<<<< HEAD ->>>>>>> 1e946eb24 (Add Azure OpenAI inference details (#4019)) - "description": "A valid AWS secret key that is paired with the `access_key`.\nFor informationg about creating and managing access and secret keys, refer to the AWS documentation.", - "extDocId": "amazonbedrock-secret-keys", - "extDocUrl": "https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html", - "name": "secret_key", -<<<<<<< HEAD -======= -======= - "description": "The name of your Azure OpenAI resource.\nYou can find this from the list of resources in the Azure Portal for your subscription.", - "extDocId": "azureopenai-portal", - "extDocUrl": "https://portal.azure.com/#view/HubsExtension/BrowseAll", - "name": "resource_name", ->>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) ->>>>>>> 1e946eb24 (Add Azure OpenAI inference details (#4019)) - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - } - ], -<<<<<<< HEAD - "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L95-L137" -======= -<<<<<<< HEAD - "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L95-L137" -======= - "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L99-L144" ->>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) ->>>>>>> 1e946eb24 (Add Azure OpenAI inference details (#4019)) - }, - { - "kind": "interface", - "name": { -<<<<<<< HEAD -======= - "name": "RateLimitSetting", - "namespace": "inference._types" - }, - "properties": [ - { - "description": "The number of requests allowed per minute.", - "name": "requests_per_minute", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "integer", - "namespace": "_types" - } - } - } - ], - "specLocation": "inference/_types/Services.ts#L95-L100" - }, - { - "kind": "interface", - "name": { -<<<<<<< HEAD ->>>>>>> 1e946eb24 (Add Azure OpenAI inference details (#4019)) - "name": "AmazonBedrockTaskSettings", - "namespace": "inference.put_amazonbedrock" - }, - "properties": [ - { - "description": "For a `completion` task, it sets the maximum number for the output tokens to be generated.", - "name": "max_new_tokens", - "required": false, - "serverDefault": 64, - "type": { - "kind": "instance_of", - "type": { - "name": "integer", - "namespace": "_types" - } - } - }, - { - "description": "For a `completion` task, it is a number between 0.0 and 1.0 that controls the apparent creativity of the results.\nAt temperature 0.0 the model is most deterministic, at temperature 1.0 most random.\nIt should not be used if `top_p` or `top_k` is specified.", - "name": "temperature", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "float", - "namespace": "_types" - } - } - }, - { - "description": "For a `completion` task, it limits samples to the top-K most likely words, balancing coherence and variability.\nIt is only available for anthropic, cohere, and mistral providers.\nIt is an alternative to `temperature`; it should not be used if `temperature` is specified.", - "name": "top_k", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "float", - "namespace": "_types" - } - } - }, - { - "description": "For a `completion` task, it is a number in the range of 0.0 to 1.0, to eliminate low-probability tokens.\nTop-p uses nucleus sampling to select top tokens whose sum of 
likelihoods does not exceed a certain value, ensuring both variety and coherence.\nIt is an alternative to `temperature`; it should not be used if `temperature` is specified.", - "name": "top_p", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "float", - "namespace": "_types" + "name": "string", + "namespace": "_builtins" } } } ], - "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L139-L163" + "specLocation": "inference/_types/Services.ts#L60-L89" }, { "kind": "interface", "name": { - "name": "AnthropicServiceSettings", - "namespace": "inference.put_anthropic" + "name": "AlibabaCloudServiceSettings", + "namespace": "inference.put_alibabacloud" }, "properties": [ { - "description": "A valid API key for the Anthropic API.", + "description": "A valid API key for the AlibabaCloud AI Search API.", "name": "api_key", "required": true, "type": { @@ -123734,9 +121919,10 @@ } }, { - "description": "The name of the model to use for the inference task.\nRefer to the Anthropic documentation for the list of supported models.", - "extDocId": "anothropic-models", - "name": "model_id", + "description": "The name of the host address used for the inference task.\nYou can find the host address in the API keys section of the documentation.", + "extDocId": "alibabacloud-api-keys", + "extDocUrl": "https://opensearch.console.aliyun.com/cn-shanghai/rag/api-key", + "name": "host", "required": true, "type": { "kind": "instance_of", @@ -123747,7 +121933,7 @@ } }, { - "description": "This setting helps to minimize the number of rate limit errors returned from Anthropic.\nBy default, the `anthropic` service sets the number of requests allowed per minute to 50.", + "description": "This setting helps to minimize the number of rate limit errors returned from AlibabaCloud AI Search.\nBy default, the `alibabacloud-ai-search` service sets the number of requests allowed per minute to `1000`.", "name": "rate_limit", "required": false, "type": { @@ -123757,46 +121943,44 @@ "namespace": "inference._types" } } - } - ], - "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L92-L108" - }, - { - "kind": "interface", - "name": { - "name": "AnthropicTaskSettings", - "namespace": "inference.put_anthropic" - }, - "properties": [ + }, { - "description": "For a `completion` task, it is the maximum number of tokens to generate before stopping.", - "name": "max_tokens", + "description": "The name of the model service to use for the inference task.\nThe following service IDs are available for the `completion` task:\n\n* `ops-qwen-turbo`\n* `qwen-turbo`\n* `qwen-plus`\n* `qwen-max ÷ qwen-max-longcontext`\n\nThe following service ID is available for the `rerank` task:\n\n* `ops-bge-reranker-larger`\n\nThe following service ID is available for the `sparse_embedding` task:\n\n* `ops-text-sparse-embedding-001`\n\nThe following service IDs are available for the `text_embedding` task:\n\n`ops-text-embedding-001`\n`ops-text-embedding-zh-001`\n`ops-text-embedding-en-001`\n`ops-text-embedding-002`", + "name": "service_id", "required": true, "type": { "kind": "instance_of", "type": { - "name": "integer", - "namespace": "_types" + "name": "string", + "namespace": "_builtins" } } }, { - "description": "For a `completion` task, it is the amount of randomness injected into the response.\nFor more details about the supported range, refer to Anthropic documentation.", - "extDocId": "anthropic-messages", - "extDocUrl": "https://docs.anthropic.com/en/api/messages", - "name": "temperature", - 
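As a sketch of how the Amazon Bedrock service and task settings documented above combine; the region, provider, and model values are illustrative placeholders, and note that `temperature` should not be combined with `top_k` or `top_p`.

PUT _inference/completion/amazon_bedrock_completion
{
  "service": "amazonbedrock",
  "service_settings": {
    "access_key": "AWS-access-key",
    "secret_key": "AWS-secret-key",
    "region": "us-east-1",
    "provider": "amazontitan",
    "model": "amazon.titan-text-premier-v1:0"
  },
  "task_settings": {
    "max_new_tokens": 256,
    "temperature": 0.2
  }
}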
"required": false, + "description": "The name of the workspace used for the inference task.", + "name": "workspace", + "required": true, "type": { "kind": "instance_of", "type": { - "name": "float", - "namespace": "_types" + "name": "string", + "namespace": "_builtins" } } - }, + } + ], + "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L93-L138" + }, + { + "kind": "interface", + "name": { + "name": "RateLimitSetting", + "namespace": "inference._types" + }, + "properties": [ { - "description": "For a `completion` task, it specifies to only sample from the top K options for each subsequent token.\nIt is recommended for advanced use cases only.\nYou usually only need to use `temperature`.", - "name": "top_k", + "description": "The number of requests allowed per minute.", + "name": "requests_per_minute", "required": false, "type": { "kind": "instance_of", @@ -123805,21 +121989,43 @@ "namespace": "_types" } } + } + ], + "specLocation": "inference/_types/Services.ts#L95-L100" + }, + { + "kind": "interface", + "name": { + "name": "AlibabaCloudTaskSettings", + "namespace": "inference.put_alibabacloud" + }, + "properties": [ + { + "description": "For a `sparse_embedding` or `text_embedding` task, specify the type of input passed to the model.\nValid values are:\n\n* `ingest` for storing document embeddings in a vector database.\n* `search` for storing embeddings of search queries run against a vector database to find relevant documents.", + "name": "input_type", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } }, { - "description": "For a `completion` task, it specifies to use Anthropic's nucleus sampling.\nIn nucleus sampling, Anthropic computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches the specified probability.\nYou should either alter `temperature` or `top_p`, but not both.\nIt is recommended for advanced use cases only.\nYou usually only need to use `temperature`.", - "name": "top_p", + "description": "For a `sparse_embedding` task, it affects whether the token name will be returned in the response.\nIt defaults to `false`, which means only the token ID will be returned in the response.", + "name": "return_token", "required": false, "type": { "kind": "instance_of", "type": { - "name": "float", - "namespace": "_types" + "name": "boolean", + "namespace": "_builtins" } } } ], - "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L110-L135" + "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L140-L154" }, { "kind": "interface", @@ -123953,7 +122159,6 @@ }, { "description": "For a `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.", -<<<<<<< HEAD "name": "user", "required": false, "type": { @@ -124062,19 +122267,12 @@ { "kind": "interface", "name": { -======= -======= ->>>>>>> 1e946eb24 (Add Azure OpenAI inference details (#4019)) "name": "AzureOpenAITaskSettings", "namespace": "inference.put_azureopenai" }, "properties": [ { "description": "For a `completion` or `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.", -<<<<<<< HEAD -======= ->>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) ->>>>>>> 1e946eb24 (Add Azure OpenAI inference details (#4019)) "name": "user", "required": false, "type": { @@ -124086,24 +122284,19 @@ } } ], -<<<<<<< HEAD 
"specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L146-L152" -======= -<<<<<<< HEAD - "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L136-L164" ->>>>>>> 1e946eb24 (Add Azure OpenAI inference details (#4019)) }, { "kind": "interface", "name": { - "name": "CohereServiceSettings", - "namespace": "inference.put_cohere" + "name": "HuggingFaceServiceSettings", + "namespace": "inference.put_hugging_face" }, "properties": [ { - "description": "A valid API key for your Cohere account.\nYou can find or create your Cohere API keys on the Cohere API key settings page.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", - "extDocId": "cohere-api-keys", - "extDocUrl": "https://dashboard.cohere.com/api-keys", + "description": "A valid access token for your HuggingFace account.\nYou can create or find your access tokens on the HuggingFace settings page.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "extDocId": "huggingface-tokens", + "extDocUrl": "https://huggingface.co/settings/tokens", "name": "api_key", "required": true, "type": { @@ -124115,142 +122308,7 @@ } }, { - "description": "For a `text_embedding` task, the types of embeddings you want to get back.\nUse `byte` for signed int8 embeddings (this is a synonym of `int8`).\nUse `float` for the default float embeddings.\nUse `int8` for signed int8 embeddings.", - "name": "embedding_type", - "required": false, - "serverDefault": "float", - "type": { - "kind": "instance_of", - "type": { - "name": "EmbeddingType", - "namespace": "inference.put_cohere" - } - } - }, - { - "description": "For a `completion`, `rerank`, or `text_embedding` task, the name of the model to use for the inference task.\n\n* For the available `completion` models, refer to the [Cohere command docs](https://docs.cohere.com/docs/models#command).\n* For the available `rerank` models, refer to the [Cohere rerank docs](https://docs.cohere.com/reference/rerank-1).\n* For the available `text_embedding` models, refer to [Cohere embed docs](https://docs.cohere.com/reference/embed).\n\nThe default value for a text embedding task is `embed-english-v2.0`.", - "name": "model_id", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "This setting helps to minimize the number of rate limit errors returned from Cohere.\nBy default, the `cohere` service sets the number of requests allowed per minute to 10000.", - "name": "rate_limit", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "RateLimitSetting", - "namespace": "inference._types" - } - } - }, - { - "description": "The similarity measure.\nIf the `embedding_type` is `float`, the default value is `dot_product`.\nIf the `embedding_type` is `int8` or `byte`, the default value is `cosine`.", - "name": "similarity", - "required": false, - "type": { - 
"kind": "instance_of", - "type": { - "name": "SimilarityType", - "namespace": "inference.put_cohere" - } - } - } - ], - "specLocation": "inference/put_cohere/PutCohereRequest.ts#L119-L160" - }, - { - "kind": "interface", - "name": { - "name": "CohereTaskSettings", - "namespace": "inference.put_cohere" - }, - "properties": [ - { - "description": "For a `text_embedding` task, the type of input passed to the model.\nValid values are:\n\n* `classification`: Use it for embeddings passed through a text classifier.\n* `clustering`: Use it for the embeddings run through a clustering algorithm.\n* `ingest`: Use it for storing document embeddings in a vector database.\n* `search`: Use it for storing embeddings of search queries run against a vector database to find relevant documents.\n\nIMPORTANT: The `input_type` field is required when using embedding models `v3` and higher.", - "name": "input_type", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "InputType", - "namespace": "inference.put_cohere" - } - } - }, - { - "description": "For a `rerank` task, return doc text within the results.", - "name": "return_documents", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "boolean", - "namespace": "_builtins" - } - } - }, - { - "description": "For a `rerank` task, the number of most relevant documents to return.\nIt defaults to the number of the documents.\nIf this inference endpoint is used in a `text_similarity_reranker` retriever query and `top_n` is set, it must be greater than or equal to `rank_window_size` in the query.", - "name": "top_n", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "integer", - "namespace": "_types" - } - } - }, - { - "description": "For a `text_embedding` task, the method to handle inputs longer than the maximum token length.\nValid values are:\n\n* `END`: When the input exceeds the maximum input token length, the end of the input is discarded.\n* `NONE`: When the input exceeds the maximum input token length, an error is returned.\n* `START`: When the input exceeds the maximum input token length, the start of the input is discarded.", - "name": "truncate", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "TruncateType", - "namespace": "inference.put_cohere" - } - } - } - ], - "specLocation": "inference/put_cohere/PutCohereRequest.ts#L162-L194" -<<<<<<< HEAD -======= -======= - "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L146-L152" ->>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) ->>>>>>> 1e946eb24 (Add Azure OpenAI inference details (#4019)) - }, - { - "kind": "interface", - "name": { - "name": "EisServiceSettings", - "namespace": "inference.put_eis" - }, - "properties": [ - { - "description": "The name of the model to use for the inference task.", - "name": "model_id", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "This setting helps to minimize the number of rate limit errors returned.\nBy default, the `elastic` service sets the number of requests allowed per minute to `240` in case of `chat_completion`.", + "description": "This setting helps to minimize the number of rate limit errors returned from Hugging Face.\nBy default, the `hugging_face` service sets the number of requests allowed per minute to 3000.", "name": "rate_limit", "required": false, "type": { @@ -124260,50 +122318,10 @@ "namespace": 
"inference._types" } } - } - ], - "specLocation": "inference/put_eis/PutEisRequest.ts#L72-L82" - }, - { - "kind": "interface", - "name": { -<<<<<<< HEAD -======= -<<<<<<< HEAD ->>>>>>> 1e946eb24 (Add Azure OpenAI inference details (#4019)) - "name": "ElasticsearchServiceSettings", - "namespace": "inference.put_elasticsearch" - }, - "properties": [ - { - "description": "Adaptive allocations configuration details.\nIf `enabled` is true, the number of allocations of the model is set based on the current load the process gets.\nWhen the load is high, a new model allocation is automatically created, respecting the value of `max_number_of_allocations` if it's set.\nWhen the load is low, a model allocation is automatically removed, respecting the value of `min_number_of_allocations` if it's set.\nIf `enabled` is true, do not set the number of allocations manually.", - "name": "adaptive_allocations", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "AdaptiveAllocations", - "namespace": "inference.put_elasticsearch" - } - } - }, - { - "description": "The deployment identifier for a trained model deployment.\nWhen `deployment_id` is used the `model_id` is optional.", - "name": "deployment_id", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } }, { - "description": "The name of the model to use for the inference task.\nIt can be the ID of a built-in model (for example, `.multilingual-e5-small` for E5) or a text embedding model that was uploaded by using the Eland client.", - "extDocId": "eland-import", - "extDocUrl": "https://www.elastic.co/guide/en/machine-learning/current/ml-nlp-import-model.html#ml-nlp-import-script", - "name": "model_id", + "description": "The URL endpoint to use for the requests.", + "name": "url", "required": true, "type": { "kind": "instance_of", @@ -124312,206 +122330,21 @@ "namespace": "_builtins" } } - }, - { - "description": "The total number of allocations that are assigned to the model across machine learning nodes.\nIncreasing this value generally increases the throughput.\nIf adaptive allocations are enabled, do not set this value because it's automatically set.", - "name": "num_allocations", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "integer", - "namespace": "_types" - } - } - }, - { - "description": "The number of threads used by each model allocation during inference.\nThis setting generally increases the speed per inference request.\nThe inference process is a compute-bound process; `threads_per_allocations` must not exceed the number of available allocated processors per node.\nThe value must be a power of 2.\nThe maximum value is 32.", - "name": "num_threads", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "integer", - "namespace": "_types" - } - } - } - ], - "specLocation": "inference/put_elasticsearch/PutElasticsearchRequest.ts#L117-L151" - }, - { - "kind": "interface", - "name": { - "name": "AdaptiveAllocations", - "namespace": "inference.put_elasticsearch" - }, - "properties": [ - { - "description": "Turn on `adaptive_allocations`.", - "name": "enabled", - "required": false, - "serverDefault": false, - "type": { - "kind": "instance_of", - "type": { - "name": "boolean", - "namespace": "_builtins" - } - } - }, - { - "description": "The maximum number of allocations to scale to.\nIf set, it must be greater than or equal to `min_number_of_allocations`.", - "name": 
"max_number_of_allocations", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "integer", - "namespace": "_types" - } - } - }, - { - "description": "The minimum number of allocations to scale to.\nIf set, it must be greater than or equal to 0.\nIf not defined, the deployment scales to 0.", - "name": "min_number_of_allocations", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "integer", - "namespace": "_types" - } - } - } - ], - "specLocation": "inference/put_elasticsearch/PutElasticsearchRequest.ts#L98-L115" - }, - { - "kind": "interface", - "name": { - "name": "ElasticsearchTaskSettings", - "namespace": "inference.put_elasticsearch" - }, - "properties": [ - { - "description": "For a `rerank` task, return the document instead of only the index.", - "name": "return_documents", - "required": false, - "serverDefault": true, - "type": { - "kind": "instance_of", - "type": { - "name": "boolean", - "namespace": "_builtins" - } - } - } - ], - "specLocation": "inference/put_elasticsearch/PutElasticsearchRequest.ts#L153-L159" - }, - { - "kind": "interface", - "name": { - "name": "ElserServiceSettings", - "namespace": "inference.put_elser" - }, - "properties": [ - { - "description": "Adaptive allocations configuration details.\nIf `enabled` is true, the number of allocations of the model is set based on the current load the process gets.\nWhen the load is high, a new model allocation is automatically created, respecting the value of `max_number_of_allocations` if it's set.\nWhen the load is low, a model allocation is automatically removed, respecting the value of `min_number_of_allocations` if it's set.\nIf `enabled` is true, do not set the number of allocations manually.", - "name": "adaptive_allocations", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "AdaptiveAllocations", - "namespace": "inference.put_elser" - } - } - }, - { - "description": "The total number of allocations this model is assigned across machine learning nodes.\nIncreasing this value generally increases the throughput.\nIf adaptive allocations is enabled, do not set this value because it's automatically set.", - "name": "num_allocations", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "integer", - "namespace": "_types" - } - } - }, - { - "description": "The number of threads used by each model allocation during inference.\nIncreasing this value generally increases the speed per inference request.\nThe inference process is a compute-bound process; `threads_per_allocations` must not exceed the number of available allocated processors per node.\nThe value must be a power of 2.\nThe maximum value is 32.\n\n> info\n> If you want to optimize your ELSER endpoint for ingest, set the number of threads to 1. 
If you want to optimize your ELSER endpoint for search, set the number of threads to greater than 1.", - "name": "num_threads", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "integer", - "namespace": "_types" - } - } - } - ], - "specLocation": "inference/put_elser/PutElserRequest.ts#L111-L137" - }, - { - "kind": "interface", - "name": { - "name": "AdaptiveAllocations", - "namespace": "inference.put_elser" - }, - "properties": [ - { - "description": "Turn on `adaptive_allocations`.", - "name": "enabled", - "required": false, - "serverDefault": false, - "type": { - "kind": "instance_of", - "type": { - "name": "boolean", - "namespace": "_builtins" - } - } - }, - { - "description": "The maximum number of allocations to scale to.\nIf set, it must be greater than or equal to `min_number_of_allocations`.", - "name": "max_number_of_allocations", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "integer", - "namespace": "_types" - } - } - }, - { - "description": "The minimum number of allocations to scale to.\nIf set, it must be greater than or equal to 0.\nIf not defined, the deployment scales to 0.", - "name": "min_number_of_allocations", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "integer", - "namespace": "_types" - } - } } ], - "specLocation": "inference/put_elser/PutElserRequest.ts#L92-L109" + "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L99-L120" }, { "kind": "interface", "name": { - "name": "GoogleAiStudioServiceSettings", - "namespace": "inference.put_googleaistudio" + "name": "JinaAIServiceSettings", + "namespace": "inference.put_jinaai" }, "properties": [ { - "description": "A valid API key of your Google Gemini account.", + "description": "A valid API key of your JinaAI account.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "extDocId": "jinaAi-embeddings", + "extDocUrl": "https://jina.ai/embeddings/", "name": "api_key", "required": true, "type": { @@ -124523,73 +122356,9 @@ } }, { - "description": "The name of the model to use for the inference task.\nRefer to the Google documentation for the list of supported models.", - "extDocId": "googleaistudio-models", - "extDocUrl": "https://ai.google.dev/gemini-api/docs/models", + "description": "The name of the model to use for the inference task.\nFor a `rerank` task, it is required.\nFor a `text_embedding` task, it is optional.", "name": "model_id", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "This setting helps to minimize the number of rate limit errors returned from Google AI Studio.\nBy default, the `googleaistudio` service sets the number of requests allowed per minute to 360.", - "name": "rate_limit", "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "RateLimitSetting", - "namespace": "inference._types" - } - } - } - ], - "specLocation": "inference/put_googleaistudio/PutGoogleAiStudioRequest.ts#L86-L102" - }, - { - "kind": "interface", - "name": { - "name": "GoogleVertexAIServiceSettings", - "namespace": "inference.put_googlevertexai" - }, - "properties": [ - { - 
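Following the note above, an ELSER endpoint optimized for ingest would pin `num_threads` to 1; both `num_allocations` and `num_threads` are required for this service. The endpoint name in this sketch is a placeholder.

PUT _inference/sparse_embedding/my-elser-endpoint
{
  "service": "elser",
  "service_settings": {
    "num_allocations": 1,
    "num_threads": 1
  }
}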
"description": "The name of the location to use for the inference task.\nRefer to the Google documentation for the list of supported locations.", - "extDocId": "googlevertexai-locations", - "extDocUrl": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations", - "name": "location", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The name of the model to use for the inference task.\nRefer to the Google documentation for the list of supported models.", - "extDocId": "googlevertexai-models", - "extDocUrl": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api", - "name": "model_id", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The name of the project to use for the inference task.", - "name": "project_id", - "required": true, "type": { "kind": "instance_of", "type": { @@ -124599,7 +122368,9 @@ } }, { - "description": "This setting helps to minimize the number of rate limit errors returned from Google Vertex AI.\nBy default, the `googlevertexai` service sets the number of requests allowed per minute to 30.000.", + "description": "This setting helps to minimize the number of rate limit errors returned from JinaAI.\nBy default, the `jinaai` service sets the number of requests allowed per minute to 2000 for all task types.", + "extDocId": "jinaAi-rate-limit", + "extDocUrl": "https://jina.ai/contact-sales/#rate-limit", "name": "rate_limit", "required": false, "type": { @@ -124611,30 +122382,30 @@ } }, { - "description": "A valid service account in JSON format for the Google Vertex AI API.", - "name": "service_account_json", - "required": true, + "description": "For a `text_embedding` task, the similarity measure. 
One of cosine, dot_product, l2_norm.\nThe default value varies with the embedding type.\nFor example, a float embedding type uses a `dot_product` similarity measure by default.",
+        "name": "similarity",
+        "required": false,
         "type": {
           "kind": "instance_of",
           "type": {
-            "name": "string",
-            "namespace": "_builtins"
+            "name": "SimilarityType",
+            "namespace": "inference.put_jinaai"
           }
         }
       }
     ],
-    "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L92-L118"
+    "specLocation": "inference/put_jinaai/PutJinaAiRequest.ts#L108-L137"
   },
   {
     "kind": "interface",
     "name": {
-      "name": "GoogleVertexAITaskSettings",
-      "namespace": "inference.put_googlevertexai"
+      "name": "JinaAITaskSettings",
+      "namespace": "inference.put_jinaai"
     },
     "properties": [
       {
-        "description": "For a `text_embedding` task, truncate inputs longer than the maximum token length automatically.",
-        "name": "auto_truncate",
+        "description": "For a `rerank` task, return the doc text within the results.",
+        "name": "return_documents",
         "required": false,
         "type": {
           "kind": "instance_of",
           "type": {
@@ -124645,83 +122416,20 @@
       }
       },
       {
-        "description": "For a `rerank` task, the number of the top N documents that should be returned.",
-        "name": "top_n",
-        "required": false,
-        "type": {
-          "kind": "instance_of",
-          "type": {
-            "name": "integer",
-            "namespace": "_types"
-          }
-        }
-      }
-    ],
-    "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L120-L129"
-  },
-  {
-    "kind": "interface",
-    "name": {
-<<<<<<< HEAD
->>>>>>> ef980f023 (Add Alibaba Cloud inference API (#4021))
-=======
->>>>>>> d5b1a529a (Add Azure OpenAI inference details (#4019))
->>>>>>> 1e946eb24 (Add Azure OpenAI inference details (#4019))
      "name": "HuggingFaceServiceSettings",
      "namespace": "inference.put_hugging_face"
    },
    "properties": [
      {
-        "description": "A valid access token for your HuggingFace account.\nYou can create or find your access tokens on the HuggingFace settings page.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.",
-        "extDocId": "huggingface-tokens",
-        "extDocUrl": "https://huggingface.co/settings/tokens",
-        "name": "api_key",
-        "required": true,
-        "type": {
-          "kind": "instance_of",
-          "type": {
-            "name": "string",
-            "namespace": "_builtins"
-          }
-        }
-      },
-      {
-        "description": "This setting helps to minimize the number of rate limit errors returned from Hugging Face.\nBy default, the `hugging_face` service sets the number of requests allowed per minute to 3000.",
-        "name": "rate_limit",
+        "description": "For a `text_embedding` task, the task passed to the model.\nValid values are:\n\n* `classification`: Use it for embeddings passed through a text classifier.\n* `clustering`: Use it for the embeddings run through a clustering algorithm.\n* `ingest`: Use it for storing document embeddings in a vector database.\n* `search`: Use it for storing embeddings of search queries run against a vector database to find relevant documents.",
+        "name": "task",
         "required": false,
         "type": {
           "kind": "instance_of",
           "type": {
-            "name": "RateLimitSetting",
-            "namespace": "inference._types"
+            "name": "TextEmbeddingTask",
+            "namespace": "inference.put_jinaai"
           }
         }
       },
       {
-        "description": "The URL endpoint to use for the requests.",
-        "name": "url",
- 
"required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - } - ], - "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L99-L120" - }, - { - "kind": "interface", - "name": { - "name": "RateLimitSetting", - "namespace": "inference._types" - }, - "properties": [ - { - "description": "The number of requests allowed per minute.", - "name": "requests_per_minute", + "description": "For a `rerank` task, the number of most relevant documents to return.\nIt defaults to the number of the documents.\nIf this inference endpoint is used in a `text_similarity_reranker` retriever query and `top_n` is set, it must be greater than or equal to `rank_window_size` in the query.", + "name": "top_n", "required": false, "type": { "kind": "instance_of", @@ -124732,12 +122440,11 @@ } } ], - "specLocation": "inference/_types/Services.ts#L95-L100" + "specLocation": "inference/put_jinaai/PutJinaAiRequest.ts#L139-L160" }, { "kind": "interface", "name": { -<<<<<<< HEAD "name": "MistralServiceSettings", "namespace": "inference.put_mistral" }, @@ -124800,10 +122507,6 @@ { "kind": "interface", "name": { -======= -======= ->>>>>>> 52fce7a43 (Add Azure OpenAI details and examples) ->>>>>>> 1e946eb24 (Add Azure OpenAI inference details (#4019)) "name": "OpenAIServiceSettings", "namespace": "inference.put_openai" }, diff --git a/output/schema/schema.json b/output/schema/schema.json index d670fbbb7e..3bdde706cb 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -9348,6 +9348,51 @@ } ] }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.14.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Create an Azure AI studio inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `azureaistudio` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "docId": "inference-api-put-azureaistudio", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-azure-ai-studio.html", + "name": "inference.put_azureaistudio", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.put_azureaistudio" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.put_azureaistudio" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "PUT" + ], + "path": "/_inference/{task_type}/{azureaistudio_inference_id}" + } + ] + }, { "availability": { "serverless": { @@ -150815,6 +150860,310 @@ }, "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L89-L91" }, + { + "kind": "interface", + "name": { + "name": "AzureAiStudioServiceSettings", + "namespace": "inference.put_azureaistudio" + }, + "properties": [ + { + "description": "A valid API key of your Azure AI Studio 
model deployment.\nThis key can be found on the overview page for your deployment in the management section of your Azure AI Studio account.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "extDocId": "azureaistudio-api-keys", + "extDocUrl": "https://ai.azure.com/", + "name": "api_key", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The type of endpoint that is available for deployment through Azure AI Studio: `token` or `realtime`.\nThe `token` endpoint type is for \"pay as you go\" endpoints that are billed per token.\nThe `realtime` endpoint type is for \"real-time\" endpoints that are billed per hour of usage.", + "extDocId": "azureaistudio-endpoint-types", + "extDocUrl": "https://learn.microsoft.com/en-us/azure/ai-foundry/concepts/deployments-overview#billing-for-deploying-and-inferencing-llms-in-azure-ai-studio", + "name": "endpoint_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The target URL of your Azure AI Studio model deployment.\nThis can be found on the overview page for your deployment in the management section of your Azure AI Studio account.", + "name": "target", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The model provider for your deployment.\nNote that some providers may support only certain task types.\nSupported providers include:\n\n* `cohere` - available for `text_embedding` and `completion` task types\n* `databricks` - available for `completion` task type only\n* `meta` - available for `completion` task type only\n* `microsoft_phi` - available for `completion` task type only\n* `mistral` - available for `completion` task type only\n* `openai` - available for `text_embedding` and `completion` task types", + "name": "provider", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from Azure AI Studio.\nBy default, the `azureaistudio` service sets the number of requests allowed per minute to 240.", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + } + ], + "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L92-L134" + }, + { + "kind": "interface", + "name": { + "name": "AzureAiStudioTaskSettings", + "namespace": "inference.put_azureaistudio" + }, + "properties": [ + { + "description": "For a `completion` task, instruct the inference process to perform sampling.\nIt has no effect unless `temperature` or `top_p` is specified.", + "name": "do_sample", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + }, + { + "description": "For a `completion` task, provide a hint for the maximum number of output tokens to be generated.", + "name": "max_new_tokens", + "required": 
false, + "serverDefault": 64, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "For a `completion` task, control the apparent creativity of generated completions with a sampling temperature.\nIt must be a number in the range of 0.0 to 2.0.\nIt should not be used if `top_p` is specified.", + "name": "temperature", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + }, + { + "description": "For a `completion` task, make the model consider the results of the tokens with nucleus sampling probability.\nIt is an alternative value to `temperature` and must be a number in the range of 0.0 to 2.0.\nIt should not be used if `temperature` is specified.", + "name": "top_p", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + }, + { + "description": "For a `text_embedding` task, specify the user issuing the request.\nThis information can be used for abuse detection.", + "name": "user", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L136-L164" + }, + { + "kind": "enum", + "members": [ + { + "name": "completion" + }, + { + "name": "text_embedding" + } + ], + "name": { + "name": "AzureAiStudioTaskType", + "namespace": "inference.put_azureaistudio" + }, + "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L83-L86" + }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, + { + "description": "The type of service supported for the specified task type. In this case, `azureaistudio`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_azureaistudio" + } + } + }, + { + "description": "Settings used to install the inference model. 
These settings are specific to the `azureaistudio` service.",
+          "name": "service_settings",
+          "required": true,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "AzureAiStudioServiceSettings",
+              "namespace": "inference.put_azureaistudio"
+            }
+          }
+        },
+        {
+          "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.",
+          "name": "task_settings",
+          "required": false,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "AzureAiStudioTaskSettings",
+              "namespace": "inference.put_azureaistudio"
+            }
+          }
+        }
+      ]
+      },
+      "description": "Create an Azure AI studio inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `azureaistudio` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
+      "examples": {
+        "PutAzureAiStudioRequestExample1": {
+          "description": "Run `PUT _inference/text_embedding/azure_ai_studio_embeddings` to create an inference endpoint that performs a text_embedding task. Note that you do not specify a model here, as it is defined already in the Azure AI Studio deployment.",
+          "summary": "A text embedding task",
+          "value": "{\n    \"service\": \"azureaistudio\",\n    \"service_settings\": {\n        \"api_key\": \"Azure-AI-Studio-API-key\",\n        \"target\": \"Target-Uri\",\n        \"provider\": \"openai\",\n        \"endpoint_type\": \"token\"\n    }\n}"
+        },
+        "PutAzureAiStudioRequestExample2": {
+          "description": "Run `PUT _inference/completion/azure_ai_studio_completion` to create an inference endpoint that performs a completion task.",
+          "summary": "A completion task",
+          "value": "{\n    \"service\": \"azureaistudio\",\n    \"service_settings\": {\n        \"api_key\": \"Azure-AI-Studio-API-key\",\n        \"target\": \"Target-URI\",\n        \"provider\": \"databricks\",\n        \"endpoint_type\": \"realtime\"\n    }\n}"
+        }
+      },
+      "inherits": {
+        "type": {
+          "name": "RequestBase",
+          "namespace": "_types"
+        }
+      },
+      "name": {
+        "name": "Request",
+        "namespace": "inference.put_azureaistudio"
+      },
+      "path": [
+        {
+          "description": "The type of the inference task that the model will perform.",
+          "name": "task_type",
+          "required": true,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "AzureAiStudioTaskType",
+              "namespace": "inference.put_azureaistudio"
+            }
+          }
+        },
+        {
+          "description": "The unique identifier of the inference endpoint.",
+          "name": "azureaistudio_inference_id",
+          "required": true,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "Id",
+              "namespace": "_types"
+            }
+          }
+        }
+      ],
+      "query": [],
+      "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L28-L81"
+    },
+    {
+      "kind": "response",
+      "body": {
+        "kind": "value",
+        "value": {
+          "kind": "instance_of",
+          "type": {
+            "name": "InferenceEndpointInfo",
+            "namespace": "inference._types"
+          }
+        }
+      },
+      "name": {
+        "name": "Response",
+        "namespace": "inference.put_azureaistudio"
+      },
+      "specLocation": "inference/put_azureaistudio/PutAzureAiStudioResponse.ts#L22-L24"
+    },
+    {
+      "kind": "enum",
+      "members": [
+        {
+          "name": "azureaistudio"
+        }
+ 
], + "name": { + "name": "ServiceType", + "namespace": "inference.put_azureaistudio" + }, + "specLocation": "inference/put_azureaistudio/PutAzureAiStudioRequest.ts#L88-L90" + }, { "kind": "interface", "name": { diff --git a/output/typescript/types.ts b/output/typescript/types.ts index f60031d59c..fe9191ff0c 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -13277,6 +13277,39 @@ export type InferencePutAlibabacloudResponse = InferenceInferenceEndpointInfo export type InferencePutAlibabacloudServiceType = 'alibabacloud-ai-search' +export interface InferencePutAzureaistudioAzureAiStudioServiceSettings { + api_key: string + endpoint_type: string + target: string + provider: string + rate_limit?: InferenceRateLimitSetting +} + +export interface InferencePutAzureaistudioAzureAiStudioTaskSettings { + do_sample?: float + max_new_tokens?: integer + temperature?: float + top_p?: float + user?: string +} + +export type InferencePutAzureaistudioAzureAiStudioTaskType = 'completion' | 'text_embedding' + +export interface InferencePutAzureaistudioRequest extends RequestBase { + task_type: InferencePutAzureaistudioAzureAiStudioTaskType + azureaistudio_inference_id: Id + body?: { + chunking_settings?: InferenceInferenceChunkingSettings + service: InferencePutAzureaistudioServiceType + service_settings: InferencePutAzureaistudioAzureAiStudioServiceSettings + task_settings?: InferencePutAzureaistudioAzureAiStudioTaskSettings + } +} + +export type InferencePutAzureaistudioResponse = InferenceInferenceEndpointInfo + +export type InferencePutAzureaistudioServiceType = 'azureaistudio' + export interface InferencePutAzureopenaiAzureOpenAIServiceSettings { api_key?: string api_version: string diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv index 391312abcd..ec48f4fc75 100644 --- a/specification/_doc_ids/table.csv +++ b/specification/_doc_ids/table.csv @@ -332,6 +332,7 @@ inference-api-amazonbedrock,https://www.elastic.co/guide/en/elasticsearch/refere inference-api-put-azureaistudio,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-azure-ai-studio.html inference-api-put-azureopenai,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-azure-openai.html inference-api-put-cohere,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-cohere.html +inference-api-put-cohere,https://www.elastic.co/guide/en/elasticsearch/reference/branch/infer-service-cohere.html inference-api-put-eis,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-eis.html inference-api-put-huggingface,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-hugging-face.html inference-api-put-googlevertexai,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-google-vertex-ai.html diff --git a/specification/_json_spec/inference.put_azureaistudio.json b/specification/_json_spec/inference.put_azureaistudio.json new file mode 100644 index 0000000000..00de83eca7 --- /dev/null +++ b/specification/_json_spec/inference.put_azureaistudio.json @@ -0,0 +1,35 @@ +{ + "inference.put_azureaistudio": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-azure-ai-studio.html", + "description": "Configure an Azure AI Studio inference endpoint" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": ["application/json"], + "content_type": ["application/json"] + }, + "url": { + 
"paths": [ + { + "path": "/_inference/{task_type}/{azureaistudio_inference_id}", + "methods": ["PUT"], + "parts": { + "task_type": { + "type": "string", + "description": "The task type" + }, + "azureaistudio_inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The inference endpoint's task and service settings" + } + } +} diff --git a/specification/inference/put_azureaistudio/PutAzureAiStudioRequest.ts b/specification/inference/put_azureaistudio/PutAzureAiStudioRequest.ts new file mode 100644 index 0000000000..5e3602f381 --- /dev/null +++ b/specification/inference/put_azureaistudio/PutAzureAiStudioRequest.ts @@ -0,0 +1,164 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { + InferenceChunkingSettings, + RateLimitSetting +} from '@inference/_types/Services' +import { RequestBase } from '@_types/Base' +import { Id } from '@_types/common' +import { float, integer } from '@_types/Numeric' + +/** + * Create an Azure AI studio inference endpoint. + * + * Create an inference endpoint to perform an inference task with the `azureaistudio` service. + * + * When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + * After creating the endpoint, wait for the model deployment to complete before using it. + * To verify the deployment status, use the get trained model statistics API. + * Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`. + * Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources. + * @rest_spec_name inference.put_azureaistudio + * @availability stack since=8.14.0 stability=stable visibility=public + * @availability serverless stability=stable visibility=public + * @cluster_privileges manage_inference + * @doc_id inference-api-put-azureaistudio + */ +export interface Request extends RequestBase { + urls: [ + { + path: '/_inference/{task_type}/{azureaistudio_inference_id}' + methods: ['PUT'] + } + ] + path_parts: { + /** + * The type of the inference task that the model will perform. + */ + task_type: AzureAiStudioTaskType + /** + * The unique identifier of the inference endpoint. + */ + azureaistudio_inference_id: Id + } + body: { + /** + * The chunking configuration object. + * @ext_doc_id inference-chunking + */ + chunking_settings?: InferenceChunkingSettings + /** + * The type of service supported for the specified task type. In this case, `azureaistudio`. + */ + service: ServiceType + /** + * Settings used to install the inference model. These settings are specific to the `openai` service. 
+ */ + service_settings: AzureAiStudioServiceSettings + /** + * Settings to configure the inference task. + * These settings are specific to the task type you specified. + */ + task_settings?: AzureAiStudioTaskSettings + } +} + +export enum AzureAiStudioTaskType { + completion, + text_embedding +} + +export enum ServiceType { + azureaistudio +} + +export class AzureAiStudioServiceSettings { + /** + * A valid API key of your Azure AI Studio model deployment. + * This key can be found on the overview page for your deployment in the management section of your Azure AI Studio account. + * + * IMPORTANT: You need to provide the API key only once, during the inference model creation. + * The get inference endpoint API does not retrieve your API key. + * After creating the inference model, you cannot change the associated API key. + * If you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key. + * @ext_doc_id azureaistudio-api-keys + */ + api_key: string + /** + * The type of endpoint that is available for deployment through Azure AI Studio: `token` or `realtime`. + * The `token` endpoint type is for "pay as you go" endpoints that are billed per token. + * The `realtime` endpoint type is for "real-time" endpoints that are billed per hour of usage. + * @ext_doc_id azureaistudio-endpoint-types + */ + endpoint_type: string + /** + * The target URL of your Azure AI Studio model deployment. + * This can be found on the overview page for your deployment in the management section of your Azure AI Studio account. + */ + target: string + /** + * The model provider for your deployment. + * Note that some providers may support only certain task types. + * Supported providers include: + * + * * `cohere` - available for `text_embedding` and `completion` task types + * * `databricks` - available for `completion` task type only + * * `meta` - available for `completion` task type only + * * `microsoft_phi` - available for `completion` task type only + * * `mistral` - available for `completion` task type only + * * `openai` - available for `text_embedding` and `completion` task types + */ + provider: string + /** + * This setting helps to minimize the number of rate limit errors returned from Azure AI Studio. + * By default, the `azureaistudio` service sets the number of requests allowed per minute to 240. + */ + rate_limit?: RateLimitSetting +} + +export class AzureAiStudioTaskSettings { + /** + * For a `completion` task, instruct the inference process to perform sampling. + * It has no effect unless `temperature` or `top_p` is specified. + */ + do_sample?: float + /** + * For a `completion` task, provide a hint for the maximum number of output tokens to be generated. + * @server_default 64 + */ + max_new_tokens?: integer + /** + * For a `completion` task, control the apparent creativity of generated completions with a sampling temperature. + * It must be a number in the range of 0.0 to 2.0. + * It should not be used if `top_p` is specified. + */ + temperature?: float + /** + * For a `completion` task, make the model consider the results of the tokens with nucleus sampling probability. + * It is an alternative value to `temperature` and must be a number in the range of 0.0 to 2.0. + * It should not be used if `temperature` is specified. + */ + top_p?: float + /** + * For a `text_embedding` task, specify the user issuing the request. + * This information can be used for abuse detection. 
+ */ + user?: string +} diff --git a/specification/inference/put_azureaistudio/PutAzureAiStudioResponse.ts b/specification/inference/put_azureaistudio/PutAzureAiStudioResponse.ts new file mode 100644 index 0000000000..d40639b031 --- /dev/null +++ b/specification/inference/put_azureaistudio/PutAzureAiStudioResponse.ts @@ -0,0 +1,24 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { InferenceEndpointInfo } from '@inference/_types/Services' + +export class Response { + body: InferenceEndpointInfo +} diff --git a/specification/inference/put_azureaistudio/examples/request/PutAzureAiStudioRequestExample1.yaml b/specification/inference/put_azureaistudio/examples/request/PutAzureAiStudioRequestExample1.yaml new file mode 100644 index 0000000000..0db68a9a3a --- /dev/null +++ b/specification/inference/put_azureaistudio/examples/request/PutAzureAiStudioRequestExample1.yaml @@ -0,0 +1,14 @@ +summary: A text embedding task +description: Run `PUT _inference/text_embedding/azure_ai_studio_embeddings` to create an inference endpoint that performs a text_embedding task. Note that you do not specify a model here, as it is defined already in the Azure AI Studio deployment. +# method_request: "PUT _inference/text_embedding/azure_ai_studio_embeddings" +# type: "request" +value: |- + { + "service": "azureaistudio", + "service_settings": { + "api_key": "Azure-AI-Studio-API-key", + "target": "Target-Uri", + "provider": "openai", + "endpoint_type": "token" + } + } diff --git a/specification/inference/put_azureaistudio/examples/request/PutAzureAiStudioRequestExample2.yaml b/specification/inference/put_azureaistudio/examples/request/PutAzureAiStudioRequestExample2.yaml new file mode 100644 index 0000000000..74d00dce8e --- /dev/null +++ b/specification/inference/put_azureaistudio/examples/request/PutAzureAiStudioRequestExample2.yaml @@ -0,0 +1,14 @@ +summary: A completion task +description: Run `PUT _inference/completion/azure_ai_studio_completion` to create an inference endpoint that performs a completion task. +# method_request: "PUT _inference/completion/azure_ai_studio_completion" +# type: "request" +value: |- + { + "service": "azureaistudio", + "service_settings": { + "api_key": "Azure-AI-Studio-API-key", + "target": "Target-URI", + "provider": "databricks", + "endpoint_type": "realtime" + } + }
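---

For anyone who wants to exercise the new route end to end, the sketch below calls `PUT /_inference/{task_type}/{azureaistudio_inference_id}` as defined in this patch. It is illustrative only and not part of the change: the cluster URL, the Elasticsearch API key, and all Azure AI Studio values are placeholders, the request body mirrors PutAzureAiStudioRequestExample2, and the optional `task_settings` follow the `AzureAiStudioTaskSettings` definition above (set `temperature` or `top_p`, not both).

// sanity-check.ts — minimal sketch, assuming Node.js 18+ (global fetch) and a reachable cluster.
const ES_URL = "https://localhost:9200"; // placeholder
const ES_API_KEY = "elasticsearch-api-key"; // placeholder

async function createAzureAiStudioCompletionEndpoint(): Promise<void> {
  // PUT /_inference/{task_type}/{azureaistudio_inference_id}, per
  // inference.put_azureaistudio.json above.
  const response = await fetch(
    `${ES_URL}/_inference/completion/azure_ai_studio_completion`,
    {
      method: "PUT",
      headers: {
        "Content-Type": "application/json",
        Authorization: `ApiKey ${ES_API_KEY}`,
      },
      body: JSON.stringify({
        service: "azureaistudio",
        service_settings: {
          api_key: "Azure-AI-Studio-API-key", // placeholder
          target: "Target-URI", // placeholder deployment URL
          provider: "databricks",
          endpoint_type: "realtime",
        },
        // Optional AzureAiStudioTaskSettings for a completion task.
        task_settings: {
          max_new_tokens: 64, // matches the documented server default
          temperature: 0.7, // omit top_p when temperature is set
        },
      }),
    }
  );
  if (!response.ok) {
    throw new Error(`PUT failed: ${response.status} ${await response.text()}`);
  }
  // On success the body is an InferenceEndpointInfo object.
  console.log(await response.json());
}

createAzureAiStudioCompletionEndpoint().catch(console.error);

Per the request definition, only `service` and `service_settings` are required in the body; `chunking_settings` and `task_settings` can be omitted entirely.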