diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index bff2fcf4e8..494e511731 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -17861,6 +17861,102 @@ "x-state": "Added in 9.0.0" } }, + "/_inference/{task_type}/{alibabacloud_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create an AlibabaCloud AI Search inference endpoint", + "description": "Create an inference endpoint to perform an inference task with the `alibabacloud-ai-search` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "operationId": "inference-put-alibabacloud", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The type of the inference task that the model will perform.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_alibabacloud:AlibabaCloudTaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "alibabacloud_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, + "service": { + 
"$ref": "#/components/schemas/inference.put_alibabacloud:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_alibabacloud:AlibabaCloudServiceSettings" + }, + "task_settings": { + "$ref": "#/components/schemas/inference.put_alibabacloud:AlibabaCloudTaskSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + }, + "examples": { + "PutAlibabaCloudRequestExample1": { + "summary": "A completion task", + "description": "Run `PUT _inference/completion/alibabacloud_ai_search_completion` to create an inference endpoint that performs a completion task.", + "value": "{\n \"service\": \"alibabacloud-ai-search\",\n \"service_settings\": {\n \"host\" : \"default-j01.platform-cn-shanghai.opensearch.aliyuncs.com\",\n \"api_key\": \"AlibabaCloud-API-Key\",\n \"service_id\": \"ops-qwen-turbo\",\n \"workspace\" : \"default\"\n }\n}" + }, + "PutAlibabaCloudRequestExample2": { + "summary": "A rerank task", + "description": "Run `PUT _inference/rerank/alibabacloud_ai_search_rerank` to create an inference endpoint that performs a rerank task.", + "value": "{\n \"service\": \"alibabacloud-ai-search\",\n \"service_settings\": {\n \"api_key\": \"AlibabaCloud-API-Key\",\n \"service_id\": \"ops-bge-reranker-larger\",\n \"host\": \"default-j01.platform-cn-shanghai.opensearch.aliyuncs.com\",\n \"workspace\": \"default\"\n }\n}" + }, + "PutAlibabaCloudRequestExample3": { + "summary": "A sparse embedding task", + "description": "Run `PUT _inference/sparse_embedding/alibabacloud_ai_search_sparse` to create an inference endpoint that performs perform a sparse embedding task.", + "value": "{\n \"service\": \"alibabacloud-ai-search\",\n \"service_settings\": {\n \"api_key\": \"AlibabaCloud-API-Key\",\n \"service_id\": \"ops-text-sparse-embedding-001\",\n \"host\": \"default-j01.platform-cn-shanghai.opensearch.aliyuncs.com\",\n \"workspace\": \"default\"\n }\n}" + }, + "PutAlibabaCloudRequestExample4": { + "summary": "A text embedding task", + 
"description": "Run `PUT _inference/text_embedding/alibabacloud_ai_search_embeddings` to create an inference endpoint that performs a text embedding task.", + "value": "{\n \"service\": \"alibabacloud-ai-search\",\n \"service_settings\": {\n \"api_key\": \"AlibabaCloud-API-Key\",\n \"service_id\": \"ops-text-embedding-001\",\n \"host\": \"default-j01.platform-cn-shanghai.opensearch.aliyuncs.com\",\n \"workspace\": \"default\"\n }\n}" + } + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + } + } + } + } + }, + "x-state": "Added in 8.16.0" + } + }, "/_inference/{task_type}/{amazonbedrock_inference_id}": { "put": { "tags": [ @@ -77953,6 +78049,76 @@ "inference._types:ServiceSettings": { "type": "object" }, + "inference.put_alibabacloud:AlibabaCloudTaskType": { + "type": "string", + "enum": [ + "completion", + "rerank", + "space_embedding", + "text_embedding" + ] + }, + "inference.put_alibabacloud:ServiceType": { + "type": "string", + "enum": [ + "alibabacloud-ai-search" + ] + }, + "inference.put_alibabacloud:AlibabaCloudServiceSettings": { + "type": "object", + "properties": { + "api_key": { + "description": "A valid API key for the AlibabaCloud AI Search API.", + "type": "string" + }, + "host": { + "externalDocs": { + "url": "https://opensearch.console.aliyun.com/cn-shanghai/rag/api-key" + }, + "description": "The name of the host address used for the inference task.\nYou can find the host address in the API keys section of the documentation.", + "type": "string" + }, + "rate_limit": { + "$ref": "#/components/schemas/inference._types:RateLimitSetting" + }, + "service_id": { + "description": "The name of the model service to use for the inference task.\nThe following service IDs are available for the `completion` task:\n\n* `ops-qwen-turbo`\n* `qwen-turbo`\n* `qwen-plus`\n* `qwen-max ÷ qwen-max-longcontext`\n\nThe following 
service ID is available for the `rerank` task:\n\n* `ops-bge-reranker-larger`\n\nThe following service ID is available for the `sparse_embedding` task:\n\n* `ops-text-sparse-embedding-001`\n\nThe following service IDs are available for the `text_embedding` task:\n\n* `ops-text-embedding-001`\n* `ops-text-embedding-zh-001`\n* `ops-text-embedding-en-001`\n* `ops-text-embedding-002`", + "type": "string" + }, + "workspace": { + "description": "The name of the workspace used for the inference task.", + "type": "string" + } + }, + "required": [ + "api_key", + "host", + "service_id", + "workspace" + ] + }, + "inference._types:RateLimitSetting": { + "type": "object", + "properties": { + "requests_per_minute": { + "description": "The number of requests allowed per minute.", + "type": "number" + } + } + }, + "inference.put_alibabacloud:AlibabaCloudTaskSettings": { + "type": "object", + "properties": { + "input_type": { + "description": "For a `sparse_embedding` or `text_embedding` task, specify the type of input passed to the model.\nValid values are:\n\n* `ingest` for storing document embeddings in a vector database.\n* `search` for storing embeddings of search queries run against a vector database to find relevant documents.", + "type": "string" + }, + "return_token": { + "description": "For a `sparse_embedding` task, it affects whether the token name will be returned in the response.\nIt defaults to `false`, which means only the token ID will be returned in the response.", + "type": "boolean" + } + } + }, "inference.put_amazonbedrock:AmazonBedrockTaskType": { "type": "string", "enum": [ @@ -78009,15 +78175,6 @@ "secret_key" ] }, - "inference._types:RateLimitSetting": { - "type": "object", - "properties": { - "requests_per_minute": { - "description": "The number of requests allowed per minute.", - "type": "number" - } - } - }, "inference.put_amazonbedrock:AmazonBedrockTaskSettings": { "type": "object", "properties": { diff --git 
a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 086c7e639d..46e1dfc138 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -9817,6 +9817,102 @@ "x-state": "Added in 9.0.0" } }, + "/_inference/{task_type}/{alibabacloud_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create an AlibabaCloud AI Search inference endpoint", + "description": "Create an inference endpoint to perform an inference task with the `alibabacloud-ai-search` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "operationId": "inference-put-alibabacloud", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The type of the inference task that the model will perform.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_alibabacloud:AlibabaCloudTaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "alibabacloud_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "chunking_settings": { + "$ref": 
"#/components/schemas/inference._types:InferenceChunkingSettings" + }, + "service": { + "$ref": "#/components/schemas/inference.put_alibabacloud:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_alibabacloud:AlibabaCloudServiceSettings" + }, + "task_settings": { + "$ref": "#/components/schemas/inference.put_alibabacloud:AlibabaCloudTaskSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + }, + "examples": { + "PutAlibabaCloudRequestExample1": { + "summary": "A completion task", + "description": "Run `PUT _inference/completion/alibabacloud_ai_search_completion` to create an inference endpoint that performs a completion task.", + "value": "{\n \"service\": \"alibabacloud-ai-search\",\n \"service_settings\": {\n \"host\" : \"default-j01.platform-cn-shanghai.opensearch.aliyuncs.com\",\n \"api_key\": \"AlibabaCloud-API-Key\",\n \"service_id\": \"ops-qwen-turbo\",\n \"workspace\" : \"default\"\n }\n}" + }, + "PutAlibabaCloudRequestExample2": { + "summary": "A rerank task", + "description": "Run `PUT _inference/rerank/alibabacloud_ai_search_rerank` to create an inference endpoint that performs a rerank task.", + "value": "{\n \"service\": \"alibabacloud-ai-search\",\n \"service_settings\": {\n \"api_key\": \"AlibabaCloud-API-Key\",\n \"service_id\": \"ops-bge-reranker-larger\",\n \"host\": \"default-j01.platform-cn-shanghai.opensearch.aliyuncs.com\",\n \"workspace\": \"default\"\n }\n}" + }, + "PutAlibabaCloudRequestExample3": { + "summary": "A sparse embedding task", + "description": "Run `PUT _inference/sparse_embedding/alibabacloud_ai_search_sparse` to create an inference endpoint that performs perform a sparse embedding task.", + "value": "{\n \"service\": \"alibabacloud-ai-search\",\n \"service_settings\": {\n \"api_key\": \"AlibabaCloud-API-Key\",\n \"service_id\": \"ops-text-sparse-embedding-001\",\n \"host\": \"default-j01.platform-cn-shanghai.opensearch.aliyuncs.com\",\n \"workspace\": \"default\"\n }\n}" 
+ }, + "PutAlibabaCloudRequestExample4": { + "summary": "A text embedding task", + "description": "Run `PUT _inference/text_embedding/alibabacloud_ai_search_embeddings` to create an inference endpoint that performs a text embedding task.", + "value": "{\n \"service\": \"alibabacloud-ai-search\",\n \"service_settings\": {\n \"api_key\": \"AlibabaCloud-API-Key\",\n \"service_id\": \"ops-text-embedding-001\",\n \"host\": \"default-j01.platform-cn-shanghai.opensearch.aliyuncs.com\",\n \"workspace\": \"default\"\n }\n}" + } + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + } + } + } + } + }, + "x-state": "Added in 8.16.0" + } + }, "/_inference/{task_type}/{amazonbedrock_inference_id}": { "put": { "tags": [ @@ -49273,6 +49369,76 @@ "inference._types:ServiceSettings": { "type": "object" }, + "inference.put_alibabacloud:AlibabaCloudTaskType": { + "type": "string", + "enum": [ + "completion", + "rerank", + "space_embedding", + "text_embedding" + ] + }, + "inference.put_alibabacloud:ServiceType": { + "type": "string", + "enum": [ + "alibabacloud-ai-search" + ] + }, + "inference.put_alibabacloud:AlibabaCloudServiceSettings": { + "type": "object", + "properties": { + "api_key": { + "description": "A valid API key for the AlibabaCloud AI Search API.", + "type": "string" + }, + "host": { + "externalDocs": { + "url": "https://opensearch.console.aliyun.com/cn-shanghai/rag/api-key" + }, + "description": "The name of the host address used for the inference task.\nYou can find the host address in the API keys section of the documentation.", + "type": "string" + }, + "rate_limit": { + "$ref": "#/components/schemas/inference._types:RateLimitSetting" + }, + "service_id": { + "description": "The name of the model service to use for the inference task.\nThe following service IDs are available for the `completion` task:\n\n* `ops-qwen-turbo`\n* 
`qwen-turbo`\n* `qwen-plus`\n* `qwen-max`\n* `qwen-max-longcontext`\n\nThe following service ID is available for the `rerank` task:\n\n* `ops-bge-reranker-larger`\n\nThe following service ID is available for the `sparse_embedding` task:\n\n* `ops-text-sparse-embedding-001`\n\nThe following service IDs are available for the `text_embedding` task:\n\n* `ops-text-embedding-001`\n* `ops-text-embedding-zh-001`\n* `ops-text-embedding-en-001`\n* `ops-text-embedding-002`", + "type": "string" + }, + "workspace": { + "description": "The name of the workspace used for the inference task.", + "type": "string" + } + }, + "required": [ + "api_key", + "host", + "service_id", + "workspace" + ] + }, + "inference._types:RateLimitSetting": { + "type": "object", + "properties": { + "requests_per_minute": { + "description": "The number of requests allowed per minute.", + "type": "number" + } + } + }, + "inference.put_alibabacloud:AlibabaCloudTaskSettings": { + "type": "object", + "properties": { + "input_type": { + "description": "For a `sparse_embedding` or `text_embedding` task, specify the type of input passed to the model.\nValid values are:\n\n* `ingest` for storing document embeddings in a vector database.\n* `search` for storing embeddings of search queries run against a vector database to find relevant documents.", + "type": "string" + }, + "return_token": { + "description": "For a `sparse_embedding` task, it affects whether the token name will be returned in the response.\nIt defaults to `false`, which means only the token ID will be returned in the response.", + "type": "boolean" + } + } + }, "inference.put_amazonbedrock:AmazonBedrockTaskType": { "type": "string", "enum": [ @@ -49329,15 +49495,6 @@ "secret_key" ] }, - "inference._types:RateLimitSetting": { - "type": "object", - "properties": { - "requests_per_minute": { - "description": "The number of requests allowed per minute.", - "type": "number" - } - } - }, "inference.put_amazonbedrock:AmazonBedrockTaskSettings": { "type": 
"object", "properties": { diff --git a/output/schema/schema-serverless.json b/output/schema/schema-serverless.json index 46a78cb008..9f5e69717a 100644 --- a/output/schema/schema-serverless.json +++ b/output/schema/schema-serverless.json @@ -4767,17 +4767,28 @@ "stack": { <<<<<<< HEAD <<<<<<< HEAD +<<<<<<< HEAD ======= ======= "since": "8.14.0", +======= + "since": "8.16.0", +>>>>>>> ef980f023 (Add Alibaba Cloud inference API (#4021)) "stability": "stable", "visibility": "public" } }, +<<<<<<< HEAD "description": "Create an Azure OpenAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `azureopenai` service.\n\nThe list of chat completion models that you can choose from in your Azure OpenAI deployment include:\n\n* [GPT-4 and GPT-4 Turbo models](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-4-and-gpt-4-turbo-models)\n* [GPT-3.5](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#gpt-35)\n\nThe list of embeddings models that you can choose from in your deployment can be found in the [Azure models documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions#embeddings).\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", "docId": "inference-api-put-azureopenai", "docUrl": 
"https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-azure-openai.html", "name": "inference.put_azureopenai", +======= + "description": "Create an AlibabaCloud AI Search inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `alibabacloud-ai-search` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "docId": "inference-api-put-alibabacloud", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-alibabacloud-ai-search.html", + "name": "inference.put_alibabacloud", +>>>>>>> ef980f023 (Add Alibaba Cloud inference API (#4021)) "privileges": { "cluster": [ "manage_inference" @@ -4785,7 +4796,11 @@ }, "request": { "name": "Request", +<<<<<<< HEAD "namespace": "inference.put_azureopenai" +======= + "namespace": "inference.put_alibabacloud" +>>>>>>> ef980f023 (Add Alibaba Cloud inference API (#4021)) }, "requestBodyRequired": false, "requestMediaType": [ @@ -4793,7 +4808,11 @@ ], "response": { "name": "Response", +<<<<<<< HEAD "namespace": "inference.put_azureopenai" +======= + "namespace": "inference.put_alibabacloud" +>>>>>>> ef980f023 (Add Alibaba Cloud inference API (#4021)) }, "responseMediaType": [ "application/json" @@ -4803,7 +4822,11 @@ "methods": [ "PUT" ], +<<<<<<< HEAD "path": "/_inference/{task_type}/{azureopenai_inference_id}" +======= + "path": "/_inference/{task_type}/{alibabacloud_inference_id}" +>>>>>>> ef980f023 (Add Alibaba Cloud inference 
API (#4021)) } ] }, @@ -4814,7 +4837,10 @@ "visibility": "public" }, "stack": { +<<<<<<< HEAD >>>>>>> d5b1a529a (Add Azure OpenAI inference details (#4019)) +======= +>>>>>>> ef980f023 (Add Alibaba Cloud inference API (#4021)) "since": "8.12.0", "stability": "stable", "visibility": "public" @@ -27804,6 +27830,149 @@ }, { <<<<<<< HEAD +<<<<<<< HEAD +======= + "description": "The type of service supported for the specified task type. In this case, `alibabacloud-ai-search`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_alibabacloud" + } + } + }, + { + "description": "Settings used to install the inference model. These settings are specific to the `alibabacloud-ai-search` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "AlibabaCloudServiceSettings", + "namespace": "inference.put_alibabacloud" + } + } + }, + { + "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "AlibabaCloudTaskSettings", + "namespace": "inference.put_alibabacloud" + } + } + } + ] + }, + "description": "Create an AlibabaCloud AI Search inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `alibabacloud-ai-search` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless 
required, as each endpoint consumes significant resources.", + "examples": { + "PutAlibabaCloudRequestExample1": { + "description": "Run `PUT _inference/completion/alibabacloud_ai_search_completion` to create an inference endpoint that performs a completion task.", + "summary": "A completion task", + "value": "{\n \"service\": \"alibabacloud-ai-search\",\n \"service_settings\": {\n \"host\" : \"default-j01.platform-cn-shanghai.opensearch.aliyuncs.com\",\n \"api_key\": \"AlibabaCloud-API-Key\",\n \"service_id\": \"ops-qwen-turbo\",\n \"workspace\" : \"default\"\n }\n}" + }, + "PutAlibabaCloudRequestExample2": { + "description": "Run `PUT _inference/rerank/alibabacloud_ai_search_rerank` to create an inference endpoint that performs a rerank task.", + "summary": "A rerank task", + "value": "{\n \"service\": \"alibabacloud-ai-search\",\n \"service_settings\": {\n \"api_key\": \"AlibabaCloud-API-Key\",\n \"service_id\": \"ops-bge-reranker-larger\",\n \"host\": \"default-j01.platform-cn-shanghai.opensearch.aliyuncs.com\",\n \"workspace\": \"default\"\n }\n}" + }, + "PutAlibabaCloudRequestExample3": { + "description": "Run `PUT _inference/sparse_embedding/alibabacloud_ai_search_sparse` to create an inference endpoint that performs a sparse embedding task.", + "summary": "A sparse embedding task", + "value": "{\n \"service\": \"alibabacloud-ai-search\",\n \"service_settings\": {\n \"api_key\": \"AlibabaCloud-API-Key\",\n \"service_id\": \"ops-text-sparse-embedding-001\",\n \"host\": \"default-j01.platform-cn-shanghai.opensearch.aliyuncs.com\",\n \"workspace\": \"default\"\n }\n}" + }, + "PutAlibabaCloudRequestExample4": { + "description": "Run `PUT _inference/text_embedding/alibabacloud_ai_search_embeddings` to create an inference endpoint that performs a text embedding task.", + "summary": "A text embedding task", + "value": "{\n \"service\": \"alibabacloud-ai-search\",\n \"service_settings\": {\n \"api_key\": \"AlibabaCloud-API-Key\",\n \"service_id\": 
\"ops-text-embedding-001\",\n \"host\": \"default-j01.platform-cn-shanghai.opensearch.aliyuncs.com\",\n \"workspace\": \"default\"\n }\n}" + } + }, + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "kind": "request", + "name": { + "name": "Request", + "namespace": "inference.put_alibabacloud" + }, + "path": [ + { + "description": "The type of the inference task that the model will perform.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "AlibabaCloudTaskType", + "namespace": "inference.put_alibabacloud" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "alibabacloud_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L27-L80" + }, + { + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "kind": "response", + "name": { + "name": "Response", + "namespace": "inference.put_alibabacloud" + }, + "specLocation": "inference/put_alibabacloud/PutAlibabaCloudResponse.ts#L22-L24" + }, + { + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, + { +>>>>>>> ef980f023 (Add Alibaba Cloud inference API (#4021)) "description": "The type of service supported for the specified task type. 
In this case, `amazonbedrock`.", ======= "description": "The type of service supported for the specified task type. In this case, `azureopenai`.", @@ -101391,6 +101560,41 @@ ======= "name": "completion" }, + { + "name": "rerank" + }, + { + "name": "space_embedding" + }, + { + "name": "text_embedding" + } + ], + "name": { + "name": "AlibabaCloudTaskType", + "namespace": "inference.put_alibabacloud" + }, + "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L82-L87" + }, + { + "kind": "enum", + "members": [ + { + "name": "alibabacloud-ai-search" + } + ], + "name": { + "name": "ServiceType", + "namespace": "inference.put_alibabacloud" + }, + "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L89-L91" + }, + { + "kind": "enum", + "members": [ + { + "name": "completion" + }, { "name": "text_embedding" } @@ -122191,9 +122395,140 @@ "name": { <<<<<<< HEAD <<<<<<< HEAD +<<<<<<< HEAD ======= ======= >>>>>>> d5b1a529a (Add Azure OpenAI inference details (#4019)) +======= + "name": "AlibabaCloudServiceSettings", + "namespace": "inference.put_alibabacloud" + }, + "properties": [ + { + "description": "A valid API key for the AlibabaCloud AI Search API.", + "name": "api_key", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The name of the host address used for the inference task.\nYou can find the host address in the API keys section of the documentation.", + "extDocId": "alibabacloud-api-keys", + "extDocUrl": "https://opensearch.console.aliyun.com/cn-shanghai/rag/api-key", + "name": "host", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from AlibabaCloud AI Search.\nBy default, the `alibabacloud-ai-search` service sets the number of requests allowed per minute to 
`1000`.", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + }, + { + "description": "The name of the model service to use for the inference task.\nThe following service IDs are available for the `completion` task:\n\n* `ops-qwen-turbo`\n* `qwen-turbo`\n* `qwen-plus`\n* `qwen-max ÷ qwen-max-longcontext`\n\nThe following service ID is available for the `rerank` task:\n\n* `ops-bge-reranker-larger`\n\nThe following service ID is available for the `sparse_embedding` task:\n\n* `ops-text-sparse-embedding-001`\n\nThe following service IDs are available for the `text_embedding` task:\n\n`ops-text-embedding-001`\n`ops-text-embedding-zh-001`\n`ops-text-embedding-en-001`\n`ops-text-embedding-002`", + "name": "service_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The name of the workspace used for the inference task.", + "name": "workspace", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L93-L138" + }, + { + "kind": "interface", + "name": { + "name": "RateLimitSetting", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "The number of requests allowed per minute.", + "name": "requests_per_minute", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/_types/Services.ts#L95-L100" + }, + { + "kind": "interface", + "name": { + "name": "AlibabaCloudTaskSettings", + "namespace": "inference.put_alibabacloud" + }, + "properties": [ + { + "description": "For a `sparse_embedding` or `text_embedding` task, specify the type of input passed to the model.\nValid values are:\n\n* 
`ingest` for storing document embeddings in a vector database.\n* `search` for storing embeddings of search queries run against a vector database to find relevant documents.", + "name": "input_type", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "For a `sparse_embedding` task, it affects whether the token name will be returned in the response.\nIt defaults to `false`, which means only the token ID will be returned in the response.", + "name": "return_token", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "boolean", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L140-L154" + }, + { + "kind": "interface", + "name": { +>>>>>>> ef980f023 (Add Alibaba Cloud inference API (#4021)) "name": "AmazonBedrockServiceSettings", "namespace": "inference.put_amazonbedrock" }, @@ -122343,6 +122678,7 @@ { "kind": "interface", "name": { +<<<<<<< HEAD "name": "RateLimitSetting", "namespace": "inference._types" }, @@ -122366,6 +122702,8 @@ "kind": "interface", "name": { <<<<<<< HEAD +======= +>>>>>>> ef980f023 (Add Alibaba Cloud inference API (#4021)) "name": "AmazonBedrockTaskSettings", "namespace": "inference.put_amazonbedrock" }, diff --git a/output/schema/schema.json b/output/schema/schema.json index e5f93713d7..ec75d8b9ed 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -9352,6 +9352,51 @@ } ] }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.16.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Create an AlibabaCloud AI Search inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `alibabacloud-ai-search` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically 
deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "docId": "inference-api-put-alibabacloud", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-alibabacloud-ai-search.html", + "name": "inference.put_alibabacloud", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.put_alibabacloud" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.put_alibabacloud" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "PUT" + ], + "path": "/_inference/{task_type}/{alibabacloud_inference_id}" + } + ] + }, { "availability": { "serverless": { @@ -150978,6 +151023,287 @@ }, "specLocation": "inference/put/PutResponse.ts#L22-L24" }, + { + "kind": "interface", + "name": { + "name": "AlibabaCloudServiceSettings", + "namespace": "inference.put_alibabacloud" + }, + "properties": [ + { + "description": "A valid API key for the AlibabaCloud AI Search API.", + "name": "api_key", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The name of the host address used for the inference task.\nYou can find the host address in the API keys section of the documentation.", + "extDocId": "alibabacloud-api-keys", + "extDocUrl": "https://opensearch.console.aliyun.com/cn-shanghai/rag/api-key", + "name": "host", + "required": true, + "type": { +
"kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from AlibabaCloud AI Search.\nBy default, the `alibabacloud-ai-search` service sets the number of requests allowed per minute to `1000`.", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + }, + { + "description": "The name of the model service to use for the inference task.\nThe following service IDs are available for the `completion` task:\n\n* `ops-qwen-turbo`\n* `qwen-turbo`\n* `qwen-plus`\n* `qwen-max` or `qwen-max-longcontext`\n\nThe following service ID is available for the `rerank` task:\n\n* `ops-bge-reranker-larger`\n\nThe following service ID is available for the `sparse_embedding` task:\n\n* `ops-text-sparse-embedding-001`\n\nThe following service IDs are available for the `text_embedding` task:\n\n* `ops-text-embedding-001`\n* `ops-text-embedding-zh-001`\n* `ops-text-embedding-en-001`\n* `ops-text-embedding-002`", + "name": "service_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The name of the workspace used for the inference task.", + "name": "workspace", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L93-L138" + }, + { + "kind": "interface", + "name": { + "name": "AlibabaCloudTaskSettings", + "namespace": "inference.put_alibabacloud" + }, + "properties": [ + { + "description": "For a `sparse_embedding` or `text_embedding` task, specify the type of input passed to the model.\nValid values are:\n\n* `ingest` for storing document embeddings in a vector database.\n* `search` for storing embeddings of
search queries run against a vector database to find relevant documents.", + "name": "input_type", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "For a `sparse_embedding` task, it affects whether the token name will be returned in the response.\nIt defaults to `false`, which means only the token ID will be returned in the response.", + "name": "return_token", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "boolean", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L140-L154" + }, + { + "kind": "enum", + "members": [ + { + "name": "completion" + }, + { + "name": "rerank" + }, + { + "name": "sparse_embedding" + }, + { + "name": "text_embedding" + } + ], + "name": { + "name": "AlibabaCloudTaskType", + "namespace": "inference.put_alibabacloud" + }, + "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L82-L87" + }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/inference-apis.html#infer-chunking-config", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, + { + "description": "The type of service supported for the specified task type. In this case, `alibabacloud-ai-search`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_alibabacloud" + } + } + }, + { + "description": "Settings used to install the inference model.
These settings are specific to the `alibabacloud-ai-search` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "AlibabaCloudServiceSettings", + "namespace": "inference.put_alibabacloud" + } + } + }, + { + "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "AlibabaCloudTaskSettings", + "namespace": "inference.put_alibabacloud" + } + } + } + ] + }, + "description": "Create an AlibabaCloud AI Search inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `alibabacloud-ai-search` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "examples": { + "PutAlibabaCloudRequestExample1": { + "description": "Run `PUT _inference/completion/alibabacloud_ai_search_completion` to create an inference endpoint that performs a completion task.", + "summary": "A completion task", + "value": "{\n \"service\": \"alibabacloud-ai-search\",\n \"service_settings\": {\n \"host\" : \"default-j01.platform-cn-shanghai.opensearch.aliyuncs.com\",\n \"api_key\": \"AlibabaCloud-API-Key\",\n \"service_id\": \"ops-qwen-turbo\",\n \"workspace\" : \"default\"\n }\n}" + }, + "PutAlibabaCloudRequestExample2": { + "description": "Run `PUT _inference/rerank/alibabacloud_ai_search_rerank` to create an inference 
endpoint that performs a rerank task.", + "summary": "A rerank task", + "value": "{\n \"service\": \"alibabacloud-ai-search\",\n \"service_settings\": {\n \"api_key\": \"AlibabaCloud-API-Key\",\n \"service_id\": \"ops-bge-reranker-larger\",\n \"host\": \"default-j01.platform-cn-shanghai.opensearch.aliyuncs.com\",\n \"workspace\": \"default\"\n }\n}" + }, + "PutAlibabaCloudRequestExample3": { + "description": "Run `PUT _inference/sparse_embedding/alibabacloud_ai_search_sparse` to create an inference endpoint that performs a sparse embedding task.", + "summary": "A sparse embedding task", + "value": "{\n \"service\": \"alibabacloud-ai-search\",\n \"service_settings\": {\n \"api_key\": \"AlibabaCloud-API-Key\",\n \"service_id\": \"ops-text-sparse-embedding-001\",\n \"host\": \"default-j01.platform-cn-shanghai.opensearch.aliyuncs.com\",\n \"workspace\": \"default\"\n }\n}" + }, + "PutAlibabaCloudRequestExample4": { + "description": "Run `PUT _inference/text_embedding/alibabacloud_ai_search_embeddings` to create an inference endpoint that performs a text embedding task.", + "summary": "A text embedding task", + "value": "{\n \"service\": \"alibabacloud-ai-search\",\n \"service_settings\": {\n \"api_key\": \"AlibabaCloud-API-Key\",\n \"service_id\": \"ops-text-embedding-001\",\n \"host\": \"default-j01.platform-cn-shanghai.opensearch.aliyuncs.com\",\n \"workspace\": \"default\"\n }\n}" + } + }, + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.put_alibabacloud" + }, + "path": [ + { + "description": "The type of the inference task that the model will perform.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "AlibabaCloudTaskType", + "namespace": "inference.put_alibabacloud" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "alibabacloud_inference_id", + "required": true,
+ "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L27-L80" + }, + { + "kind": "response", + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "name": { + "name": "Response", + "namespace": "inference.put_alibabacloud" + }, + "specLocation": "inference/put_alibabacloud/PutAlibabaCloudResponse.ts#L22-L24" + }, + { + "kind": "enum", + "members": [ + { + "name": "alibabacloud-ai-search" + } + ], + "name": { + "name": "ServiceType", + "namespace": "inference.put_alibabacloud" + }, + "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L89-L91" + }, { "kind": "interface", "name": { diff --git a/output/typescript/types.ts b/output/typescript/types.ts index a0d2f2699d..a6e7ef730f 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -13252,6 +13252,36 @@ export interface InferencePutRequest extends RequestBase { export type InferencePutResponse = InferenceInferenceEndpointInfo +export interface InferencePutAlibabacloudAlibabaCloudServiceSettings { + api_key: string + host: string + rate_limit?: InferenceRateLimitSetting + service_id: string + workspace: string +} + +export interface InferencePutAlibabacloudAlibabaCloudTaskSettings { + input_type?: string + return_token?: boolean +} + +export type InferencePutAlibabacloudAlibabaCloudTaskType = 'completion' | 'rerank' | 'sparse_embedding' | 'text_embedding' + +export interface InferencePutAlibabacloudRequest extends RequestBase { + task_type: InferencePutAlibabacloudAlibabaCloudTaskType + alibabacloud_inference_id: Id + body?: { + chunking_settings?: InferenceInferenceChunkingSettings + service: InferencePutAlibabacloudServiceType + service_settings: InferencePutAlibabacloudAlibabaCloudServiceSettings + task_settings?:
InferencePutAlibabacloudAlibabaCloudTaskSettings + } +} + +export type InferencePutAlibabacloudResponse = InferenceInferenceEndpointInfo + +export type InferencePutAlibabacloudServiceType = 'alibabacloud-ai-search' + export interface InferencePutAmazonbedrockAmazonBedrockServiceSettings { access_key: string model: string diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv index 68245a033a..08e1ad46a0 100644 --- a/specification/_doc_ids/table.csv +++ b/specification/_doc_ids/table.csv @@ -2,6 +2,7 @@ apis,https://www.elastic.co/docs/api/doc/elasticsearch/v8 add-nodes,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/add-elasticsearch-nodes.html alias-update,https://www.elastic.co/docs/api/doc/elasticsearch/v8/operation/operation-indices-put-alias aliases-update,https://www.elastic.co/docs/api/doc/elasticsearch/v8/operation/operation-indices-update-aliases +alibabacloud-api-keys,https://opensearch.console.aliyun.com/cn-shanghai/rag/api-key amazonbedrock-models,https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html amazonbedrock-secret-keys,https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html analysis-analyzers,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/analysis-analyzers.html @@ -338,6 +339,7 @@ inference-api-get,https://www.elastic.co/guide/en/elasticsearch/reference/{branc inference-api-post,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/post-inference-api.html inference-api-post-eis-chat-completion,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/post-inference-api.html inference-api-put,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/put-inference-api.html +inference-api-put-alibabacloud,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-alibabacloud-ai-search.html 
inference-api-put-azureopenai,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-azure-openai.html inference-api-put-azureaistudio,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-azure-ai-studio.html inference-api-put-cohere,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-cohere.html diff --git a/specification/_json_spec/inference.put_alibabacloud.json b/specification/_json_spec/inference.put_alibabacloud.json new file mode 100644 index 0000000000..b39d5abe97 --- /dev/null +++ b/specification/_json_spec/inference.put_alibabacloud.json @@ -0,0 +1,35 @@ +{ + "inference.put_alibabacloud": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-alibabacloud-ai-search.html", + "description": "Configure an AlibabaCloud AI Search inference endpoint" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": ["application/json"], + "content_type": ["application/json"] + }, + "url": { + "paths": [ + { + "path": "/_inference/{task_type}/{alibabacloud_inference_id}", + "methods": ["PUT"], + "parts": { + "task_type": { + "type": "string", + "description": "The task type" + }, + "alibabacloud_inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The inference endpoint's task and service settings" + } + } +} diff --git a/specification/inference/put_alibabacloud/PutAlibabaCloudRequest.ts b/specification/inference/put_alibabacloud/PutAlibabaCloudRequest.ts new file mode 100644 index 0000000000..11770daa84 --- /dev/null +++ b/specification/inference/put_alibabacloud/PutAlibabaCloudRequest.ts @@ -0,0 +1,154 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. 
licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { + InferenceChunkingSettings, + RateLimitSetting +} from '@inference/_types/Services' +import { RequestBase } from '@_types/Base' +import { Id } from '@_types/common' + +/** + * Create an AlibabaCloud AI Search inference endpoint. + * + * Create an inference endpoint to perform an inference task with the `alibabacloud-ai-search` service. + * + * When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + * After creating the endpoint, wait for the model deployment to complete before using it. + * To verify the deployment status, use the get trained model statistics API. + * Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`. + * Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources. 
+ * @rest_spec_name inference.put_alibabacloud + * @availability stack since=8.16.0 stability=stable visibility=public + * @availability serverless stability=stable visibility=public + * @cluster_privileges manage_inference + * @doc_id inference-api-put-alibabacloud + */ +export interface Request extends RequestBase { + urls: [ + { + path: '/_inference/{task_type}/{alibabacloud_inference_id}' + methods: ['PUT'] + } + ] + path_parts: { + /** + * The type of the inference task that the model will perform. + */ + task_type: AlibabaCloudTaskType + /** + * The unique identifier of the inference endpoint. + */ + alibabacloud_inference_id: Id + } + body: { + /** + * The chunking configuration object. + * @ext_doc_id inference-chunking + */ + chunking_settings?: InferenceChunkingSettings + /** + * The type of service supported for the specified task type. In this case, `alibabacloud-ai-search`. + */ + service: ServiceType + /** + * Settings used to install the inference model. These settings are specific to the `alibabacloud-ai-search` service. + */ + service_settings: AlibabaCloudServiceSettings + /** + * Settings to configure the inference task. + * These settings are specific to the task type you specified. + */ + task_settings?: AlibabaCloudTaskSettings + } +} + +export enum AlibabaCloudTaskType { + completion, + rerank, + sparse_embedding, + text_embedding +} + +export enum ServiceType { + 'alibabacloud-ai-search' +} + +export class AlibabaCloudServiceSettings { + /** + * A valid API key for the AlibabaCloud AI Search API. + */ + api_key: string + /** + * The name of the host address used for the inference task. + * You can find the host address in the API keys section of the documentation. + * @ext_doc_id alibabacloud-api-keys + */ + host: string + /** + * This setting helps to minimize the number of rate limit errors returned from AlibabaCloud AI Search. + * By default, the `alibabacloud-ai-search` service sets the number of requests allowed per minute to `1000`.
+ */ + rate_limit?: RateLimitSetting + /** + * The name of the model service to use for the inference task. + * The following service IDs are available for the `completion` task: + * + * * `ops-qwen-turbo` + * * `qwen-turbo` + * * `qwen-plus` + * * `qwen-max` or `qwen-max-longcontext` + * + * The following service ID is available for the `rerank` task: + * + * * `ops-bge-reranker-larger` + * + * The following service ID is available for the `sparse_embedding` task: + * + * * `ops-text-sparse-embedding-001` + * + * The following service IDs are available for the `text_embedding` task: + * + * * `ops-text-embedding-001` + * * `ops-text-embedding-zh-001` + * * `ops-text-embedding-en-001` + * * `ops-text-embedding-002` + */ + service_id: string + /** + * The name of the workspace used for the inference task. + */ + workspace: string +} + +export class AlibabaCloudTaskSettings { + /** + * For a `sparse_embedding` or `text_embedding` task, specify the type of input passed to the model. + * Valid values are: + * + * * `ingest` for storing document embeddings in a vector database. + * * `search` for storing embeddings of search queries run against a vector database to find relevant documents. + */ + input_type?: string + /** + * For a `sparse_embedding` task, it affects whether the token name will be returned in the response. + * It defaults to `false`, which means only the token ID will be returned in the response. + */ + return_token?: boolean +} diff --git a/specification/inference/put_alibabacloud/PutAlibabaCloudResponse.ts b/specification/inference/put_alibabacloud/PutAlibabaCloudResponse.ts new file mode 100644 index 0000000000..d40639b031 --- /dev/null +++ b/specification/inference/put_alibabacloud/PutAlibabaCloudResponse.ts @@ -0,0 +1,24 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V.
licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { InferenceEndpointInfo } from '@inference/_types/Services' + +export class Response { + body: InferenceEndpointInfo +} diff --git a/specification/inference/put_alibabacloud/examples/request/PutAlibabaCloudRequestExample1.yaml b/specification/inference/put_alibabacloud/examples/request/PutAlibabaCloudRequestExample1.yaml new file mode 100644 index 0000000000..4a939a2ee2 --- /dev/null +++ b/specification/inference/put_alibabacloud/examples/request/PutAlibabaCloudRequestExample1.yaml @@ -0,0 +1,14 @@ +summary: A completion task +description: Run `PUT _inference/completion/alibabacloud_ai_search_completion` to create an inference endpoint that performs a completion task. 
+# method_request: "PUT _inference/completion/alibabacloud_ai_search_completion" +# type: "request" +value: |- + { + "service": "alibabacloud-ai-search", + "service_settings": { + "host" : "default-j01.platform-cn-shanghai.opensearch.aliyuncs.com", + "api_key": "AlibabaCloud-API-Key", + "service_id": "ops-qwen-turbo", + "workspace" : "default" + } + } diff --git a/specification/inference/put_alibabacloud/examples/request/PutAlibabaCloudRequestExample2.yaml b/specification/inference/put_alibabacloud/examples/request/PutAlibabaCloudRequestExample2.yaml new file mode 100644 index 0000000000..1a4f9a832d --- /dev/null +++ b/specification/inference/put_alibabacloud/examples/request/PutAlibabaCloudRequestExample2.yaml @@ -0,0 +1,14 @@ +summary: A rerank task +description: Run `PUT _inference/rerank/alibabacloud_ai_search_rerank` to create an inference endpoint that performs a rerank task. +# method_request: "PUT _inference/rerank/alibabacloud_ai_search_rerank" +# type: "request" +value: |- + { + "service": "alibabacloud-ai-search", + "service_settings": { + "api_key": "AlibabaCloud-API-Key", + "service_id": "ops-bge-reranker-larger", + "host": "default-j01.platform-cn-shanghai.opensearch.aliyuncs.com", + "workspace": "default" + } + } diff --git a/specification/inference/put_alibabacloud/examples/request/PutAlibabaCloudRequestExample3.yaml b/specification/inference/put_alibabacloud/examples/request/PutAlibabaCloudRequestExample3.yaml new file mode 100644 index 0000000000..c43ee4b6bf --- /dev/null +++ b/specification/inference/put_alibabacloud/examples/request/PutAlibabaCloudRequestExample3.yaml @@ -0,0 +1,14 @@ +summary: A sparse embedding task +description: Run `PUT _inference/sparse_embedding/alibabacloud_ai_search_sparse` to create an inference endpoint that performs a sparse embedding task.
+# method_request: "PUT _inference/sparse_embedding/alibabacloud_ai_search_sparse" +# type: "request" +value: |- + { + "service": "alibabacloud-ai-search", + "service_settings": { + "api_key": "AlibabaCloud-API-Key", + "service_id": "ops-text-sparse-embedding-001", + "host": "default-j01.platform-cn-shanghai.opensearch.aliyuncs.com", + "workspace": "default" + } + } diff --git a/specification/inference/put_alibabacloud/examples/request/PutAlibabaCloudRequestExample4.yaml b/specification/inference/put_alibabacloud/examples/request/PutAlibabaCloudRequestExample4.yaml new file mode 100644 index 0000000000..17a76344fb --- /dev/null +++ b/specification/inference/put_alibabacloud/examples/request/PutAlibabaCloudRequestExample4.yaml @@ -0,0 +1,14 @@ +summary: A text embedding task +description: Run `PUT _inference/text_embedding/alibabacloud_ai_search_embeddings` to create an inference endpoint that performs a text embedding task. +# method_request: "PUT _inference/text_embedding/alibabacloud_ai_search_embeddings" +# type: "request" +value: |- + { + "service": "alibabacloud-ai-search", + "service_settings": { + "api_key": "AlibabaCloud-API-Key", + "service_id": "ops-text-embedding-001", + "host": "default-j01.platform-cn-shanghai.opensearch.aliyuncs.com", + "workspace": "default" + } + }