From e0193ae24e73ac368684b230aecbfcffc703693c Mon Sep 17 00:00:00 2001 From: Lisa Cawley Date: Tue, 25 Mar 2025 08:01:16 -0700 Subject: [PATCH] Add Cohere inference API details (#4025) (cherry picked from commit 2dc985a1e32d0b199f2c3cd7a5d85b60e3eb1f52) --- output/openapi/elasticsearch-openapi.json | 180 ++++ .../elasticsearch-serverless-openapi.json | 180 ++++ output/schema/schema-serverless.json | 942 ++++++++++++++++-- output/schema/schema.json | 417 ++++++++ output/typescript/types.ts | 40 + specification/_doc_ids/table.csv | 2 + .../_json_spec/inference.put_cohere.json | 35 + .../inference/put_cohere/PutCohereRequest.ts | 194 ++++ .../inference/put_cohere/PutCohereResponse.ts | 24 + .../request/PutCohereRequestExample1.yaml | 13 + .../request/PutCohereRequestExample2.yaml | 16 + 11 files changed, 1941 insertions(+), 102 deletions(-) create mode 100644 specification/_json_spec/inference.put_cohere.json create mode 100644 specification/inference/put_cohere/PutCohereRequest.ts create mode 100644 specification/inference/put_cohere/PutCohereResponse.ts create mode 100644 specification/inference/put_cohere/examples/request/PutCohereRequestExample1.yaml create mode 100644 specification/inference/put_cohere/examples/request/PutCohereRequestExample2.yaml diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 32b59d1d3f..7f52c162e2 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -18182,6 +18182,92 @@ "x-state": "Added in 8.14.0" } }, + "/_inference/{task_type}/{cohere_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create a Cohere inference endpoint", + "description": "Create an inference endpoint to perform an inference task with the `cohere` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "operationId": "inference-put-cohere", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The type of the inference task that the model will perform.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_cohere:CohereTaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "cohere_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, + "service": { + "$ref": "#/components/schemas/inference.put_cohere:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_cohere:CohereServiceSettings" + }, + "task_settings": { + "$ref": "#/components/schemas/inference.put_cohere:CohereTaskSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + }, + "examples": 
{ + "PutCohereRequestExample1": { + "summary": "A text embedding task", + "description": "Run `PUT _inference/text_embedding/cohere-embeddings` to create an inference endpoint that performs a text embedding task.", + "value": "{\n \"service\": \"cohere\",\n \"service_settings\": {\n \"api_key\": \"Cohere-Api-key\",\n \"model_id\": \"embed-english-light-v3.0\",\n \"embedding_type\": \"byte\"\n }\n}" + }, + "PutCohereRequestExample2": { + "summary": "A rerank task", + "description": "Run `PUT _inference/rerank/cohere-rerank` to create an inference endpoint that performs a rerank task.", + "value": "{\n \"service\": \"cohere\",\n \"service_settings\": {\n \"api_key\": \"Cohere-API-key\",\n \"model_id\": \"rerank-english-v3.0\"\n },\n \"task_settings\": {\n \"top_n\": 10,\n \"return_documents\": true\n }\n}" + } + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + } + } + } + } + }, + "x-state": "Added in 8.13.0" + } + }, "/_inference/{task_type}/{elasticsearch_inference_id}": { "put": { "tags": [ @@ -77952,6 +78038,100 @@ } } }, + "inference.put_cohere:CohereTaskType": { + "type": "string", + "enum": [ + "completion", + "rerank", + "text_embedding" + ] + }, + "inference.put_cohere:ServiceType": { + "type": "string", + "enum": [ + "cohere" + ] + }, + "inference.put_cohere:CohereServiceSettings": { + "type": "object", + "properties": { + "api_key": { + "externalDocs": { + "url": "https://dashboard.cohere.com/api-keys" + }, + "description": "A valid API key for your Cohere account.\nYou can find or create your Cohere API keys on the Cohere API key settings page.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "type": "string" + }, + "embedding_type": { + "$ref": "#/components/schemas/inference.put_cohere:EmbeddingType" + }, + "model_id": { + "description": "For a `completion`, `rerank`, or `text_embedding` task, the name of the model to use for the inference task.\n\n* For the available `completion` models, refer to the [Cohere command docs](https://docs.cohere.com/docs/models#command).\n* For the available `rerank` models, refer to the [Cohere rerank docs](https://docs.cohere.com/reference/rerank-1).\n* For the available `text_embedding` models, refer to [Cohere embed docs](https://docs.cohere.com/reference/embed).\n\nThe default value for a text embedding task is `embed-english-v2.0`.", + "type": "string" + }, + "rate_limit": { + "$ref": "#/components/schemas/inference._types:RateLimitSetting" + }, + "similarity": { + "$ref": "#/components/schemas/inference.put_cohere:SimilarityType" + } + }, + "required": [ + "api_key" + ] + }, + "inference.put_cohere:EmbeddingType": { + "type": "string", + "enum": [ + "byte", + "float", + "int8" + ] + }, + "inference.put_cohere:SimilarityType": { + "type": "string", + "enum": [ + "cosine", + "dot_product", + "l2_norm" + ] + }, + "inference.put_cohere:CohereTaskSettings": { + "type": "object", + "properties": { + "input_type": { + "$ref": "#/components/schemas/inference.put_cohere:InputType" + }, + "return_documents": { + "description": "For a `rerank` task, return doc text within the results.", + "type": 
"boolean" + }, + "top_n": { + "description": "For a `rerank` task, the number of most relevant documents to return.\nIt defaults to the number of the documents.\nIf this inference endpoint is used in a `text_similarity_reranker` retriever query and `top_n` is set, it must be greater than or equal to `rank_window_size` in the query.", + "type": "number" + }, + "truncate": { + "$ref": "#/components/schemas/inference.put_cohere:TruncateType" + } + } + }, + "inference.put_cohere:InputType": { + "type": "string", + "enum": [ + "classification", + "clustering", + "ingest", + "search" + ] + }, + "inference.put_cohere:TruncateType": { + "type": "string", + "enum": [ + "END", + "NONE", + "START" + ] + }, "inference.put_elasticsearch:ElasticsearchTaskType": { "type": "string", "enum": [ diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 5abea0c86d..84a54ca1b0 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -10008,6 +10008,92 @@ "x-state": "Added in 8.14.0" } }, + "/_inference/{task_type}/{cohere_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create a Cohere inference endpoint", + "description": "Create an inference endpoint to perform an inference task with the `cohere` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "operationId": "inference-put-cohere", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The type of the inference task that the model will perform.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_cohere:CohereTaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "cohere_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, + "service": { + "$ref": "#/components/schemas/inference.put_cohere:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_cohere:CohereServiceSettings" + }, + "task_settings": { + "$ref": "#/components/schemas/inference.put_cohere:CohereTaskSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + }, + "examples": { + "PutCohereRequestExample1": { + "summary": "A text embedding task", + "description": "Run `PUT _inference/text_embedding/cohere-embeddings` to create an inference endpoint that performs a text embedding task.", + "value": "{\n \"service\": \"cohere\",\n \"service_settings\": {\n \"api_key\": \"Cohere-Api-key\",\n \"model_id\": \"embed-english-light-v3.0\",\n \"embedding_type\": \"byte\"\n }\n}" + }, + "PutCohereRequestExample2": { 
+ "summary": "A rerank task", + "description": "Run `PUT _inference/rerank/cohere-rerank` to create an inference endpoint that performs a rerank task.", + "value": "{\n \"service\": \"cohere\",\n \"service_settings\": {\n \"api_key\": \"Cohere-API-key\",\n \"model_id\": \"rerank-english-v3.0\"\n },\n \"task_settings\": {\n \"top_n\": 10,\n \"return_documents\": true\n }\n}" + } + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + } + } + } + } + }, + "x-state": "Added in 8.13.0" + } + }, "/_inference/{task_type}/{elasticsearch_inference_id}": { "put": { "tags": [ @@ -49148,6 +49234,100 @@ } } }, + "inference.put_cohere:CohereTaskType": { + "type": "string", + "enum": [ + "completion", + "rerank", + "text_embedding" + ] + }, + "inference.put_cohere:ServiceType": { + "type": "string", + "enum": [ + "cohere" + ] + }, + "inference.put_cohere:CohereServiceSettings": { + "type": "object", + "properties": { + "api_key": { + "externalDocs": { + "url": "https://dashboard.cohere.com/api-keys" + }, + "description": "A valid API key for your Cohere account.\nYou can find or create your Cohere API keys on the Cohere API key settings page.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "type": "string" + }, + "embedding_type": { + "$ref": "#/components/schemas/inference.put_cohere:EmbeddingType" + }, + "model_id": { + "description": "For a `completion`, `rerank`, or `text_embedding` task, the name of the model to use for the inference task.\n\n* For the available `completion` models, refer to the [Cohere command docs](https://docs.cohere.com/docs/models#command).\n* For the available `rerank` models, refer to the [Cohere rerank docs](https://docs.cohere.com/reference/rerank-1).\n* For the available `text_embedding` models, refer to [Cohere embed docs](https://docs.cohere.com/reference/embed).\n\nThe default value for a text embedding task is `embed-english-v2.0`.", + "type": "string" + }, + "rate_limit": { + "$ref": "#/components/schemas/inference._types:RateLimitSetting" + }, + "similarity": { + "$ref": "#/components/schemas/inference.put_cohere:SimilarityType" + } + }, + "required": [ + "api_key" + ] + }, + "inference.put_cohere:EmbeddingType": { + "type": "string", + "enum": [ + "byte", + "float", + "int8" + ] + }, + "inference.put_cohere:SimilarityType": { + "type": "string", + "enum": [ + "cosine", + "dot_product", + "l2_norm" + ] + }, + "inference.put_cohere:CohereTaskSettings": { + "type": "object", + "properties": { + "input_type": { + "$ref": "#/components/schemas/inference.put_cohere:InputType" + }, + "return_documents": { + "description": "For a `rerank` task, return doc text within the results.", + "type": "boolean" + }, + "top_n": { + "description": "For a `rerank` task, the number of most relevant documents to return.\nIt defaults to the number of the documents.\nIf this inference endpoint is used in a `text_similarity_reranker` retriever query and `top_n` is set, it must be greater than or equal to `rank_window_size` in the query.", + "type": "number" + }, + "truncate": { + "$ref": 
"#/components/schemas/inference.put_cohere:TruncateType" + } + } + }, + "inference.put_cohere:InputType": { + "type": "string", + "enum": [ + "classification", + "clustering", + "ingest", + "search" + ] + }, + "inference.put_cohere:TruncateType": { + "type": "string", + "enum": [ + "END", + "NONE", + "START" + ] + }, "inference.put_elasticsearch:ElasticsearchTaskType": { "type": "string", "enum": [ diff --git a/output/schema/schema-serverless.json b/output/schema/schema-serverless.json index 679db03b78..0e81b4aa10 100644 --- a/output/schema/schema-serverless.json +++ b/output/schema/schema-serverless.json @@ -4599,6 +4599,7 @@ }, "stack": { <<<<<<< HEAD +<<<<<<< HEAD ======= <<<<<<< HEAD <<<<<<< HEAD @@ -4742,11 +4743,15 @@ ======= "since": "8.13.0", >>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) +======= + "since": "8.13.0", +>>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) "stability": "stable", "visibility": "public" } }, <<<<<<< HEAD +<<<<<<< HEAD <<<<<<< HEAD "description": "Create an Azure AI studio inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `azureaistudio` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", "docId": "inference-api-put-azureaistudio", @@ -4755,14 +4760,19 @@ ======= ======= >>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) +======= +>>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) "description": "Create a Cohere inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `cohere` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", "docId": "inference-api-put-cohere", "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/branch/infer-service-cohere.html", "name": "inference.put_cohere", <<<<<<< HEAD +<<<<<<< HEAD >>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) ======= >>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) +======= +>>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) "privileges": { "cluster": [ "manage_inference" @@ -4771,6 +4781,7 @@ "request": { "name": "Request", <<<<<<< HEAD +<<<<<<< HEAD <<<<<<< HEAD "namespace": "inference.put_azureaistudio" ======= @@ -4779,6 +4790,9 @@ ======= "namespace": "inference.put_cohere" >>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) +======= + "namespace": "inference.put_cohere" +>>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) }, "requestBodyRequired": false, "requestMediaType": [ @@ 
-4787,6 +4801,7 @@ "response": { "name": "Response", <<<<<<< HEAD +<<<<<<< HEAD <<<<<<< HEAD "namespace": "inference.put_azureaistudio" ======= @@ -4795,6 +4810,9 @@ ======= "namespace": "inference.put_cohere" >>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) +======= + "namespace": "inference.put_cohere" +>>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) }, "responseMediaType": [ "application/json" @@ -4805,6 +4823,7 @@ "PUT" ], <<<<<<< HEAD +<<<<<<< HEAD <<<<<<< HEAD "path": "/_inference/{task_type}/{azureaistudio_inference_id}" ======= @@ -4813,6 +4832,9 @@ ======= "path": "/_inference/{task_type}/{cohere_inference_id}" >>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) +======= + "path": "/_inference/{task_type}/{cohere_inference_id}" +>>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) } ] }, @@ -4824,6 +4846,7 @@ }, "stack": { <<<<<<< HEAD +<<<<<<< HEAD <<<<<<< HEAD "since": "8.14.0", ======= @@ -5108,6 +5131,8 @@ ======= >>>>>>> 38b46ca86 (Add Anthropic inference API details (#4023)) >>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) +======= +>>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) "since": "8.12.0", "stability": "stable", "visibility": "public" @@ -27564,6 +27589,8 @@ "kind": "properties", "properties": [ { +<<<<<<< HEAD +======= "description": "The chunking configuration object.", "extDocId": "inference-chunking", "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", @@ -27578,46 +27605,26 @@ } }, { -<<<<<<< HEAD -<<<<<<< HEAD - "description": "The type of service supported for the specified task type. In this case, `alibabacloud-ai-search`.", -======= -<<<<<<< HEAD -======= - "description": "The type of service supported for the specified task type. In this case, `amazonbedrock`.", ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) + "description": "The type of service supported for the specified task type. In this case, `cohere`.", "name": "service", "required": true, "type": { "kind": "instance_of", "type": { "name": "ServiceType", -<<<<<<< HEAD - "namespace": "inference.put_alibabacloud" -======= - "namespace": "inference.put_amazonbedrock" ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) + "namespace": "inference.put_cohere" } } }, { -<<<<<<< HEAD - "description": "Settings used to install the inference model. These settings are specific to the `alibabacloud-ai-search` service.", -======= - "description": "Settings used to install the inference model. 
These settings are specific to the `amazonbedrock` service.", ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) + "description": "Settings used to install the inference model.\nThese settings are specific to the `cohere` service.", "name": "service_settings", "required": true, "type": { "kind": "instance_of", "type": { -<<<<<<< HEAD - "name": "AlibabaCloudServiceSettings", - "namespace": "inference.put_alibabacloud" -======= - "name": "AmazonBedrockServiceSettings", - "namespace": "inference.put_amazonbedrock" ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) + "name": "CohereServiceSettings", + "namespace": "inference.put_cohere" } } }, @@ -27628,52 +27635,24 @@ "type": { "kind": "instance_of", "type": { -<<<<<<< HEAD - "name": "AlibabaCloudTaskSettings", - "namespace": "inference.put_alibabacloud" -======= - "name": "AmazonBedrockTaskSettings", - "namespace": "inference.put_amazonbedrock" ->>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) + "name": "CohereTaskSettings", + "namespace": "inference.put_cohere" } } } ] }, -<<<<<<< HEAD - "description": "Create an AlibabaCloud AI Search inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `alibabacloud-ai-search` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", - "examples": { - "PutAlibabaCloudRequestExample1": { - "description": "Run `PUT _inference/completion/alibabacloud_ai_search_completion` to create an inference endpoint that performs a completion task.", - "summary": "A completion task", - "value": "{\n \"service\": \"alibabacloud-ai-search\",\n \"service_settings\": {\n \"host\" : \"default-j01.platform-cn-shanghai.opensearch.aliyuncs.com\",\n \"api_key\": \"AlibabaCloud-API-Key\",\n \"service_id\": \"ops-qwen-turbo\",\n \"workspace\" : \"default\"\n }\n}" - }, - "PutAlibabaCloudRequestExample2": { - "description": "Run `PUT _inference/rerank/alibabacloud_ai_search_rerank` to create an inference endpoint that performs a rerank task.", - "summary": "A rerank task", - "value": "{\n \"service\": \"alibabacloud-ai-search\",\n \"service_settings\": {\n \"api_key\": \"AlibabaCloud-API-Key\",\n \"service_id\": \"ops-bge-reranker-larger\",\n \"host\": \"default-j01.platform-cn-shanghai.opensearch.aliyuncs.com\",\n \"workspace\": \"default\"\n }\n}" - }, - "PutAlibabaCloudRequestExample3": { - "description": "Run `PUT _inference/sparse_embedding/alibabacloud_ai_search_sparse` to create an inference endpoint that performs perform a sparse embedding task.", - "summary": "A sparse embedding task", - "value": "{\n \"service\": \"alibabacloud-ai-search\",\n \"service_settings\": {\n \"api_key\": \"AlibabaCloud-API-Key\",\n \"service_id\": \"ops-text-sparse-embedding-001\",\n \"host\": \"default-j01.platform-cn-shanghai.opensearch.aliyuncs.com\",\n \"workspace\": \"default\"\n }\n}" - }, - "PutAlibabaCloudRequestExample4": { - "description": "Run `PUT _inference/text_embedding/alibabacloud_ai_search_embeddings` to create an inference endpoint that performs a 
text embedding task.", - "summary": "A text embedding task", - "value": "{\n \"service\": \"alibabacloud-ai-search\",\n \"service_settings\": {\n \"api_key\": \"AlibabaCloud-API-Key\",\n \"service_id\": \"ops-text-embedding-001\",\n \"host\": \"default-j01.platform-cn-shanghai.opensearch.aliyuncs.com\",\n \"workspace\": \"default\"\n }\n}" -======= - "description": "Create an Amazon Bedrock inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `amazonbedrock` service.\n\n>info\n> You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "description": "Create a Cohere inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `cohere` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", "examples": { - "PutAmazonBedrockRequestExample1": { - "description": "Run `PUT _inference/text_embedding/amazon_bedrock_embeddings` to create an inference endpoint that performs a text embedding task.", + "PutCohereRequestExample1": { + "description": "Run `PUT _inference/text_embedding/cohere-embeddings` to create an inference endpoint that performs a text embedding task.", "summary": "A text embedding task", - "value": "{\n \"service\": \"amazonbedrock\",\n \"service_settings\": {\n \"access_key\": \"AWS-access-key\",\n \"secret_key\": \"AWS-secret-key\",\n \"region\": \"us-east-1\",\n \"provider\": \"amazontitan\",\n \"model\": \"amazon.titan-embed-text-v2:0\"\n }\n}" + "value": "{\n \"service\": \"cohere\",\n \"service_settings\": {\n \"api_key\": \"Cohere-Api-key\",\n \"model_id\": \"embed-english-light-v3.0\",\n \"embedding_type\": \"byte\"\n }\n}" }, - "PutAmazonBedrockRequestExample2": { - "description": "Run `PUT _inference/completion/openai-completion` to create an inference endpoint to perform a completion task type.", - "summary": "A completion task", - "value": "{\n \"service\": \"openai\",\n \"service_settings\": {\n \"api_key\": \"OpenAI-API-Key\",\n \"model_id\": \"gpt-3.5-turbo\"\n }\n}" + "PutCohereRequestExample2": { + "description": "Run `PUT _inference/rerank/cohere-rerank` to create an inference endpoint that performs a 
rerank task.", + "summary": "A rerank task", + "value": "{\n \"service\": \"cohere\",\n \"service_settings\": {\n \"api_key\": \"Cohere-API-key\",\n \"model_id\": \"rerank-english-v3.0\"\n },\n \"task_settings\": {\n \"top_n\": 10,\n \"return_documents\": true\n }\n}" } }, "inherits": { @@ -27685,7 +27664,7 @@ "kind": "request", "name": { "name": "Request", - "namespace": "inference.put_amazonbedrock" + "namespace": "inference.put_cohere" }, "path": [ { @@ -27695,14 +27674,14 @@ "type": { "kind": "instance_of", "type": { - "name": "AmazonBedrockTaskType", - "namespace": "inference.put_amazonbedrock" + "name": "CohereTaskType", + "namespace": "inference.put_cohere" } } }, { "description": "The unique identifier of the inference endpoint.", - "name": "amazonbedrock_inference_id", + "name": "cohere_inference_id", "required": true, "type": { "kind": "instance_of", @@ -27714,7 +27693,7 @@ } ], "query": [], - "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L28-L84" + "specLocation": "inference/put_cohere/PutCohereRequest.ts#L28-L82" }, { "body": { @@ -27730,9 +27709,101 @@ "kind": "response", "name": { "name": "Response", - "namespace": "inference.put_amazonbedrock" + "namespace": "inference.put_cohere" }, - "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockResponse.ts#L22-L24" + "specLocation": "inference/put_cohere/PutCohereResponse.ts#L22-L24" + }, + { + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The type of service supported for the specified task type. In this case, `elastic`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_eis" + } + } + }, + { + "description": "Settings used to install the inference model. 
These settings are specific to the `elastic` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "EisServiceSettings", + "namespace": "inference.put_eis" + } + } + } + ] + }, + "description": "Create an Elastic Inference Service (EIS) inference endpoint.\n\nCreate an inference endpoint to perform an inference task through the Elastic Inference Service (EIS).", + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "kind": "request", + "name": { + "name": "Request", + "namespace": "inference.put_eis" + }, + "path": [ + { + "description": "The type of the inference task that the model will perform.\nNOTE: The `chat_completion` task type only supports streaming and only through the _stream API.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "EisTaskType", + "namespace": "inference.put_eis" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "eis_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_eis/PutEisRequest.ts#L24-L62" + }, + { + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "kind": "response", + "name": { + "name": "Response", + "namespace": "inference.put_eis" + }, + "specLocation": "inference/put_eis/PutEisResponse.ts#L22-L24" }, { "attachedBehaviors": [ @@ -27756,51 +27827,46 @@ } }, { -======= -======= ->>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) - "description": "The type of service supported for the specified task type. In this case, `anthropic`.", + "description": "The type of service supported for the specified task type. In this case, `elser`.", "name": "service", "required": true, "type": { "kind": "instance_of", "type": { "name": "ServiceType", - "namespace": "inference.put_anthropic" + "namespace": "inference.put_elser" } } }, { - "description": "Settings used to install the inference model. These settings are specific to the `watsonxai` service.", + "description": "Settings used to install the inference model. 
These settings are specific to the `elser` service.",
 "name": "service_settings",
 "required": true,
 "type": {
 "kind": "instance_of",
 "type": {
- "name": "AnthropicServiceSettings",
- "namespace": "inference.put_anthropic"
- }
- }
- },
- {
- "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.",
- "name": "task_settings",
- "required": false,
- "type": {
- "kind": "instance_of",
- "type": {
- "name": "AnthropicTaskSettings",
- "namespace": "inference.put_anthropic"
+ "name": "ElserServiceSettings",
+ "namespace": "inference.put_elser"
 }
 }
 }
 ]
 },
+ "deprecation": {
+ "description": "The elser service is deprecated and will be removed in a future release. Use the Elasticsearch inference integration instead, with model_id included in the service_settings.",
+ "version": "8.16.0"
+ },
+ "description": "Create an ELSER inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `elser` service.\nYou can also deploy ELSER by using the Elasticsearch inference integration.\n\n> info\n> Your Elasticsearch deployment contains a preconfigured ELSER inference endpoint; you only need to create the endpoint using the API if you want to customize the settings.\n\nThe API request will automatically download and deploy the ELSER model if it isn't already downloaded.\n\n> info\n> You might see a 502 bad gateway error in the response when using the Kibana Console. This error usually just reflects a timeout, while the model downloads in the background. You can check the download progress in the Machine Learning UI. If using the Python client, you can set the timeout parameter to a higher value.\n\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
 "examples": {
 "PutElserRequestExample1": {
 "description": "Run `PUT _inference/sparse_embedding/my-elser-model` to create an inference endpoint that performs a `sparse_embedding` task. 
The request will automatically download the ELSER model if it isn't already downloaded and then deploy the model.", + "summary": "A sparse embedding task", + "value": "{\n \"service\": \"elser\",\n \"service_settings\": {\n \"num_allocations\": 1,\n \"num_threads\": 1\n }\n}" + }, + "PutElserRequestExample2": { + "description": "Run `PUT _inference/sparse_embedding/my-elser-model` to create an inference endpoint that performs a `sparse_embedding` task with adaptive allocations. When adaptive allocations are enabled, the number of allocations of the model is set automatically based on the current load.", + "summary": "Adaptive allocations", + "value": "{\n \"service\": \"elser\",\n \"service_settings\": {\n \"adaptive_allocations\": {\n \"enabled\": true,\n \"min_number_of_allocations\": 3,\n \"max_number_of_allocations\": 10\n },\n \"num_threads\": 1\n }\n}" } }, "inherits": { @@ -27812,24 +27878,24 @@ "kind": "request", "name": { "name": "Request", - "namespace": "inference.put_anthropic" + "namespace": "inference.put_elser" }, "path": [ { - "description": "The task type.\nThe only valid task type for the model to perform is `completion`.", + "description": "The type of the inference task that the model will perform.", "name": "task_type", "required": true, "type": { "kind": "instance_of", "type": { - "name": "AnthropicTaskType", - "namespace": "inference.put_anthropic" + "name": "ElserTaskType", + "namespace": "inference.put_elser" } } }, { "description": "The unique identifier of the inference endpoint.", - "name": "anthropic_inference_id", + "name": "elser_inference_id", "required": true, "type": { "kind": "instance_of", @@ -27841,7 +27907,7 @@ } ], "query": [], - "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L28-L82" + "specLocation": "inference/put_elser/PutElserRequest.ts#L25-L82" }, { "body": { @@ -27854,12 +27920,567 @@ } } }, + "examples": { + "PutElserResponseExample1": { + "description": "A successful response when creating an ELSER inference endpoint.", + "value": "{\n \"inference_id\": \"my-elser-model\",\n \"task_type\": \"sparse_embedding\",\n \"service\": \"elser\",\n \"service_settings\": {\n \"num_allocations\": 1,\n \"num_threads\": 1\n },\n \"task_settings\": {}\n}" + } + }, "kind": "response", "name": { "name": "Response", - "namespace": "inference.put_anthropic" + "namespace": "inference.put_elser" }, - "specLocation": "inference/put_anthropic/PutAnthropicResponse.ts#L22-L24" + "specLocation": "inference/put_elser/PutElserResponse.ts#L22-L24" + }, + { + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, + { + "description": "The type of service supported for the specified task type. In this case, `googleaistudio`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_googleaistudio" + } + } + }, + { + "description": "Settings used to install the inference model. 
These settings are specific to the `googleaistudio` service.",
+ "name": "service_settings",
+ "required": true,
+ "type": {
+ "kind": "instance_of",
+ "type": {
+ "name": "GoogleAiStudioServiceSettings",
+ "namespace": "inference.put_googleaistudio"
+ }
+ }
+ }
+ ]
+ },
+ "description": "Create a Google AI Studio inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `googleaistudio` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
+ "examples": {
+ "PutGoogleAiStudioRequestExample1": {
+ "description": "Run `PUT _inference/completion/google_ai_studio_completion` to create an inference endpoint to perform a `completion` task type.",
+ "summary": "A completion task",
+ "value": "{\n \"service\": \"googleaistudio\",\n \"service_settings\": {\n \"api_key\": \"api-key\",\n \"model_id\": \"model-id\"\n }\n}"
+ }
+ },
+ "inherits": {
+ "type": {
+ "name": "RequestBase",
+ "namespace": "_types"
+ }
+ },
+ "kind": "request",
+ "name": {
+ "name": "Request",
+ "namespace": "inference.put_googleaistudio"
+ },
+ "path": [
+ {
+ "description": "The type of the inference task that the model will perform.",
+ "name": "task_type",
+ "required": true,
+ "type": {
+ "kind": "instance_of",
+ "type": {
+ "name": "GoogleAiStudioTaskType",
+ "namespace": "inference.put_googleaistudio"
+ }
+ }
+ },
+ {
+ "description": "The unique identifier of the inference endpoint.",
+ "name": "googleaistudio_inference_id",
+ "required": true,
+ "type": {
+ "kind": "instance_of",
+ "type": {
+ "name": "Id",
+ "namespace": "_types"
+ }
+ }
+ }
+ ],
+ "query": [],
+ "specLocation": "inference/put_googleaistudio/PutGoogleAiStudioRequest.ts#L27-L75"
+ },
+ {
+ "body": {
+ "kind": "value",
+ "value": {
+ "kind": "instance_of",
+ "type": {
+ "name": "InferenceEndpointInfo",
+ "namespace": "inference._types"
+ }
+ }
+ },
+ "kind": "response",
+ "name": {
+ "name": "Response",
+ "namespace": "inference.put_googleaistudio"
+ },
+ "specLocation": "inference/put_googleaistudio/PutGoogleAiStudioResponse.ts#L22-L24"
+ },
+ {
+ "attachedBehaviors": [
+ "CommonQueryParameters"
+ ],
+ "body": {
+ "kind": "properties",
+ "properties": [
+ {
+ "description": "The chunking configuration object.",
+ "extDocId": "inference-chunking",
+ "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config",
+ "name": "chunking_settings",
+ "required": false,
+ "type": {
+ "kind": "instance_of",
+ "type": {
+ "name": "InferenceChunkingSettings",
+ "namespace": "inference._types"
+ }
+ }
+ },
+ {
+ "description": "The type of service supported for the specified task type. In this case, `googlevertexai`.",
+ "name": "service",
+ "required": true,
+ "type": {
+ "kind": "instance_of",
+ "type": {
+ "name": "ServiceType",
+ "namespace": "inference.put_googlevertexai"
+ }
+ }
+ },
+ {
+ "description": "Settings used to install the inference model. 
These settings are specific to the `googlevertexai` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "GoogleVertexAIServiceSettings", + "namespace": "inference.put_googlevertexai" + } + } + }, + { + "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "GoogleVertexAITaskSettings", + "namespace": "inference.put_googlevertexai" + } + } + } + ] + }, + "description": "Create a Google Vertex AI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `googlevertexai` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "examples": { + "PutGoogleVertexAiRequestExample1": { + "description": "Run `PUT _inference/text_embedding/google_vertex_ai_embeddings` to create an inference endpoint to perform a `text_embedding` task type.", + "summary": "A text embedding task", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"model_id\": \"model-id\",\n \"location\": \"location\",\n \"project_id\": \"project-id\"\n }\n}" + }, + "PutGoogleVertexAiRequestExample2": { + "description": "Run `PUT _inference/rerank/google_vertex_ai_rerank` to create an inference endpoint to perform a `rerank` task type.", + "summary": "A rerank task", + "value": "{\n \"service\": \"googlevertexai\",\n \"service_settings\": {\n \"service_account_json\": \"service-account-json\",\n \"project_id\": \"project-id\"\n }\n}" + } + }, + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "kind": "request", + "name": { + "name": "Request", + "namespace": "inference.put_googlevertexai" + }, + "path": [ + { + "description": "The type of the inference task that the model will perform.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "GoogleVertexAITaskType", + "namespace": "inference.put_googlevertexai" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "googlevertexai_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L28-L81" + }, + { + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "kind": "response", + "name": { + "name": "Response", + "namespace": "inference.put_googlevertexai" + }, + "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiResponse.ts#L22-L24" + }, + { + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { +>>>>>>> 2dc985a1e (Add Cohere inference API details 
(#4025)) + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, + { +<<<<<<< HEAD +<<<<<<< HEAD + "description": "The type of service supported for the specified task type. In this case, `alibabacloud-ai-search`.", +======= +<<<<<<< HEAD +======= + "description": "The type of service supported for the specified task type. In this case, `amazonbedrock`.", +>>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", +<<<<<<< HEAD + "namespace": "inference.put_alibabacloud" +======= + "namespace": "inference.put_amazonbedrock" +>>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) + } + } + }, + { +<<<<<<< HEAD + "description": "Settings used to install the inference model. These settings are specific to the `alibabacloud-ai-search` service.", +======= + "description": "Settings used to install the inference model. These settings are specific to the `amazonbedrock` service.", +>>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { +<<<<<<< HEAD + "name": "AlibabaCloudServiceSettings", + "namespace": "inference.put_alibabacloud" +======= + "name": "AmazonBedrockServiceSettings", + "namespace": "inference.put_amazonbedrock" +>>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) + } + } + }, + { + "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { +<<<<<<< HEAD + "name": "AlibabaCloudTaskSettings", + "namespace": "inference.put_alibabacloud" +======= + "name": "AmazonBedrockTaskSettings", + "namespace": "inference.put_amazonbedrock" +>>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) + } + } + } + ] + }, +<<<<<<< HEAD + "description": "Create an AlibabaCloud AI Search inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `alibabacloud-ai-search` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "examples": { + "PutAlibabaCloudRequestExample1": { + "description": "Run `PUT _inference/completion/alibabacloud_ai_search_completion` to create an inference endpoint that performs a completion task.", + "summary": "A completion task", + "value": "{\n \"service\": \"alibabacloud-ai-search\",\n \"service_settings\": {\n \"host\" : \"default-j01.platform-cn-shanghai.opensearch.aliyuncs.com\",\n \"api_key\": \"AlibabaCloud-API-Key\",\n \"service_id\": \"ops-qwen-turbo\",\n \"workspace\" : \"default\"\n }\n}" 
+ },
+ "PutAlibabaCloudRequestExample2": {
+ "description": "Run `PUT _inference/rerank/alibabacloud_ai_search_rerank` to create an inference endpoint that performs a rerank task.",
+ "summary": "A rerank task",
+ "value": "{\n \"service\": \"alibabacloud-ai-search\",\n \"service_settings\": {\n \"api_key\": \"AlibabaCloud-API-Key\",\n \"service_id\": \"ops-bge-reranker-larger\",\n \"host\": \"default-j01.platform-cn-shanghai.opensearch.aliyuncs.com\",\n \"workspace\": \"default\"\n }\n}"
+ },
+ "PutAlibabaCloudRequestExample3": {
+ "description": "Run `PUT _inference/sparse_embedding/alibabacloud_ai_search_sparse` to create an inference endpoint that performs a sparse embedding task.",
+ "summary": "A sparse embedding task",
+ "value": "{\n \"service\": \"alibabacloud-ai-search\",\n \"service_settings\": {\n \"api_key\": \"AlibabaCloud-API-Key\",\n \"service_id\": \"ops-text-sparse-embedding-001\",\n \"host\": \"default-j01.platform-cn-shanghai.opensearch.aliyuncs.com\",\n \"workspace\": \"default\"\n }\n}"
+ },
+ "PutAlibabaCloudRequestExample4": {
+ "description": "Run `PUT _inference/text_embedding/alibabacloud_ai_search_embeddings` to create an inference endpoint that performs a text embedding task.",
+ "summary": "A text embedding task",
+ "value": "{\n \"service\": \"alibabacloud-ai-search\",\n \"service_settings\": {\n \"api_key\": \"AlibabaCloud-API-Key\",\n \"service_id\": \"ops-text-embedding-001\",\n \"host\": \"default-j01.platform-cn-shanghai.opensearch.aliyuncs.com\",\n \"workspace\": \"default\"\n }\n}"
+=======
+ "description": "Create an Amazon Bedrock inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `amazonbedrock` service.\n\n>info\n> You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. 
If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "examples": { + "PutAmazonBedrockRequestExample1": { + "description": "Run `PUT _inference/text_embedding/amazon_bedrock_embeddings` to create an inference endpoint that performs a text embedding task.", + "summary": "A text embedding task", + "value": "{\n \"service\": \"amazonbedrock\",\n \"service_settings\": {\n \"access_key\": \"AWS-access-key\",\n \"secret_key\": \"AWS-secret-key\",\n \"region\": \"us-east-1\",\n \"provider\": \"amazontitan\",\n \"model\": \"amazon.titan-embed-text-v2:0\"\n }\n}" + }, + "PutAmazonBedrockRequestExample2": { + "description": "Run `PUT _inference/completion/openai-completion` to create an inference endpoint to perform a completion task type.", + "summary": "A completion task", + "value": "{\n \"service\": \"openai\",\n \"service_settings\": {\n \"api_key\": \"OpenAI-API-Key\",\n \"model_id\": \"gpt-3.5-turbo\"\n }\n}" + } + }, + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "kind": "request", + "name": { + "name": "Request", + "namespace": "inference.put_amazonbedrock" + }, + "path": [ + { + "description": "The type of the inference task that the model will perform.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "AmazonBedrockTaskType", + "namespace": "inference.put_amazonbedrock" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "amazonbedrock_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L28-L84" + }, + { + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "kind": "response", + "name": { + "name": "Response", + "namespace": "inference.put_amazonbedrock" + }, + "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockResponse.ts#L22-L24" + }, + { + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, + { +======= +======= +>>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) + "description": "The type of service supported for the specified task type. 
In this case, `anthropic`.",
+ "name": "service",
+ "required": true,
+ "type": {
+ "kind": "instance_of",
+ "type": {
+ "name": "ServiceType",
+ "namespace": "inference.put_anthropic"
+ }
+ }
+ },
+ {
+ "description": "Settings used to install the inference model. These settings are specific to the `anthropic` service.",
+ "name": "service_settings",
+ "required": true,
+ "type": {
+ "kind": "instance_of",
+ "type": {
+ "name": "AnthropicServiceSettings",
+ "namespace": "inference.put_anthropic"
+ }
+ }
+ },
+ {
+ "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.",
+ "name": "task_settings",
+ "required": false,
+ "type": {
+ "kind": "instance_of",
+ "type": {
+ "name": "AnthropicTaskSettings",
+ "namespace": "inference.put_anthropic"
+ }
+ }
+ }
+ ]
+ },
+ "description": "Create an Anthropic inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `anthropic` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
+ "examples": {
+ "PutAnthropicRequestExample1": {
+ "description": "Run `PUT _inference/completion/anthropic_completion` to create an inference endpoint that performs a completion task.",
+ "value": "{\n \"service\": \"anthropic\",\n \"service_settings\": {\n \"api_key\": \"Anthropic-Api-Key\",\n \"model_id\": \"Model-ID\"\n },\n \"task_settings\": {\n \"max_tokens\": 1024\n }\n}"
+ }
+ },
+ "inherits": {
+ "type": {
+ "name": "RequestBase",
+ "namespace": "_types"
+ }
+ },
+ "kind": "request",
+ "name": {
+ "name": "Request",
+ "namespace": "inference.put_anthropic"
+ },
+ "path": [
+ {
+ "description": "The task type.\nThe only valid task type for the model to perform is `completion`.",
+ "name": "task_type",
+ "required": true,
+ "type": {
+ "kind": "instance_of",
+ "type": {
+ "name": "AnthropicTaskType",
+ "namespace": "inference.put_anthropic"
+ }
+ }
+ },
+ {
+ "description": "The unique identifier of the inference endpoint.",
+ "name": "anthropic_inference_id",
+ "required": true,
+ "type": {
+ "kind": "instance_of",
+ "type": {
+ "name": "Id",
+ "namespace": "_types"
+ }
+ }
+ }
+ ],
+ "query": [],
+ "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L28-L82"
+ },
+ {
+ "body": {
+ "kind": "value",
+ "value": {
+ "kind": "instance_of",
+ "type": {
+ "name": "InferenceEndpointInfo",
+ "namespace": "inference._types"
+ }
+ }
+ },
+ "kind": "response",
+ "name": {
+ "name": "Response",
+ "namespace": "inference.put_anthropic"
+ },
+ "specLocation": "inference/put_anthropic/PutAnthropicResponse.ts#L22-L24"
 },
 {
 "attachedBehaviors": [
@@ -102618,6 +103239,7 @@
 "members": [
 {
 <<<<<<< HEAD
+<<<<<<< HEAD
 <<<<<<< HEAD
 "name": "completion"
 },
@@ -102628,10 +103250,13 @@
 "name": "space_embedding"
 =======
 <<<<<<< HEAD
+=======
+>>>>>>> 397d37cf8 (Add Cohere inference API details (#4025))
 =======
 "name": "completion"
 },
 {
+<<<<<<< HEAD
 "name": "text_embedding"
 }
 ],
@@ -102691,6 +103316,8 @@
 "name": "completion"
 },
 {
+=======
+>>>>>>> 397d37cf8 (Add Cohere 
inference API details (#4025)) "name": "rerank" }, { @@ -102825,6 +103452,7 @@ "kind": "enum", "members": [ { +<<<<<<< HEAD "name": "rerank" }, { @@ -102890,6 +103518,8 @@ "kind": "enum", "members": [ { +======= +>>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) "name": "sparse_embedding" } ], @@ -102912,9 +103542,12 @@ }, "specLocation": "inference/put_elser/PutElserRequest.ts#L88-L90" <<<<<<< HEAD +<<<<<<< HEAD >>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) ======= >>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) +======= +>>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) }, { "kind": "enum", @@ -102928,6 +103561,7 @@ ], "name": { <<<<<<< HEAD +<<<<<<< HEAD <<<<<<< HEAD "name": "AzureAiStudioTaskType", "namespace": "inference.put_azureaistudio" @@ -102936,20 +103570,26 @@ ======= ======= >>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) +======= +>>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) "name": "GoogleAiStudioTaskType", "namespace": "inference.put_googleaistudio" }, "specLocation": "inference/put_googleaistudio/PutGoogleAiStudioRequest.ts#L77-L80" <<<<<<< HEAD +<<<<<<< HEAD >>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) ======= >>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) +======= +>>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) }, { "kind": "enum", "members": [ { <<<<<<< HEAD +<<<<<<< HEAD <<<<<<< HEAD "name": "azureaistudio" ======= @@ -102958,11 +103598,15 @@ ======= "name": "googleaistudio" >>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) +======= + "name": "googleaistudio" +>>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) } ], "name": { "name": "ServiceType", <<<<<<< HEAD +<<<<<<< HEAD <<<<<<< HEAD "namespace": "inference.put_azureaistudio" }, @@ -102977,12 +103621,18 @@ }, "specLocation": "inference/put_googleaistudio/PutGoogleAiStudioRequest.ts#L82-L84" >>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) +======= + "namespace": "inference.put_googleaistudio" + }, + "specLocation": "inference/put_googleaistudio/PutGoogleAiStudioRequest.ts#L82-L84" +>>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) }, { "kind": "enum", "members": [ { <<<<<<< HEAD +<<<<<<< HEAD <<<<<<< HEAD "name": "completion" ======= @@ -102991,6 +103641,9 @@ ======= "name": "rerank" >>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) +======= + "name": "rerank" +>>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) }, { "name": "text_embedding" @@ -102998,6 +103651,7 @@ ], "name": { <<<<<<< HEAD +<<<<<<< HEAD <<<<<<< HEAD "name": "AzureOpenAITaskType", "namespace": "inference.put_azureopenai" @@ -103006,20 +103660,26 @@ ======= ======= >>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) +======= +>>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) "name": "GoogleVertexAITaskType", "namespace": "inference.put_googlevertexai" }, "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L83-L86" <<<<<<< HEAD +<<<<<<< HEAD >>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) ======= >>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) +======= +>>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) }, { "kind": "enum", "members": [ { <<<<<<< HEAD +<<<<<<< HEAD <<<<<<< HEAD "name": "azureopenai" ======= @@ -103028,11 +103688,15 @@ ======= "name": "googlevertexai" >>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) +======= + "name": "googlevertexai" 
+>>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) } ], "name": { "name": "ServiceType", <<<<<<< HEAD +<<<<<<< HEAD <<<<<<< HEAD "namespace": "inference.put_azureopenai" }, @@ -103047,6 +103711,11 @@ }, "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L88-L90" >>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) +======= + "namespace": "inference.put_googlevertexai" + }, + "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L88-L90" +>>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) }, { "kind": "enum", @@ -103054,12 +103723,16 @@ { <<<<<<< HEAD <<<<<<< HEAD +<<<<<<< HEAD ======= >>>>>>> f5eaaab24 (Add Amazon Bedrock inference API (#4022)) >>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) ======= >>>>>>> 38b46ca86 (Add Anthropic inference API details (#4023)) >>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) +======= +>>>>>>> 2dc985a1e (Add Cohere inference API details (#4025)) +>>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) "name": "text_embedding" } ], @@ -123599,6 +124272,7 @@ "kind": "interface", "name": { <<<<<<< HEAD +<<<<<<< HEAD <<<<<<< HEAD "name": "AlibabaCloudServiceSettings", "namespace": "inference.put_alibabacloud" @@ -124137,6 +124811,9 @@ { "kind": "interface", "name": { +======= +======= +>>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) "name": "CohereServiceSettings", "namespace": "inference.put_cohere" }, @@ -124172,9 +124849,12 @@ "description": "For a `completion`, `rerank`, or `text_embedding` task, the name of the model to use for the inference task.\n\n* For the available `completion` models, refer to the [Cohere command docs](https://docs.cohere.com/docs/models#command).\n* For the available `rerank` models, refer to the [Cohere rerank docs](https://docs.cohere.com/reference/rerank-1).\n* For the available `text_embedding` models, refer to [Cohere embed docs](https://docs.cohere.com/reference/embed).\n\nThe default value for a text embedding task is `embed-english-v2.0`.", "name": "model_id", <<<<<<< HEAD +<<<<<<< HEAD >>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) ======= >>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) +======= +>>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) "required": false, "type": { "kind": "instance_of", @@ -124186,6 +124866,7 @@ }, { <<<<<<< HEAD +<<<<<<< HEAD <<<<<<< HEAD "description": "This setting helps to minimize the number of rate limit errors returned from Azure.\nThe `azureopenai` service sets a default number of requests allowed per minute depending on the task type.\nFor `text_embedding`, it is set to `1440`.\nFor `completion`, it is set to `120`.", "extDocId": "azureopenai-quota-limits", @@ -124196,6 +124877,9 @@ ======= "description": "This setting helps to minimize the number of rate limit errors returned from Cohere.\nBy default, the `cohere` service sets the number of requests allowed per minute to 10000.", >>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) +======= + "description": "This setting helps to minimize the number of rate limit errors returned from Cohere.\nBy default, the `cohere` service sets the number of requests allowed per minute to 10000.", +>>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) "name": "rate_limit", "required": false, "type": { @@ -124208,6 +124892,7 @@ }, { <<<<<<< HEAD +<<<<<<< HEAD <<<<<<< HEAD "description": "The name of your Azure OpenAI resource.\nYou can find this from the list of resources in 
the Azure Portal for your subscription.", "extDocId": "azureopenai-portal", @@ -124216,6 +124901,8 @@ ======= ======= >>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) +======= +>>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) "description": "The similarity measure.\nIf the `embedding_type` is `float`, the default value is `dot_product`.\nIf the `embedding_type` is `int8` or `byte`, the default value is `cosine`.", "name": "similarity", "required": false, @@ -124233,6 +124920,31 @@ { "kind": "interface", "name": { +<<<<<<< HEAD +======= + "name": "RateLimitSetting", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "The number of requests allowed per minute.", + "name": "requests_per_minute", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/_types/Services.ts#L95-L100" + }, + { + "kind": "interface", + "name": { +>>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) "name": "CohereTaskSettings", "namespace": "inference.put_cohere" }, @@ -124325,6 +125037,7 @@ { "kind": "interface", "name": { +<<<<<<< HEAD "name": "ElasticsearchServiceSettings", "namespace": "inference.put_elasticsearch" }, @@ -124467,6 +125180,8 @@ { "kind": "interface", "name": { +======= +>>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) "name": "ElserServiceSettings", "namespace": "inference.put_elser" }, @@ -124668,9 +125383,12 @@ "description": "A valid service account in JSON format for the Google Vertex AI API.", "name": "service_account_json", <<<<<<< HEAD +<<<<<<< HEAD >>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) ======= >>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) +======= +>>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) "required": true, "type": { "kind": "instance_of", @@ -124682,6 +125400,7 @@ } ], <<<<<<< HEAD +<<<<<<< HEAD <<<<<<< HEAD "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L99-L144" ======= @@ -124690,11 +125409,15 @@ ======= "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L92-L118" >>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) +======= + "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L92-L118" +>>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) }, { "kind": "interface", "name": { <<<<<<< HEAD +<<<<<<< HEAD <<<<<<< HEAD "name": "AzureOpenAITaskSettings", "namespace": "inference.put_azureopenai" @@ -124706,6 +125429,8 @@ ======= ======= >>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) +======= +>>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) "name": "GoogleVertexAITaskSettings", "namespace": "inference.put_googlevertexai" }, @@ -124714,14 +125439,18 @@ "description": "For a `text_embedding` task, truncate inputs longer than the maximum token length automatically.", "name": "auto_truncate", <<<<<<< HEAD +<<<<<<< HEAD >>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) ======= >>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) +======= +>>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) "required": false, "type": { "kind": "instance_of", "type": { <<<<<<< HEAD +<<<<<<< HEAD <<<<<<< HEAD "name": "string", "namespace": "_builtins" @@ -124733,6 +125462,8 @@ ======= ======= >>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) +======= +>>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) "name": 
"boolean", "namespace": "_builtins" } @@ -124753,21 +125484,28 @@ ], "specLocation": "inference/put_googlevertexai/PutGoogleVertexAiRequest.ts#L120-L129" <<<<<<< HEAD +<<<<<<< HEAD >>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) ======= >>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) +======= +>>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) }, { "kind": "interface", "name": { <<<<<<< HEAD <<<<<<< HEAD +<<<<<<< HEAD ======= >>>>>>> f5eaaab24 (Add Amazon Bedrock inference API (#4022)) >>>>>>> 72877ef81 (Add Amazon Bedrock inference API (#4022)) ======= >>>>>>> 38b46ca86 (Add Anthropic inference API details (#4023)) >>>>>>> 76ab18016 (Add Anthropic inference API details (#4023)) +======= +>>>>>>> 2dc985a1e (Add Cohere inference API details (#4025)) +>>>>>>> 397d37cf8 (Add Cohere inference API details (#4025)) "name": "HuggingFaceServiceSettings", "namespace": "inference.put_hugging_face" }, diff --git a/output/schema/schema.json b/output/schema/schema.json index cf9ac4d8e9..6dcb35c31c 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -9528,6 +9528,51 @@ } ] }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.13.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Create a Cohere inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `cohere` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "docId": "inference-api-put-cohere", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-cohere.html", + "name": "inference.put_cohere", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.put_cohere" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.put_cohere" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "PUT" + ], + "path": "/_inference/{task_type}/{cohere_inference_id}" + } + ] + }, { "availability": { "serverless": { @@ -152135,6 +152180,378 @@ }, "specLocation": "inference/put_azureopenai/PutAzureOpenAiRequest.ts#L95-L97" }, + { + "kind": "interface", + "name": { + "name": "CohereServiceSettings", + "namespace": "inference.put_cohere" + }, + "properties": [ + { + "description": "A valid API key for your Cohere account.\nYou can find or create your Cohere API keys on the Cohere API key settings page.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "extDocId": "cohere-api-keys", + "extDocUrl": 
"https://dashboard.cohere.com/api-keys", + "name": "api_key", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "For a `text_embedding` task, the types of embeddings you want to get back.\nUse `byte` for signed int8 embeddings (this is a synonym of `int8`).\nUse `float` for the default float embeddings.\nUse `int8` for signed int8 embeddings.", + "name": "embedding_type", + "required": false, + "serverDefault": "float", + "type": { + "kind": "instance_of", + "type": { + "name": "EmbeddingType", + "namespace": "inference.put_cohere" + } + } + }, + { + "description": "For a `completion`, `rerank`, or `text_embedding` task, the name of the model to use for the inference task.\n\n* For the available `completion` models, refer to the [Cohere command docs](https://docs.cohere.com/docs/models#command).\n* For the available `rerank` models, refer to the [Cohere rerank docs](https://docs.cohere.com/reference/rerank-1).\n* For the available `text_embedding` models, refer to [Cohere embed docs](https://docs.cohere.com/reference/embed).\n\nThe default value for a text embedding task is `embed-english-v2.0`.", + "name": "model_id", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from Cohere.\nBy default, the `cohere` service sets the number of requests allowed per minute to 10000.", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + }, + { + "description": "The similarity measure.\nIf the `embedding_type` is `float`, the default value is `dot_product`.\nIf the `embedding_type` is `int8` or `byte`, the default value is `cosine`.", + "name": "similarity", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "SimilarityType", + "namespace": "inference.put_cohere" + } + } + } + ], + "specLocation": "inference/put_cohere/PutCohereRequest.ts#L119-L160" + }, + { + "kind": "interface", + "name": { + "name": "CohereTaskSettings", + "namespace": "inference.put_cohere" + }, + "properties": [ + { + "description": "For a `text_embedding` task, the type of input passed to the model.\nValid values are:\n\n* `classification`: Use it for embeddings passed through a text classifier.\n* `clustering`: Use it for the embeddings run through a clustering algorithm.\n* `ingest`: Use it for storing document embeddings in a vector database.\n* `search`: Use it for storing embeddings of search queries run against a vector database to find relevant documents.\n\nIMPORTANT: The `input_type` field is required when using embedding models `v3` and higher.", + "name": "input_type", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InputType", + "namespace": "inference.put_cohere" + } + } + }, + { + "description": "For a `rerank` task, return doc text within the results.", + "name": "return_documents", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "boolean", + "namespace": "_builtins" + } + } + }, + { + "description": "For a `rerank` task, the number of most relevant documents to return.\nIt defaults to the number of the documents.\nIf this inference endpoint is used in a `text_similarity_reranker` retriever query and `top_n` is set, it must be greater than or 
equal to `rank_window_size` in the query.", + "name": "top_n", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "For a `text_embedding` task, the method to handle inputs longer than the maximum token length.\nValid values are:\n\n* `END`: When the input exceeds the maximum input token length, the end of the input is discarded.\n* `NONE`: When the input exceeds the maximum input token length, an error is returned.\n* `START`: When the input exceeds the maximum input token length, the start of the input is discarded.", + "name": "truncate", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "TruncateType", + "namespace": "inference.put_cohere" + } + } + } + ], + "specLocation": "inference/put_cohere/PutCohereRequest.ts#L162-L194" + }, + { + "kind": "enum", + "members": [ + { + "name": "completion" + }, + { + "name": "rerank" + }, + { + "name": "text_embedding" + } + ], + "name": { + "name": "CohereTaskType", + "namespace": "inference.put_cohere" + }, + "specLocation": "inference/put_cohere/PutCohereRequest.ts#L84-L88" + }, + { + "kind": "enum", + "members": [ + { + "name": "byte" + }, + { + "name": "float" + }, + { + "name": "int8" + } + ], + "name": { + "name": "EmbeddingType", + "namespace": "inference.put_cohere" + }, + "specLocation": "inference/put_cohere/PutCohereRequest.ts#L94-L98" + }, + { + "kind": "enum", + "members": [ + { + "name": "classification" + }, + { + "name": "clustering" + }, + { + "name": "ingest" + }, + { + "name": "search" + } + ], + "name": { + "name": "InputType", + "namespace": "inference.put_cohere" + }, + "specLocation": "inference/put_cohere/PutCohereRequest.ts#L100-L105" + }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, + { + "description": "The type of service supported for the specified task type. 
In this case, `cohere`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_cohere" + } + } + }, + { + "description": "Settings used to install the inference model.\nThese settings are specific to the `cohere` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "CohereServiceSettings", + "namespace": "inference.put_cohere" + } + } + }, + { + "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "CohereTaskSettings", + "namespace": "inference.put_cohere" + } + } + } + ] + }, + "description": "Create a Cohere inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `cohere` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "examples": { + "PutCohereRequestExample1": { + "description": "Run `PUT _inference/text_embedding/cohere-embeddings` to create an inference endpoint that performs a text embedding task.", + "summary": "A text embedding task", + "value": "{\n \"service\": \"cohere\",\n \"service_settings\": {\n \"api_key\": \"Cohere-API-key\",\n \"model_id\": \"embed-english-light-v3.0\",\n \"embedding_type\": \"byte\"\n }\n}" + }, + "PutCohereRequestExample2": { + "description": "Run `PUT _inference/rerank/cohere-rerank` to create an inference endpoint that performs a rerank task.", + "summary": "A rerank task", + "value": "{\n \"service\": \"cohere\",\n \"service_settings\": {\n \"api_key\": \"Cohere-API-key\",\n \"model_id\": \"rerank-english-v3.0\"\n },\n \"task_settings\": {\n \"top_n\": 10,\n \"return_documents\": true\n }\n}" + } + }, + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.put_cohere" + }, + "path": [ + { + "description": "The type of the inference task that the model will perform.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "CohereTaskType", + "namespace": "inference.put_cohere" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "cohere_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_cohere/PutCohereRequest.ts#L28-L82" + }, + { + "kind": "response", + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "name": { + "name": "Response", + "namespace": "inference.put_cohere" + }, + "specLocation": "inference/put_cohere/PutCohereResponse.ts#L22-L24" + }, + { + "kind": "enum", + "members": [ + { + "name": "cohere" + } + ], + "name": { + "name": 
"ServiceType", + "namespace": "inference.put_cohere" + }, + "specLocation": "inference/put_cohere/PutCohereRequest.ts#L90-L92" + }, + { + "kind": "enum", + "members": [ + { + "name": "cosine" + }, + { + "name": "dot_product" + }, + { + "name": "l2_norm" + } + ], + "name": { + "name": "SimilarityType", + "namespace": "inference.put_cohere" + }, + "specLocation": "inference/put_cohere/PutCohereRequest.ts#L107-L111" + }, + { + "kind": "enum", + "members": [ + { + "name": "END" + }, + { + "name": "NONE" + }, + { + "name": "START" + } + ], + "name": { + "name": "TruncateType", + "namespace": "inference.put_cohere" + }, + "specLocation": "inference/put_cohere/PutCohereRequest.ts#L113-L117" + }, { "kind": "interface", "name": { diff --git a/output/typescript/types.ts b/output/typescript/types.ts index 4442855b23..c90515b544 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -13403,6 +13403,46 @@ export type InferencePutAzureopenaiResponse = InferenceInferenceEndpointInfo export type InferencePutAzureopenaiServiceType = 'azureopenai' +export interface InferencePutCohereCohereServiceSettings { + api_key: string + embedding_type?: InferencePutCohereEmbeddingType + model_id?: string + rate_limit?: InferenceRateLimitSetting + similarity?: InferencePutCohereSimilarityType +} + +export interface InferencePutCohereCohereTaskSettings { + input_type?: InferencePutCohereInputType + return_documents?: boolean + top_n?: integer + truncate?: InferencePutCohereTruncateType +} + +export type InferencePutCohereCohereTaskType = 'completion' | 'rerank' | 'text_embedding' + +export type InferencePutCohereEmbeddingType = 'byte' | 'float' | 'int8' + +export type InferencePutCohereInputType = 'classification' | 'clustering' | 'ingest' | 'search' + +export interface InferencePutCohereRequest extends RequestBase { + task_type: InferencePutCohereCohereTaskType + cohere_inference_id: Id + body?: { + chunking_settings?: InferenceInferenceChunkingSettings + service: InferencePutCohereServiceType + service_settings: InferencePutCohereCohereServiceSettings + task_settings?: InferencePutCohereCohereTaskSettings + } +} + +export type InferencePutCohereResponse = InferenceInferenceEndpointInfo + +export type InferencePutCohereServiceType = 'cohere' + +export type InferencePutCohereSimilarityType = 'cosine' | 'dot_product' | 'l2_norm' + +export type InferencePutCohereTruncateType = 'END' | 'NONE' | 'START' + export interface InferencePutElasticsearchAdaptiveAllocations { enabled?: boolean max_number_of_allocations?: integer diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv index 2c13436a25..02f3a093f2 100644 --- a/specification/_doc_ids/table.csv +++ b/specification/_doc_ids/table.csv @@ -105,6 +105,8 @@ cluster-state,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operat cluster-stats,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-stats cluster-update-settings,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-cluster-put-settings cluster,https://www.elastic.co/docs/api/doc/elasticsearch/group/endpoint-cluster +cohere-api-keys,https://dashboard.cohere.com/api-keys +cohere-models,https://docs.cohere.com/docs/models#command common-options,https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html community-id-processor,https://www.elastic.co/guide/en/elasticsearch/reference/current/community-id-processor.html 
connector-sync-job-cancel,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-connector-sync-job-cancel diff --git a/specification/_json_spec/inference.put_cohere.json b/specification/_json_spec/inference.put_cohere.json new file mode 100644 index 0000000000..a00518f2c5 --- /dev/null +++ b/specification/_json_spec/inference.put_cohere.json @@ -0,0 +1,35 @@ +{ + "inference.put_cohere": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-cohere.html", + "description": "Configure a Cohere inference endpoint" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": ["application/json"], + "content_type": ["application/json"] + }, + "url": { + "paths": [ + { + "path": "/_inference/{task_type}/{cohere_inference_id}", + "methods": ["PUT"], + "parts": { + "task_type": { + "type": "string", + "description": "The task type" + }, + "cohere_inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The inference endpoint's task and service settings" + } + } +} diff --git a/specification/inference/put_cohere/PutCohereRequest.ts b/specification/inference/put_cohere/PutCohereRequest.ts new file mode 100644 index 0000000000..f54a4ef19e --- /dev/null +++ b/specification/inference/put_cohere/PutCohereRequest.ts @@ -0,0 +1,194 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { + InferenceChunkingSettings, + RateLimitSetting +} from '@inference/_types/Services' +import { RequestBase } from '@_types/Base' +import { Id } from '@_types/common' +import { integer } from '@_types/Numeric' + +/** + * Create a Cohere inference endpoint. + * + * Create an inference endpoint to perform an inference task with the `cohere` service. + * + * When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + * After creating the endpoint, wait for the model deployment to complete before using it. + * To verify the deployment status, use the get trained model statistics API. + * Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`. + * Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources. 
+ * @rest_spec_name inference.put_cohere + * @availability stack since=8.13.0 stability=stable visibility=public + * @availability serverless stability=stable visibility=public + * @cluster_privileges manage_inference + * @doc_id inference-api-put-cohere + */ +export interface Request extends RequestBase { + urls: [ + { + path: '/_inference/{task_type}/{cohere_inference_id}' + methods: ['PUT'] + } + ] + path_parts: { + /** + * The type of the inference task that the model will perform. + */ + task_type: CohereTaskType + /** + * The unique identifier of the inference endpoint. + */ + cohere_inference_id: Id + } + body: { + /** + * The chunking configuration object. + * @ext_doc_id inference-chunking + */ + chunking_settings?: InferenceChunkingSettings + /** + * The type of service supported for the specified task type. In this case, `cohere`. + */ + service: ServiceType + /** + * Settings used to install the inference model. + * These settings are specific to the `cohere` service. + */ + service_settings: CohereServiceSettings + /** + * Settings to configure the inference task. + * These settings are specific to the task type you specified. + */ + task_settings?: CohereTaskSettings + } +} + +export enum CohereTaskType { + completion, + rerank, + text_embedding +} + +export enum ServiceType { + cohere +} + +export enum EmbeddingType { + byte, + float, + int8 +} + +export enum InputType { + classification, + clustering, + ingest, + search +} + +export enum SimilarityType { + cosine, + dot_product, + l2_norm +} + +export enum TruncateType { + END, + NONE, + START +} + +export class CohereServiceSettings { + /** + * A valid API key for your Cohere account. + * You can find or create your Cohere API keys on the Cohere API key settings page. + * + * IMPORTANT: You need to provide the API key only once, during the inference model creation. + * The get inference endpoint API does not retrieve your API key. + * After creating the inference model, you cannot change the associated API key. + * If you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key. + * @ext_doc_id cohere-api-keys + */ + api_key: string + /** + * For a `text_embedding` task, the types of embeddings you want to get back. + * Use `byte` for signed int8 embeddings (this is a synonym of `int8`). + * Use `float` for the default float embeddings. + * Use `int8` for signed int8 embeddings. + * @server_default float + */ + embedding_type?: EmbeddingType + /** + * For a `completion`, `rerank`, or `text_embedding` task, the name of the model to use for the inference task. + * + * * For the available `completion` models, refer to the [Cohere command docs](https://docs.cohere.com/docs/models#command). + * * For the available `rerank` models, refer to the [Cohere rerank docs](https://docs.cohere.com/reference/rerank-1). + * * For the available `text_embedding` models, refer to [Cohere embed docs](https://docs.cohere.com/reference/embed). + * + * The default value for a text embedding task is `embed-english-v2.0`. + */ + model_id?: string + /** + * This setting helps to minimize the number of rate limit errors returned from Cohere. + * By default, the `cohere` service sets the number of requests allowed per minute to 10000. + */ + rate_limit?: RateLimitSetting + /** + * The similarity measure. + * If the `embedding_type` is `float`, the default value is `dot_product`. + * If the `embedding_type` is `int8` or `byte`, the default value is `cosine`. 
+ */ + similarity?: SimilarityType +} + +export class CohereTaskSettings { + /** + * For a `text_embedding` task, the type of input passed to the model. + * Valid values are: + * + * * `classification`: Use it for embeddings passed through a text classifier. + * * `clustering`: Use it for the embeddings run through a clustering algorithm. + * * `ingest`: Use it for storing document embeddings in a vector database. + * * `search`: Use it for storing embeddings of search queries run against a vector database to find relevant documents. + * + * IMPORTANT: The `input_type` field is required when using embedding models `v3` and higher. + */ + input_type?: InputType + /** + * For a `rerank` task, return doc text within the results. + */ + return_documents?: boolean + /** + * For a `rerank` task, the number of most relevant documents to return. + * It defaults to the number of documents in the request. + * If this inference endpoint is used in a `text_similarity_reranker` retriever query and `top_n` is set, it must be greater than or equal to `rank_window_size` in the query. + */ + top_n?: integer + /** + * For a `text_embedding` task, the method to handle inputs longer than the maximum token length. + * Valid values are: + * + * * `END`: When the input exceeds the maximum input token length, the end of the input is discarded. + * * `NONE`: When the input exceeds the maximum input token length, an error is returned. + * * `START`: When the input exceeds the maximum input token length, the start of the input is discarded. + */ + truncate?: TruncateType +} diff --git a/specification/inference/put_cohere/PutCohereResponse.ts b/specification/inference/put_cohere/PutCohereResponse.ts new file mode 100644 index 0000000000..d40639b031 --- /dev/null +++ b/specification/inference/put_cohere/PutCohereResponse.ts @@ -0,0 +1,24 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { InferenceEndpointInfo } from '@inference/_types/Services' + +export class Response { + body: InferenceEndpointInfo +} diff --git a/specification/inference/put_cohere/examples/request/PutCohereRequestExample1.yaml b/specification/inference/put_cohere/examples/request/PutCohereRequestExample1.yaml new file mode 100644 index 0000000000..c2cf0070a0 --- /dev/null +++ b/specification/inference/put_cohere/examples/request/PutCohereRequestExample1.yaml @@ -0,0 +1,13 @@ +summary: A text embedding task +description: Run `PUT _inference/text_embedding/cohere-embeddings` to create an inference endpoint that performs a text embedding task. 
+# method_request: "PUT _inference/text_embedding/cohere-embeddings" +# type: "request" +value: |- + { + "service": "cohere", + "service_settings": { + "api_key": "Cohere-Api-key", + "model_id": "embed-english-light-v3.0", + "embedding_type": "byte" + } + } diff --git a/specification/inference/put_cohere/examples/request/PutCohereRequestExample2.yaml b/specification/inference/put_cohere/examples/request/PutCohereRequestExample2.yaml new file mode 100644 index 0000000000..f758cf6a1d --- /dev/null +++ b/specification/inference/put_cohere/examples/request/PutCohereRequestExample2.yaml @@ -0,0 +1,16 @@ +summary: A rerank task +description: Run `PUT _inference/rerank/cohere-rerank` to create an inference endpoint that performs a rerank task. +# method_request: "PUT _inference/rerank/cohere-rerank" +# type: "request" +value: |- + { + "service": "cohere", + "service_settings": { + "api_key": "Cohere-API-key", + "model_id": "rerank-english-v3.0" + }, + "task_settings": { + "top_n": 10, + "return_documents": true + } + }