From 92fdccc8181866f990de29485d6773a1a5dbad0f Mon Sep 17 00:00:00 2001 From: Lisa Cawley Date: Tue, 25 Mar 2025 09:04:52 -0700 Subject: [PATCH] Add Amazon Bedrock inference API (#4022) (cherry picked from commit f5eaaab2433911c3853e68272e8bcd5e5f7cd5f1) --- output/openapi/elasticsearch-openapi.json | 185 ++- .../elasticsearch-serverless-openapi.json | 1235 ++++++++++++++- output/schema/schema-serverless.json | 1370 +++++++++++++++-- output/schema/schema.json | 355 ++++- output/typescript/types.ts | 33 + package-lock.json | 26 +- specification/_doc_ids/table.csv | 3 + .../inference.put_amazonbedrock.json | 35 + .../PutAmazonBedrockRequest.ts | 163 ++ .../PutAmazonBedrockResponse.ts | 24 + .../PutAmazonBedrockRequestExample1.yaml | 15 + .../PutAmazonBedrockRequestExample2.yaml | 12 + .../request/PutOpenAiRequestExample2.yaml | 13 +- 13 files changed, 3299 insertions(+), 170 deletions(-) create mode 100644 specification/_json_spec/inference.put_amazonbedrock.json create mode 100644 specification/inference/put_amazonbedrock/PutAmazonBedrockRequest.ts create mode 100644 specification/inference/put_amazonbedrock/PutAmazonBedrockResponse.ts create mode 100644 specification/inference/put_amazonbedrock/examples/request/PutAmazonBedrockRequestExample1.yaml create mode 100644 specification/inference/put_amazonbedrock/examples/request/PutAmazonBedrockRequestExample2.yaml diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 6a5c5edaa4..c81b55f36a 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -17861,6 +17861,92 @@ "x-state": "Added in 9.0.0" } }, + "/_inference/{task_type}/{amazonbedrock_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create an Amazon Bedrock inference endpoint", + "description": "Creates an inference endpoint to perform an inference task with the `amazonbedrock` service.\n\n>info\n> You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. 
If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
+ "operationId": "inference-put-amazonbedrock",
+ "parameters": [
+ {
+ "in": "path",
+ "name": "task_type",
+ "description": "The type of the inference task that the model will perform.",
+ "required": true,
+ "deprecated": false,
+ "schema": {
+ "$ref": "#/components/schemas/inference.put_amazonbedrock:AmazonBedrockTaskType"
+ },
+ "style": "simple"
+ },
+ {
+ "in": "path",
+ "name": "amazonbedrock_inference_id",
+ "description": "The unique identifier of the inference endpoint.",
+ "required": true,
+ "deprecated": false,
+ "schema": {
+ "$ref": "#/components/schemas/_types:Id"
+ },
+ "style": "simple"
+ }
+ ],
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "object",
+ "properties": {
+ "chunking_settings": {
+ "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings"
+ },
+ "service": {
+ "$ref": "#/components/schemas/inference.put_amazonbedrock:ServiceType"
+ },
+ "service_settings": {
+ "$ref": "#/components/schemas/inference.put_amazonbedrock:AmazonBedrockServiceSettings"
+ },
+ "task_settings": {
+ "$ref": "#/components/schemas/inference.put_amazonbedrock:AmazonBedrockTaskSettings"
+ }
+ },
+ "required": [
+ "service",
+ "service_settings"
+ ]
+ },
+ "examples": {
+ "PutAmazonBedrockRequestExample1": {
+ "summary": "A text embedding task",
+ "description": "Run `PUT _inference/text_embedding/amazon_bedrock_embeddings` to create an inference endpoint that performs a text embedding task.",
+ "value": "{\n  \"service\": \"amazonbedrock\",\n  \"service_settings\": {\n    \"access_key\": \"AWS-access-key\",\n    \"secret_key\": \"AWS-secret-key\",\n    \"region\": \"us-east-1\",\n    \"provider\": \"amazontitan\",\n    \"model\": \"amazon.titan-embed-text-v2:0\"\n  }\n}"
+ },
+ "PutAmazonBedrockRequestExample2": {
+ "summary": "A completion task",
+ "description": "Run `PUT _inference/completion/amazon_bedrock_completion` to create an inference endpoint to perform a completion task.",
+ "value": "{\n  \"service\": \"amazonbedrock\",\n  \"service_settings\": {\n    \"access_key\": \"AWS-access-key\",\n    \"secret_key\": \"AWS-secret-key\",\n    \"region\": \"us-east-1\",\n    \"provider\": \"amazontitan\",\n    \"model\": \"amazon.titan-text-premier-v1:0\"\n  }\n}"
+ }
+ }
+ }
+ }
+ },
+ "responses": {
+ "200": {
+ "description": "",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo"
+ }
+ }
+ }
+ }
+ },
+ "x-state": "Added in 8.12.0"
+ }
+ },
 "/_inference/{task_type}/{anthropic_inference_id}": {
 "put": {
 "tags": [
 "inference"
 ],
@@ -18689,8 +18775,8 @@
 },
 "PutOpenAiRequestExample2": {
 "summary": "A completion task",
- "description": "Run `PUT _inference/completion/openai-completion` to create an inference endpoint to perform a completion task type.",
- "value": "{\n  \"service\": \"openai\",\n  \"service_settings\": {\n    \"api_key\": \"OpenAI-API-Key\",\n    \"model_id\": \"gpt-3.5-turbo\"\n  }\n}"
+ "description": "Run `PUT _inference/completion/openai-completion` to create an inference endpoint to perform a completion task.",
+ "value": "{\n  \"service\": \"openai\",\n  \"service_settings\": {\n    \"api_key\": \"OpenAI-API-Key\",\n    \"model_id\": \"gpt-3.5-turbo\"\n  }\n}"
 }
 }
 }
@@ -77695,6 +77781,92 @@
 "inference._types:ServiceSettings": {
 "type": "object"
 },
+ "inference.put_amazonbedrock:AmazonBedrockTaskType": {
+ "type": "string",
+ "enum": [
+ "completion",
+ "text_embedding"
+ ]
+ },
+ "inference.put_amazonbedrock:ServiceType": {
+ "type": "string",
+ "enum": [
+ "amazonbedrock"
+ ]
+ },
+ "inference.put_amazonbedrock:AmazonBedrockServiceSettings": {
+ "type": "object",
+ "properties": {
+ "access_key": {
+ "description": "A valid AWS access key that has permissions to use Amazon Bedrock and access to models for inference requests.",
+ "type": "string"
+ },
+ "model": {
+ "externalDocs": {
+ "url": "https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html"
+ },
+ "description": "The base model ID or an ARN to a custom model based on a foundational model.\nThe base model IDs can be found in the Amazon Bedrock documentation.\nNote that the model ID must be available for the provider chosen and your IAM user must have access to the model.",
+ "type": "string"
+ },
+ "provider": {
+ "description": "The model provider for your deployment.\nNote that some providers may support only certain task types.\nSupported providers include:\n\n* `amazontitan` - available for `text_embedding` and `completion` task types\n* `anthropic` - available for `completion` task type only\n* `ai21labs` - available for `completion` task type only\n* `cohere` - available for `text_embedding` and `completion` task types\n* `meta` - available for `completion` task type only\n* `mistral` - available for `completion` task type only",
+ "type": "string"
+ },
+ "region": {
+ "externalDocs": {
+ "url": "https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html"
+ },
+ "description": "The region that your model or ARN is deployed in.\nThe list of available regions per model can be found in the Amazon Bedrock documentation.",
+ "type": "string"
+ },
+ "rate_limit": {
+ "$ref": "#/components/schemas/inference._types:RateLimitSetting"
+ },
+ "secret_key": {
+ "externalDocs": {
+ "url": "https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html"
+ },
+ "description": "A valid AWS secret key that is paired with the `access_key`.\nFor information about creating and managing access and secret keys, refer to the AWS documentation.",
+ "type": "string"
+ }
+ },
+ "required": [
+ "access_key",
+ "model",
+ "region",
+ "secret_key"
+ ]
+ },
+ "inference._types:RateLimitSetting": {
+ "type": "object",
+ "properties": {
+ "requests_per_minute": {
+ "description": "The number of requests allowed per minute.",
+ "type": "number"
+ }
+ }
+ },
+ "inference.put_amazonbedrock:AmazonBedrockTaskSettings": {
+ "type": "object",
+ "properties": {
+ "max_new_tokens": {
+ "description": "For a `completion` task, it sets the maximum number for the output tokens to be generated.",
+ "type": "number"
+ },
+ "temperature": {
+ "description": "For a `completion` task, it is a number between 0.0 and 1.0 that controls the apparent creativity of the results.\nAt temperature 0.0 the model is most deterministic, at temperature 1.0 most random.\nIt should not be used if `top_p` or `top_k` is 
specified.", + "type": "number" + }, + "top_k": { + "description": "For a `completion` task, it limits samples to the top-K most likely words, balancing coherence and variability.\nIt is only available for anthropic, cohere, and mistral providers.\nIt is an alternative to `temperature`; it should not be used if `temperature` is specified.", + "type": "number" + }, + "top_p": { + "description": "For a `completion` task, it is a number in the range of 0.0 to 1.0, to eliminate low-probability tokens.\nTop-p uses nucleus sampling to select top tokens whose sum of likelihoods does not exceed a certain value, ensuring both variety and coherence.\nIt is an alternative to `temperature`; it should not be used if `temperature` is specified.", + "type": "number" + } + } + }, "inference.put_anthropic:AnthropicTaskType": { "type": "string", "enum": [ @@ -77727,15 +77899,6 @@ "model_id" ] }, - "inference._types:RateLimitSetting": { - "type": "object", - "properties": { - "requests_per_minute": { - "description": "The number of requests allowed per minute.", - "type": "number" - } - } - }, "inference.put_anthropic:AnthropicTaskSettings": { "type": "object", "properties": { diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 4f836cc6ca..b7f4fca5f3 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -9817,6 +9817,258 @@ "x-state": "Added in 9.0.0" } }, + "/_inference/{task_type}/{amazonbedrock_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create an Amazon Bedrock inference endpoint", + "description": "Creates an inference endpoint to perform an inference task with the `amazonbedrock` service.\n\n>info\n> You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. 
If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
+ "operationId": "inference-put-amazonbedrock",
+ "parameters": [
+ {
+ "in": "path",
+ "name": "task_type",
+ "description": "The type of the inference task that the model will perform.",
+ "required": true,
+ "deprecated": false,
+ "schema": {
+ "$ref": "#/components/schemas/inference.put_amazonbedrock:AmazonBedrockTaskType"
+ },
+ "style": "simple"
+ },
+ {
+ "in": "path",
+ "name": "amazonbedrock_inference_id",
+ "description": "The unique identifier of the inference endpoint.",
+ "required": true,
+ "deprecated": false,
+ "schema": {
+ "$ref": "#/components/schemas/_types:Id"
+ },
+ "style": "simple"
+ }
+ ],
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "object",
+ "properties": {
+ "chunking_settings": {
+ "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings"
+ },
+ "service": {
+ "$ref": "#/components/schemas/inference.put_amazonbedrock:ServiceType"
+ },
+ "service_settings": {
+ "$ref": "#/components/schemas/inference.put_amazonbedrock:AmazonBedrockServiceSettings"
+ },
+ "task_settings": {
+ "$ref": "#/components/schemas/inference.put_amazonbedrock:AmazonBedrockTaskSettings"
+ }
+ },
+ "required": [
+ "service",
+ "service_settings"
+ ]
+ },
+ "examples": {
+ "PutAmazonBedrockRequestExample1": {
+ "summary": "A text embedding task",
+ "description": "Run `PUT _inference/text_embedding/amazon_bedrock_embeddings` to create an inference endpoint that performs a text embedding task.",
+ "value": "{\n  \"service\": \"amazonbedrock\",\n  \"service_settings\": {\n    \"access_key\": \"AWS-access-key\",\n    \"secret_key\": \"AWS-secret-key\",\n    \"region\": \"us-east-1\",\n    \"provider\": \"amazontitan\",\n    \"model\": \"amazon.titan-embed-text-v2:0\"\n  }\n}"
+ },
+ "PutAmazonBedrockRequestExample2": {
+ "summary": "A completion task",
+ "description": "Run `PUT _inference/completion/amazon_bedrock_completion` to create an inference endpoint to perform a completion task.",
+ "value": "{\n  \"service\": \"amazonbedrock\",\n  \"service_settings\": {\n    \"access_key\": \"AWS-access-key\",\n    \"secret_key\": \"AWS-secret-key\",\n    \"region\": \"us-east-1\",\n    \"provider\": \"amazontitan\",\n    \"model\": \"amazon.titan-text-premier-v1:0\"\n  }\n}"
+ }
+ }
+ }
+ }
+ },
+ "responses": {
+ "200": {
+ "description": "",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo"
+ }
+ }
+ }
+ }
+ },
+ "x-state": "Added in 8.12.0"
+ }
+ },
+ "/_inference/{task_type}/{anthropic_inference_id}": {
+ "put": {
+ "tags": [
+ "inference"
+ ],
+ "summary": "Create an Anthropic inference endpoint",
+ "description": "Create an inference endpoint to perform an inference task with the `anthropic` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo 
verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "operationId": "inference-put-anthropic", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The task type.\nThe only valid task type for the model to perform is `completion`.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_anthropic:AnthropicTaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "anthropic_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, + "service": { + "$ref": "#/components/schemas/inference.put_anthropic:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_anthropic:AnthropicServiceSettings" + }, + "task_settings": { + "$ref": "#/components/schemas/inference.put_anthropic:AnthropicTaskSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + }, + "examples": { + "PutAnthropicRequestExample1": { + "description": "Run `PUT _inference/completion/anthropic_completion` to create an inference endpoint that performs a completion task.", + "value": "{\n \"service\": \"anthropic\",\n \"service_settings\": {\n \"api_key\": \"Anthropic-Api-Key\",\n \"model_id\": \"Model-ID\"\n },\n \"task_settings\": {\n \"max_tokens\": 1024\n }\n}" + } + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + } + } + } + } + }, + "x-state": "Added in 8.16.0" + } + }, + "/_inference/{task_type}/{cohere_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create a Cohere inference endpoint", + "description": "Create an inference endpoint to perform an inference task with the `cohere` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "operationId": "inference-put-cohere", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The type of the inference task that the model will perform.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_cohere:CohereTaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "cohere_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": 
"#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, + "service": { + "$ref": "#/components/schemas/inference.put_cohere:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_cohere:CohereServiceSettings" + }, + "task_settings": { + "$ref": "#/components/schemas/inference.put_cohere:CohereTaskSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + }, + "examples": { + "PutCohereRequestExample1": { + "summary": "A text embedding task", + "description": "Run `PUT _inference/text_embedding/cohere-embeddings` to create an inference endpoint that performs a text embedding task.", + "value": "{\n \"service\": \"cohere\",\n \"service_settings\": {\n \"api_key\": \"Cohere-Api-key\",\n \"model_id\": \"embed-english-light-v3.0\",\n \"embedding_type\": \"byte\"\n }\n}" + }, + "PutCohereRequestExample2": { + "summary": "A rerank task", + "description": "Run `PUT _inference/rerank/cohere-rerank` to create an inference endpoint that performs a rerank task.", + "value": "{\n \"service\": \"cohere\",\n \"service_settings\": {\n \"api_key\": \"Cohere-API-key\",\n \"model_id\": \"rerank-english-v3.0\"\n },\n \"task_settings\": {\n \"top_n\": 10,\n \"return_documents\": true\n }\n}" + } + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + } + } + } + } + }, + "x-state": "Added in 8.13.0" + } + }, "/_inference/{task_type}/{eis_inference_id}": { "put": { "tags": [ @@ -9885,6 +10137,372 @@ "x-state": "Added in 8.12.0" } }, + "/_inference/{task_type}/{elasticsearch_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create an OpenAI inference endpoint", + "description": "Create an inference endpoint to perform an inference task with the `elasticsearch` service.\n\n> info\n> Your Elasticsearch deployment contains preconfigured ELSER and E5 inference endpoints, you only need to create the enpoints using the API if you want to customize the settings.\n\nIf you use the ELSER or the E5 model through the `elasticsearch` service, the API request will automatically download and deploy the model if it isn't downloaded yet.\n\n> info\n> You might see a 502 bad gateway error in the response when using the Kibana Console. This error usually just reflects a timeout, while the model downloads in the background. You can check the download progress in the Machine Learning UI. 
If using the Python client, you can set the timeout parameter to a higher value.\n\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "operationId": "inference-put-elasticsearch", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The type of the inference task that the model will perform.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_elasticsearch:ElasticsearchTaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "elasticsearch_inference_id", + "description": "The unique identifier of the inference endpoint.\nThe must not match the `model_id`.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, + "service": { + "$ref": "#/components/schemas/inference.put_elasticsearch:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_elasticsearch:ElasticsearchServiceSettings" + }, + "task_settings": { + "$ref": "#/components/schemas/inference.put_elasticsearch:ElasticsearchTaskSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + }, + "examples": { + "PutElasticsearchRequestExample1": { + "summary": "ELSER sparse embedding task", + "description": "Run `PUT _inference/sparse_embedding/my-elser-model` to create an inference endpoint that performs a `sparse_embedding` task. The `model_id` must be the ID of one of the built-in ELSER models. The API will automatically download the ELSER model if it isn't already downloaded and then deploy the model.", + "value": "{\n \"service\": \"elasticsearch\",\n \"service_settings\": {\n \"adaptive_allocations\": { \n \"enabled\": true,\n \"min_number_of_allocations\": 1,\n \"max_number_of_allocations\": 4\n },\n \"num_threads\": 1,\n \"model_id\": \".elser_model_2\" \n }\n}" + }, + "PutElasticsearchRequestExample2": { + "summary": "Elastic rerank task", + "description": "Run `PUT _inference/rerank/my-elastic-rerank` to create an inference endpoint that performs a rerank task using the built-in Elastic Rerank cross-encoder model. The `model_id` must be `.rerank-v1`, which is the ID of the built-in Elastic Rerank model. The API will automatically download the Elastic Rerank model if it isn't already downloaded and then deploy the model. Once deployed, the model can be used for semantic re-ranking with a `text_similarity_reranker` retriever.", + "value": "{\n \"service\": \"elasticsearch\",\n \"service_settings\": {\n \"model_id\": \".rerank-v1\", \n \"num_threads\": 1,\n \"adaptive_allocations\": { \n \"enabled\": true,\n \"min_number_of_allocations\": 1,\n \"max_number_of_allocations\": 4\n }\n }\n}" + }, + "PutElasticsearchRequestExample3": { + "summary": "E5 text embedding task", + "description": "Run `PUT _inference/text_embedding/my-e5-model` to create an inference endpoint that performs a `text_embedding` task. 
The `model_id` must be the ID of one of the built-in E5 models. The API will automatically download the E5 model if it isn't already downloaded and then deploy the model.", + "value": "{\n \"service\": \"elasticsearch\",\n \"service_settings\": {\n \"num_allocations\": 1,\n \"num_threads\": 1,\n \"model_id\": \".multilingual-e5-small\" \n }\n}" + }, + "PutElasticsearchRequestExample4": { + "summary": "Eland text embedding task", + "description": "Run `PUT _inference/text_embedding/my-msmarco-minilm-model` to create an inference endpoint that performs a `text_embedding` task with a model that was uploaded by Eland.", + "value": "{\n \"service\": \"elasticsearch\",\n \"service_settings\": {\n \"num_allocations\": 1,\n \"num_threads\": 1,\n \"model_id\": \"msmarco-MiniLM-L12-cos-v5\" \n }\n}" + }, + "PutElasticsearchRequestExample5": { + "summary": "Adaptive allocation", + "description": "Run `PUT _inference/text_embedding/my-e5-model` to create an inference endpoint that performs a `text_embedding` task and to configure adaptive allocations. The API request will automatically download the E5 model if it isn't already downloaded and then deploy the model.", + "value": "{\n \"service\": \"elasticsearch\",\n \"service_settings\": {\n \"adaptive_allocations\": {\n \"enabled\": true,\n \"min_number_of_allocations\": 3,\n \"max_number_of_allocations\": 10\n },\n \"num_threads\": 1,\n \"model_id\": \".multilingual-e5-small\"\n }\n}" + }, + "PutElasticsearchRequestExample6": { + "summary": "Existing model deployment", + "description": "Run `PUT _inference/sparse_embedding/use_existing_deployment` to use an already existing model deployment when creating an inference endpoint.", + "value": "{\n \"service\": \"elasticsearch\",\n \"service_settings\": {\n \"deployment_id\": \".elser_model_2\"\n }\n}" + } + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + }, + "examples": { + "PutElasticsearchResponseExample1": { + "description": "A successful response from `PUT _inference/sparse_embedding/use_existing_deployment`. It contains the model ID and the threads and allocations settings from the model deployment.\n", + "value": "{\n \"inference_id\": \"use_existing_deployment\",\n \"task_type\": \"sparse_embedding\",\n \"service\": \"elasticsearch\",\n \"service_settings\": {\n \"num_allocations\": 2,\n \"num_threads\": 1,\n \"model_id\": \".elser_model_2\",\n \"deployment_id\": \".elser_model_2\"\n },\n \"chunking_settings\": {\n \"strategy\": \"sentence\",\n \"max_chunk_size\": 250,\n \"sentence_overlap\": 1\n }\n}" + } + } + } + } + } + }, + "x-state": "Added in 8.13.0" + } + }, + "/_inference/{task_type}/{elser_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create an ELSER inference endpoint", + "description": "Create an inference endpoint to perform an inference task with the `elser` service.\nYou can also deploy ELSER by using the Elasticsearch inference integration.\n\n> info\n> Your Elasticsearch deployment contains a preconfigured ELSER inference endpoint, you only need to create the enpoint using the API if you want to customize the settings.\n\nThe API request will automatically download and deploy the ELSER model if it isn't already downloaded.\n\n> info\n> You might see a 502 bad gateway error in the response when using the Kibana Console. This error usually just reflects a timeout, while the model downloads in the background. 
You can check the download progress in the Machine Learning UI. If using the Python client, you can set the timeout parameter to a higher value.\n\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "operationId": "inference-put-elser", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The type of the inference task that the model will perform.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_elser:ElserTaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "elser_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, + "service": { + "$ref": "#/components/schemas/inference.put_elser:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_elser:ElserServiceSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + }, + "examples": { + "PutElserRequestExample1": { + "summary": "A sparse embedding task", + "description": "Run `PUT _inference/sparse_embedding/my-elser-model` to create an inference endpoint that performs a `sparse_embedding` task. The request will automatically download the ELSER model if it isn't already downloaded and then deploy the model.", + "value": "{\n \"service\": \"elser\",\n \"service_settings\": {\n \"num_allocations\": 1,\n \"num_threads\": 1\n }\n}" + }, + "PutElserRequestExample2": { + "summary": "Adaptive allocations", + "description": "Run `PUT _inference/sparse_embedding/my-elser-model` to create an inference endpoint that performs a `sparse_embedding` task with adaptive allocations. 
When adaptive allocations are enabled, the number of allocations of the model is set automatically based on the current load.",
+ "value": "{\n  \"service\": \"elser\",\n  \"service_settings\": {\n    \"adaptive_allocations\": {\n      \"enabled\": true,\n      \"min_number_of_allocations\": 3,\n      \"max_number_of_allocations\": 10\n    },\n    \"num_threads\": 1\n  }\n}"
+ }
+ }
+ }
+ }
+ },
+ "responses": {
+ "200": {
+ "description": "",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo"
+ },
+ "examples": {
+ "PutElserResponseExample1": {
+ "description": "A successful response when creating an ELSER inference endpoint.",
+ "value": "{\n  \"inference_id\": \"my-elser-model\",\n  \"task_type\": \"sparse_embedding\",\n  \"service\": \"elser\",\n  \"service_settings\": {\n    \"num_allocations\": 1,\n    \"num_threads\": 1\n  },\n  \"task_settings\": {}\n}"
+ }
+ }
+ }
+ }
+ }
+ },
+ "deprecated": true,
+ "x-state": "Added in 8.11.0"
+ }
+ },
+ "/_inference/{task_type}/{googleaistudio_inference_id}": {
+ "put": {
+ "tags": [
+ "inference"
+ ],
+ "summary": "Create a Google AI Studio inference endpoint",
+ "description": "Create an inference endpoint to perform an inference task with the `googleaistudio` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
+ "operationId": "inference-put-googleaistudio",
+ "parameters": [
+ {
+ "in": "path",
+ "name": "task_type",
+ "description": "The type of the inference task that the model will perform.",
+ "required": true,
+ "deprecated": false,
+ "schema": {
+ "$ref": "#/components/schemas/inference.put_googleaistudio:GoogleAiStudioTaskType"
+ },
+ "style": "simple"
+ },
+ {
+ "in": "path",
+ "name": "googleaistudio_inference_id",
+ "description": "The unique identifier of the inference endpoint.",
+ "required": true,
+ "deprecated": false,
+ "schema": {
+ "$ref": "#/components/schemas/_types:Id"
+ },
+ "style": "simple"
+ }
+ ],
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "object",
+ "properties": {
+ "chunking_settings": {
+ "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings"
+ },
+ "service": {
+ "$ref": "#/components/schemas/inference.put_googleaistudio:ServiceType"
+ },
+ "service_settings": {
+ "$ref": "#/components/schemas/inference.put_googleaistudio:GoogleAiStudioServiceSettings"
+ }
+ },
+ "required": [
+ "service",
+ "service_settings"
+ ]
+ },
+ "examples": {
+ "PutGoogleAiStudioRequestExample1": {
+ "summary": "A completion task",
+ "description": "Run `PUT _inference/completion/google_ai_studio_completion` to create an inference endpoint to perform a `completion` task type.",
+ "value": "{\n  \"service\": \"googleaistudio\",\n  \"service_settings\": {\n    \"api_key\": \"api-key\",\n    \"model_id\": \"model-id\"\n  }\n}"
+ }
+ }
+ }
+ }
+ },
+ "responses": {
+ "200": {
+ "description": "",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo"
+ }
+ }
+ }
+ }
+ },
+ "x-state": "Added in 8.15.0"
+ }
+ },
+ "/_inference/{task_type}/{googlevertexai_inference_id}": {
+ "put": {
+ "tags": [
+ "inference"
+ ],
+ "summary": "Create a Google Vertex AI inference endpoint",
+ "description": "Create an inference endpoint to perform an inference task with the `googlevertexai` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
+ "operationId": "inference-put-googlevertexai",
+ "parameters": [
+ {
+ "in": "path",
+ "name": "task_type",
+ "description": "The type of the inference task that the model will perform.",
+ "required": true,
+ "deprecated": false,
+ "schema": {
+ "$ref": "#/components/schemas/inference.put_googlevertexai:GoogleVertexAITaskType"
+ },
+ "style": "simple"
+ },
+ {
+ "in": "path",
+ "name": "googlevertexai_inference_id",
+ "description": "The unique identifier of the inference endpoint.",
+ "required": true,
+ "deprecated": false,
+ "schema": {
+ "$ref": "#/components/schemas/_types:Id"
+ },
+ "style": "simple"
+ }
+ ],
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "object",
+ "properties": {
+ "chunking_settings": {
+ "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings"
+ },
+ "service": {
+ "$ref": "#/components/schemas/inference.put_googlevertexai:ServiceType"
+ },
+ "service_settings": {
+ "$ref": "#/components/schemas/inference.put_googlevertexai:GoogleVertexAIServiceSettings"
+ },
+ "task_settings": {
+ "$ref": "#/components/schemas/inference.put_googlevertexai:GoogleVertexAITaskSettings"
+ }
+ },
+ "required": [
+ "service",
+ "service_settings"
+ ]
+ },
+ "examples": {
+ "PutGoogleVertexAiRequestExample1": {
+ "summary": "A text embedding task",
+ "description": "Run `PUT _inference/text_embedding/google_vertex_ai_embeddings` to create an inference endpoint to perform a `text_embedding` task type.",
+ "value": "{\n  \"service\": \"googlevertexai\",\n  \"service_settings\": {\n    \"service_account_json\": \"service-account-json\",\n    \"model_id\": \"model-id\",\n    \"location\": \"location\",\n    \"project_id\": \"project-id\"\n  }\n}"
+ },
+ "PutGoogleVertexAiRequestExample2": {
+ "summary": "A rerank task",
+ "description": "Run `PUT _inference/rerank/google_vertex_ai_rerank` to create an inference endpoint to perform a `rerank` task type.",
+ "value": "{\n  \"service\": \"googlevertexai\",\n  \"service_settings\": {\n    \"service_account_json\": \"service-account-json\",\n    \"project_id\": \"project-id\"\n  }\n}"
+ }
+ }
+ }
+ }
+ },
+ "responses": {
+ "200": {
+ "description": "",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo"
+ }
+ }
+ }
+ }
+ },
+ "x-state": "Added in 8.15.0"
+ }
+ },
 "/_inference/{task_type}/{huggingface_inference_id}": {
 "put": {
 "tags": [
@@ -9963,6 +10581,92 @@
 "x-state": "Added in 8.12.0"
 }
 },
+ "/_inference/{task_type}/{jinaai_inference_id}": {
+ "put": {
+ "tags": [
+ "inference"
+ ],
+ "summary": "Create a JinaAI inference endpoint",
+ "description": "Create an inference 
endpoint to perform an inference task with the `jinaai` service.\n\nTo review the available `rerank` models, refer to the JinaAI documentation.\nTo review the available `text_embedding` models, refer to the JinaAI documentation.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
+ "operationId": "inference-put-jinaai",
+ "parameters": [
+ {
+ "in": "path",
+ "name": "task_type",
+ "description": "The type of the inference task that the model will perform.",
+ "required": true,
+ "deprecated": false,
+ "schema": {
+ "$ref": "#/components/schemas/inference.put_jinaai:JinaAITaskType"
+ },
+ "style": "simple"
+ },
+ {
+ "in": "path",
+ "name": "jinaai_inference_id",
+ "description": "The unique identifier of the inference endpoint.",
+ "required": true,
+ "deprecated": false,
+ "schema": {
+ "$ref": "#/components/schemas/_types:Id"
+ },
+ "style": "simple"
+ }
+ ],
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "object",
+ "properties": {
+ "chunking_settings": {
+ "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings"
+ },
+ "service": {
+ "$ref": "#/components/schemas/inference.put_jinaai:ServiceType"
+ },
+ "service_settings": {
+ "$ref": "#/components/schemas/inference.put_jinaai:JinaAIServiceSettings"
+ },
+ "task_settings": {
+ "$ref": "#/components/schemas/inference.put_jinaai:JinaAITaskSettings"
+ }
+ },
+ "required": [
+ "service",
+ "service_settings"
+ ]
+ },
+ "examples": {
+ "PutJinaAiRequestExample1": {
+ "summary": "A text embedding task",
+ "description": "Run `PUT _inference/text_embedding/jinaai-embeddings` to create an inference endpoint for text embedding tasks using the JinaAI service.",
+ "value": "{\n  \"service\": \"jinaai\",\n  \"service_settings\": {\n    \"model_id\": \"jina-embeddings-v3\",\n    \"api_key\": \"JinaAi-Api-key\"\n  }\n}"
+ },
+ "PutJinaAiRequestExample2": {
+ "summary": "A rerank task",
+ "description": "Run `PUT _inference/rerank/jinaai-rerank` to create an inference endpoint for rerank tasks using the JinaAI service.",
+ "value": "{\n  \"service\": \"jinaai\",\n  \"service_settings\": {\n    \"api_key\": \"JinaAI-Api-key\",\n    \"model_id\": \"jina-reranker-v2-base-multilingual\"\n  },\n  \"task_settings\": {\n    \"top_n\": 10,\n    \"return_documents\": true\n  }\n}"
+ }
+ }
+ }
+ }
+ },
+ "responses": {
+ "200": {
+ "description": "",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo"
+ }
+ }
+ }
+ }
+ },
+ "x-state": "Added in 8.18.0"
+ }
+ },
 "/_inference/{task_type}/{openai_inference_id}": {
 "put": {
 "tags": [
@@ -10027,8 +10731,8 @@
 },
 "PutOpenAiRequestExample2": {
 "summary": "A completion task",
- "description": "Run `PUT _inference/completion/openai-completion` to create an inference endpoint to perform a completion task type.",
- "value": "{\n  \"service\": \"openai\",\n  \"service_settings\": {\n    \"api_key\": \"OpenAI-API-Key\",\n    \"model_id\": \"gpt-3.5-turbo\"\n  }\n}"
+ "description": "Run `PUT _inference/completion/openai-completion` to create an inference endpoint to perform a completion task.",
+ "value": "{\n  \"service\": \"openai\",\n  \"service_settings\": {\n    \"api_key\": \"OpenAI-API-Key\",\n    \"model_id\": \"gpt-3.5-turbo\"\n  }\n}"
 }
 }
 }
@@ -10186,7 +10890,7 @@
 ]
 },
 "examples": {
- "InferenceRequestExample1": {
+ "PutWatsonxRequestExample1": {
 "description": "Run `PUT _inference/text_embedding/watsonx-embeddings` to create an Watonsx inference endpoint that performs a text embedding task.",
 "value": "{\n  \"service\": \"watsonxai\",\n  \"service_settings\": {\n    \"api_key\": \"Watsonx-API-Key\", \n    \"url\": \"Wastonx-URL\", \n    \"model_id\": \"ibm/slate-30m-english-rtrvr\",\n    \"project_id\": \"IBM-Cloud-ID\", \n    \"api_version\": \"2024-03-14\"\n  }\n}"
 }
@@ -48397,6 +49101,245 @@
 "inference._types:ServiceSettings": {
 "type": "object"
 },
+ "inference.put_amazonbedrock:AmazonBedrockTaskType": {
+ "type": "string",
+ "enum": [
+ "completion",
+ "text_embedding"
+ ]
+ },
+ "inference.put_amazonbedrock:ServiceType": {
+ "type": "string",
+ "enum": [
+ "amazonbedrock"
+ ]
+ },
+ "inference.put_amazonbedrock:AmazonBedrockServiceSettings": {
+ "type": "object",
+ "properties": {
+ "access_key": {
+ "description": "A valid AWS access key that has permissions to use Amazon Bedrock and access to models for inference requests.",
+ "type": "string"
+ },
+ "model": {
+ "externalDocs": {
+ "url": "https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html"
+ },
+ "description": "The base model ID or an ARN to a custom model based on a foundational model.\nThe base model IDs can be found in the Amazon Bedrock documentation.\nNote that the model ID must be available for the provider chosen and your IAM user must have access to the model.",
+ "type": "string"
+ },
+ "provider": {
+ "description": "The model provider for your deployment.\nNote that some providers may support only certain task types.\nSupported providers include:\n\n* `amazontitan` - available for `text_embedding` and `completion` task types\n* `anthropic` - available for `completion` task type only\n* `ai21labs` - available for `completion` task type only\n* `cohere` - available for `text_embedding` and `completion` task types\n* `meta` - available for `completion` task type only\n* `mistral` - available for `completion` task type only",
+ "type": "string"
+ },
+ "region": {
+ "externalDocs": {
+ "url": "https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html"
+ },
+ "description": "The region that your model or ARN is deployed in.\nThe list of available regions per model can be found in the Amazon Bedrock documentation.",
+ "type": "string"
+ },
+ "rate_limit": {
+ "$ref": "#/components/schemas/inference._types:RateLimitSetting"
+ },
+ "secret_key": {
+ "externalDocs": {
+ "url": "https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html"
+ },
+ "description": "A valid AWS secret key that is paired with the `access_key`.\nFor information about creating and managing access and secret keys, refer to the AWS documentation.",
+ "type": "string"
+ }
+ },
+ "required": [
+ "access_key",
+ "model",
+ "region",
+ "secret_key"
+ ]
+ },
+ "inference._types:RateLimitSetting": {
+ "type": "object",
+ "properties": {
+ "requests_per_minute": {
+ "description": "The number of requests allowed per minute.",
+ "type": "number"
+ }
+ }
+ },
+ "inference.put_amazonbedrock:AmazonBedrockTaskSettings": {
+ "type": "object",
+ "properties": { 
+ "max_new_tokens": { + "description": "For a `completion` task, it sets the maximum number for the output tokens to be generated.", + "type": "number" + }, + "temperature": { + "description": "For a `completion` task, it is a number between 0.0 and 1.0 that controls the apparent creativity of the results.\nAt temperature 0.0 the model is most deterministic, at temperature 1.0 most random.\nIt should not be used if `top_p` or `top_k` is specified.", + "type": "number" + }, + "top_k": { + "description": "For a `completion` task, it limits samples to the top-K most likely words, balancing coherence and variability.\nIt is only available for anthropic, cohere, and mistral providers.\nIt is an alternative to `temperature`; it should not be used if `temperature` is specified.", + "type": "number" + }, + "top_p": { + "description": "For a `completion` task, it is a number in the range of 0.0 to 1.0, to eliminate low-probability tokens.\nTop-p uses nucleus sampling to select top tokens whose sum of likelihoods does not exceed a certain value, ensuring both variety and coherence.\nIt is an alternative to `temperature`; it should not be used if `temperature` is specified.", + "type": "number" + } + } + }, + "inference.put_anthropic:AnthropicTaskType": { + "type": "string", + "enum": [ + "completion" + ] + }, + "inference.put_anthropic:ServiceType": { + "type": "string", + "enum": [ + "anthropic" + ] + }, + "inference.put_anthropic:AnthropicServiceSettings": { + "type": "object", + "properties": { + "api_key": { + "description": "A valid API key for the Anthropic API.", + "type": "string" + }, + "model_id": { + "description": "The name of the model to use for the inference task.\nRefer to the Anthropic documentation for the list of supported models.", + "type": "string" + }, + "rate_limit": { + "$ref": "#/components/schemas/inference._types:RateLimitSetting" + } + }, + "required": [ + "api_key", + "model_id" + ] + }, + "inference.put_anthropic:AnthropicTaskSettings": { + "type": "object", + "properties": { + "max_tokens": { + "description": "For a `completion` task, it is the maximum number of tokens to generate before stopping.", + "type": "number" + }, + "temperature": { + "externalDocs": { + "url": "https://docs.anthropic.com/en/api/messages" + }, + "description": "For a `completion` task, it is the amount of randomness injected into the response.\nFor more details about the supported range, refer to Anthropic documentation.", + "type": "number" + }, + "top_k": { + "description": "For a `completion` task, it specifies to only sample from the top K options for each subsequent token.\nIt is recommended for advanced use cases only.\nYou usually only need to use `temperature`.", + "type": "number" + }, + "top_p": { + "description": "For a `completion` task, it specifies to use Anthropic's nucleus sampling.\nIn nucleus sampling, Anthropic computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches the specified probability.\nYou should either alter `temperature` or `top_p`, but not both.\nIt is recommended for advanced use cases only.\nYou usually only need to use `temperature`.", + "type": "number" + } + }, + "required": [ + "max_tokens" + ] + }, + "inference.put_cohere:CohereTaskType": { + "type": "string", + "enum": [ + "completion", + "rerank", + "text_embedding" + ] + }, + "inference.put_cohere:ServiceType": { + "type": "string", + "enum": [ + "cohere" + ] + }, + "inference.put_cohere:CohereServiceSettings": { 
+ "type": "object", + "properties": { + "api_key": { + "externalDocs": { + "url": "https://dashboard.cohere.com/api-keys" + }, + "description": "A valid API key for your Cohere account.\nYou can find or create your Cohere API keys on the Cohere API key settings page.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "type": "string" + }, + "embedding_type": { + "$ref": "#/components/schemas/inference.put_cohere:EmbeddingType" + }, + "model_id": { + "description": "For a `completion`, `rerank`, or `text_embedding` task, the name of the model to use for the inference task.\n\n* For the available `completion` models, refer to the [Cohere command docs](https://docs.cohere.com/docs/models#command).\n* For the available `rerank` models, refer to the [Cohere rerank docs](https://docs.cohere.com/reference/rerank-1).\n* For the available `text_embedding` models, refer to [Cohere embed docs](https://docs.cohere.com/reference/embed).\n\nThe default value for a text embedding task is `embed-english-v2.0`.", + "type": "string" + }, + "rate_limit": { + "$ref": "#/components/schemas/inference._types:RateLimitSetting" + }, + "similarity": { + "$ref": "#/components/schemas/inference.put_cohere:SimilarityType" + } + }, + "required": [ + "api_key" + ] + }, + "inference.put_cohere:EmbeddingType": { + "type": "string", + "enum": [ + "byte", + "float", + "int8" + ] + }, + "inference.put_cohere:SimilarityType": { + "type": "string", + "enum": [ + "cosine", + "dot_product", + "l2_norm" + ] + }, + "inference.put_cohere:CohereTaskSettings": { + "type": "object", + "properties": { + "input_type": { + "$ref": "#/components/schemas/inference.put_cohere:InputType" + }, + "return_documents": { + "description": "For a `rerank` task, return doc text within the results.", + "type": "boolean" + }, + "top_n": { + "description": "For a `rerank` task, the number of most relevant documents to return.\nIt defaults to the number of the documents.\nIf this inference endpoint is used in a `text_similarity_reranker` retriever query and `top_n` is set, it must be greater than or equal to `rank_window_size` in the query.", + "type": "number" + }, + "truncate": { + "$ref": "#/components/schemas/inference.put_cohere:TruncateType" + } + } + }, + "inference.put_cohere:InputType": { + "type": "string", + "enum": [ + "classification", + "clustering", + "ingest", + "search" + ] + }, + "inference.put_cohere:TruncateType": { + "type": "string", + "enum": [ + "END", + "NONE", + "START" + ] + }, "inference.put_eis:EisTaskType": { "type": "string", "enum": [ @@ -48424,11 +49367,220 @@ "model_id" ] }, - "inference._types:RateLimitSetting": { + "inference.put_elasticsearch:ElasticsearchTaskType": { + "type": "string", + "enum": [ + "rerank", + "sparse_embedding", + "text_embedding" + ] + }, + "inference.put_elasticsearch:ServiceType": { + "type": "string", + "enum": [ + "elasticsearch" + ] + }, + "inference.put_elasticsearch:ElasticsearchServiceSettings": { "type": "object", "properties": { - "requests_per_minute": { - "description": "The number of requests allowed per minute.", + "adaptive_allocations": { + "$ref": "#/components/schemas/inference.put_elasticsearch:AdaptiveAllocations" + }, + "deployment_id": { + "description": "The 
deployment identifier for a trained model deployment.\nWhen `deployment_id` is used the `model_id` is optional.", + "type": "string" + }, + "model_id": { + "externalDocs": { + "url": "https://www.elastic.co/guide/en/machine-learning/current/ml-nlp-import-model.html#ml-nlp-import-script" + }, + "description": "The name of the model to use for the inference task.\nIt can be the ID of a built-in model (for example, `.multilingual-e5-small` for E5) or a text embedding model that was uploaded by using the Eland client.", + "type": "string" + }, + "num_allocations": { + "description": "The total number of allocations that are assigned to the model across machine learning nodes.\nIncreasing this value generally increases the throughput.\nIf adaptive allocations are enabled, do not set this value because it's automatically set.", + "type": "number" + }, + "num_threads": { + "description": "The number of threads used by each model allocation during inference.\nThis setting generally increases the speed per inference request.\nThe inference process is a compute-bound process; `threads_per_allocations` must not exceed the number of available allocated processors per node.\nThe value must be a power of 2.\nThe maximum value is 32.", + "type": "number" + } + }, + "required": [ + "model_id", + "num_threads" + ] + }, + "inference.put_elasticsearch:AdaptiveAllocations": { + "type": "object", + "properties": { + "enabled": { + "description": "Turn on `adaptive_allocations`.", + "type": "boolean" + }, + "max_number_of_allocations": { + "description": "The maximum number of allocations to scale to.\nIf set, it must be greater than or equal to `min_number_of_allocations`.", + "type": "number" + }, + "min_number_of_allocations": { + "description": "The minimum number of allocations to scale to.\nIf set, it must be greater than or equal to 0.\nIf not defined, the deployment scales to 0.", + "type": "number" + } + } + }, + "inference.put_elasticsearch:ElasticsearchTaskSettings": { + "type": "object", + "properties": { + "return_documents": { + "description": "For a `rerank` task, return the document instead of only the index.", + "type": "boolean" + } + } + }, + "inference.put_elser:ElserTaskType": { + "type": "string", + "enum": [ + "sparse_embedding" + ] + }, + "inference.put_elser:ServiceType": { + "type": "string", + "enum": [ + "elser" + ] + }, + "inference.put_elser:ElserServiceSettings": { + "type": "object", + "properties": { + "adaptive_allocations": { + "$ref": "#/components/schemas/inference.put_elser:AdaptiveAllocations" + }, + "num_allocations": { + "description": "The total number of allocations this model is assigned across machine learning nodes.\nIncreasing this value generally increases the throughput.\nIf adaptive allocations is enabled, do not set this value because it's automatically set.", + "type": "number" + }, + "num_threads": { + "description": "The number of threads used by each model allocation during inference.\nIncreasing this value generally increases the speed per inference request.\nThe inference process is a compute-bound process; `threads_per_allocations` must not exceed the number of available allocated processors per node.\nThe value must be a power of 2.\nThe maximum value is 32.\n\n> info\n> If you want to optimize your ELSER endpoint for ingest, set the number of threads to 1. 
If you want to optimize your ELSER endpoint for search, set the number of threads to greater than 1.", + "type": "number" + } + }, + "required": [ + "num_allocations", + "num_threads" + ] + }, + "inference.put_elser:AdaptiveAllocations": { + "type": "object", + "properties": { + "enabled": { + "description": "Turn on `adaptive_allocations`.", + "type": "boolean" + }, + "max_number_of_allocations": { + "description": "The maximum number of allocations to scale to.\nIf set, it must be greater than or equal to `min_number_of_allocations`.", + "type": "number" + }, + "min_number_of_allocations": { + "description": "The minimum number of allocations to scale to.\nIf set, it must be greater than or equal to 0.\nIf not defined, the deployment scales to 0.", + "type": "number" + } + } + }, + "inference.put_googleaistudio:GoogleAiStudioTaskType": { + "type": "string", + "enum": [ + "completion", + "text_embedding" + ] + }, + "inference.put_googleaistudio:ServiceType": { + "type": "string", + "enum": [ + "googleaistudio" + ] + }, + "inference.put_googleaistudio:GoogleAiStudioServiceSettings": { + "type": "object", + "properties": { + "api_key": { + "description": "A valid API key of your Google Gemini account.", + "type": "string" + }, + "model_id": { + "externalDocs": { + "url": "https://ai.google.dev/gemini-api/docs/models" + }, + "description": "The name of the model to use for the inference task.\nRefer to the Google documentation for the list of supported models.", + "type": "string" + }, + "rate_limit": { + "$ref": "#/components/schemas/inference._types:RateLimitSetting" + } + }, + "required": [ + "api_key", + "model_id" + ] + }, + "inference.put_googlevertexai:GoogleVertexAITaskType": { + "type": "string", + "enum": [ + "rerank", + "text_embedding" + ] + }, + "inference.put_googlevertexai:ServiceType": { + "type": "string", + "enum": [ + "googlevertexai" + ] + }, + "inference.put_googlevertexai:GoogleVertexAIServiceSettings": { + "type": "object", + "properties": { + "location": { + "externalDocs": { + "url": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations" + }, + "description": "The name of the location to use for the inference task.\nRefer to the Google documentation for the list of supported locations.", + "type": "string" + }, + "model_id": { + "externalDocs": { + "url": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api" + }, + "description": "The name of the model to use for the inference task.\nRefer to the Google documentation for the list of supported models.", + "type": "string" + }, + "project_id": { + "description": "The name of the project to use for the inference task.", + "type": "string" + }, + "rate_limit": { + "$ref": "#/components/schemas/inference._types:RateLimitSetting" + }, + "service_account_json": { + "description": "A valid service account in JSON format for the Google Vertex AI API.", + "type": "string" + } + }, + "required": [ + "location", + "model_id", + "project_id", + "service_account_json" + ] + }, + "inference.put_googlevertexai:GoogleVertexAITaskSettings": { + "type": "object", + "properties": { + "auto_truncate": { + "description": "For a `text_embedding` task, truncate inputs longer than the maximum token length automatically.", + "type": "boolean" + }, + "top_n": { + "description": "For a `rerank` task, the number of the top N documents that should be returned.", "type": "number" } } @@ -48468,6 +49620,77 @@ "url" ] }, + "inference.put_jinaai:JinaAITaskType": { + "type": "string", + "enum": [ 
+ "rerank", + "text_embedding" + ] + }, + "inference.put_jinaai:ServiceType": { + "type": "string", + "enum": [ + "jinaai" + ] + }, + "inference.put_jinaai:JinaAIServiceSettings": { + "type": "object", + "properties": { + "api_key": { + "externalDocs": { + "url": "https://jina.ai/embeddings/" + }, + "description": "A valid API key of your JinaAI account.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "type": "string" + }, + "model_id": { + "description": "The name of the model to use for the inference task.\nFor a `rerank` task, it is required.\nFor a `text_embedding` task, it is optional.", + "type": "string" + }, + "rate_limit": { + "$ref": "#/components/schemas/inference._types:RateLimitSetting" + }, + "similarity": { + "$ref": "#/components/schemas/inference.put_jinaai:SimilarityType" + } + }, + "required": [ + "api_key" + ] + }, + "inference.put_jinaai:SimilarityType": { + "type": "string", + "enum": [ + "cosine", + "dot_product", + "l2_norm" + ] + }, + "inference.put_jinaai:JinaAITaskSettings": { + "type": "object", + "properties": { + "return_documents": { + "description": "For a `rerank` task, return the doc text within the results.", + "type": "boolean" + }, + "task": { + "$ref": "#/components/schemas/inference.put_jinaai:TextEmbeddingTask" + }, + "top_n": { + "description": "For a `rerank` task, the number of most relevant documents to return.\nIt defaults to the number of the documents.\nIf this inference endpoint is used in a `text_similarity_reranker` retriever query and `top_n` is set, it must be greater than or equal to `rank_window_size` in the query.", + "type": "number" + } + } + }, + "inference.put_jinaai:TextEmbeddingTask": { + "type": "string", + "enum": [ + "classification", + "clustering", + "ingest", + "search" + ] + }, "inference.put_openai:OpenAITaskType": { "type": "string", "enum": [ diff --git a/output/schema/schema-serverless.json b/output/schema/schema-serverless.json index 683d9674a7..51aa43c9e7 100644 --- a/output/schema/schema-serverless.json +++ b/output/schema/schema-serverless.json @@ -4765,6 +4765,144 @@ "visibility": "public" }, "stack": { +<<<<<<< HEAD +======= + "since": "8.12.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Create an Amazon Bedrock inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `amazonbedrock` service.\n\n>info\n> You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. 
If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "docId": "inference-api-amazonbedrock", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-amazon-bedrock.html", + "name": "inference.put_amazonbedrock", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.put_amazonbedrock" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.put_amazonbedrock" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "PUT" + ], + "path": "/_inference/{task_type}/{amazonbedrock_inference_id}" + } + ] + }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.16.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Create an Anthropic inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `anthropic` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "docId": "inference-api-anthropic", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-anthropic.html", + "name": "inference.put_anthropic", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.put_anthropic" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.put_anthropic" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "PUT" + ], + "path": "/_inference/{task_type}/{anthropic_inference_id}" + } + ] + }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.13.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Create a Cohere inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `cohere` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment 
status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
+  "docId": "inference-api-put-cohere",
+  "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-cohere.html",
+  "name": "inference.put_cohere",
+  "privileges": {
+    "cluster": [
+      "manage_inference"
+    ]
+  },
+  "request": {
+    "name": "Request",
+    "namespace": "inference.put_cohere"
+  },
+  "requestBodyRequired": false,
+  "requestMediaType": [
+    "application/json"
+  ],
+  "response": {
+    "name": "Response",
+    "namespace": "inference.put_cohere"
+  },
+  "responseMediaType": [
+    "application/json"
+  ],
+  "urls": [
+    {
+      "methods": [
+        "PUT"
+      ],
+      "path": "/_inference/{task_type}/{cohere_inference_id}"
+    }
+  ]
+},
+{
+  "availability": {
+    "serverless": {
+      "stability": "stable",
+      "visibility": "public"
+    },
+    "stack": {
     "since": "8.12.0",
     "stability": "stable",
     "visibility": "public"
   }
 },
@@ -27556,98 +27694,8 @@
     "kind": "properties",
     "properties": [
       {
-        "description": "The type of service supported for the specified task type. In this case, `elastic`.",
-        "name": "service",
-        "required": true,
-        "type": {
-          "kind": "instance_of",
-          "type": {
-            "name": "ServiceType",
-            "namespace": "inference.put_eis"
-          }
-        }
-      },
-      {
-        "description": "Settings used to install the inference model. These settings are specific to the `elastic` service.",
-        "name": "service_settings",
-        "required": true,
-        "type": {
-          "kind": "instance_of",
-          "type": {
-            "name": "EisServiceSettings",
-            "namespace": "inference.put_eis"
-          }
-        }
-      }
-    ]
-  },
-  "description": "Create an Elastic Inference Service (EIS) inference endpoint.\n\nCreate an inference endpoint to perform an inference task through the Elastic Inference Service (EIS).",
-  "inherits": {
-    "type": {
-      "name": "RequestBase",
-      "namespace": "_types"
-    }
-  },
-  "kind": "request",
-  "name": {
-    "name": "Request",
-    "namespace": "inference.put_eis"
-  },
-  "path": [
-    {
-      "description": "The type of the inference task that the model will perform.\nNOTE: The `chat_completion` task type only supports streaming and only through the _stream API.",
-      "name": "task_type",
-      "required": true,
-      "type": {
-        "kind": "instance_of",
-        "type": {
-          "name": "EisTaskType",
-          "namespace": "inference.put_eis"
-        }
-      }
-    },
-    {
-      "description": "The unique identifier of the inference endpoint.",
-      "name": "eis_inference_id",
-      "required": true,
-      "type": {
-        "kind": "instance_of",
-        "type": {
-          "name": "Id",
-          "namespace": "_types"
-        }
-      }
-    }
-  ],
-  "query": [],
-  "specLocation": "inference/put_eis/PutEisRequest.ts#L24-L62"
-},
-{
-  "body": {
-    "kind": "value",
-    "value": {
-      "kind": "instance_of",
-      "type": {
-        "name": "InferenceEndpointInfo",
-        "namespace": "inference._types"
-      }
-    }
-  },
-  "kind": "response",
-  "name": {
-    "name": "Response",
-    "namespace": "inference.put_eis"
-  },
-  "specLocation": "inference/put_eis/PutEisResponse.ts#L22-L24"
-},
-{
-  "attachedBehaviors": [
-    "CommonQueryParameters"
-  ],
-  "body": {
-    "kind": "properties",
-    "properties": [
-      {
     "description": "The chunking configuration object.",
     "extDocId": "inference-chunking",
     "extDocUrl": 
"https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", @@ -27662,37 +27710,54 @@ } }, { - "description": "The type of service supported for the specified task type. In this case, `hugging_face`.", + "description": "The type of service supported for the specified task type. In this case, `amazonbedrock`.", "name": "service", "required": true, "type": { "kind": "instance_of", "type": { "name": "ServiceType", - "namespace": "inference.put_hugging_face" + "namespace": "inference.put_amazonbedrock" } } }, { - "description": "Settings used to install the inference model. These settings are specific to the `hugging_face` service.", + "description": "Settings used to install the inference model. These settings are specific to the `amazonbedrock` service.", "name": "service_settings", "required": true, "type": { "kind": "instance_of", "type": { - "name": "HuggingFaceServiceSettings", - "namespace": "inference.put_hugging_face" + "name": "AmazonBedrockServiceSettings", + "namespace": "inference.put_amazonbedrock" + } + } + }, + { + "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "AmazonBedrockTaskSettings", + "namespace": "inference.put_amazonbedrock" } } } ] }, - "description": "Create a Hugging Face inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `hugging_face` service.\n\nYou must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.\nSelect the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section.\nCreate the endpoint and copy the URL after the endpoint initialization has been finished.\n\nThe following models are recommended for the Hugging Face service:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "description": "Create an Amazon Bedrock inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `amazonbedrock` service.\n\n>info\n> You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. 
If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
   "examples": {
-    "PutHuggingFaceRequestExample1": {
-      "description": "Run `PUT _inference/text_embedding/hugging-face-embeddings` to create an inference endpoint that performs a `text_embedding` task type.",
+    "PutAmazonBedrockRequestExample1": {
+      "description": "Run `PUT _inference/text_embedding/amazon_bedrock_embeddings` to create an inference endpoint that performs a text embedding task.",
       "summary": "A text embedding task",
-      "value": "{\n \"service\": \"hugging_face\",\n \"service_settings\": {\n \"api_key\": \"hugging-face-access-token\", \n \"url\": \"url-endpoint\" \n }\n}"
+      "value": "{\n \"service\": \"amazonbedrock\",\n \"service_settings\": {\n \"access_key\": \"AWS-access-key\",\n \"secret_key\": \"AWS-secret-key\",\n \"region\": \"us-east-1\",\n \"provider\": \"amazontitan\",\n \"model\": \"amazon.titan-embed-text-v2:0\"\n }\n}"
+    },
+    "PutAmazonBedrockRequestExample2": {
+      "description": "Run `PUT _inference/completion/amazon_bedrock_completion` to create an inference endpoint to perform a completion task.",
+      "summary": "A completion task",
+      "value": "{\n \"service\": \"amazonbedrock\",\n \"service_settings\": {\n \"access_key\": \"AWS-access-key\",\n \"secret_key\": \"AWS-secret-key\",\n \"region\": \"us-east-1\",\n \"provider\": \"amazontitan\",\n \"model\": \"amazon.titan-text-premier-v1:0\"\n }\n}"
     }
   },
   "inherits": {
@@ -27704,7 +27769,7 @@
   "kind": "request",
   "name": {
     "name": "Request",
-    "namespace": "inference.put_hugging_face"
+    "namespace": "inference.put_amazonbedrock"
   },
   "path": [
     {
@@ -27714,14 +27779,14 @@
       "type": {
         "kind": "instance_of",
         "type": {
-          "name": "HuggingFaceTaskType",
-          "namespace": "inference.put_hugging_face"
+          "name": "AmazonBedrockTaskType",
+          "namespace": "inference.put_amazonbedrock"
         }
       }
     },
     {
       "description": "The unique identifier of the inference endpoint.",
-      "name": "huggingface_inference_id",
+      "name": "amazonbedrock_inference_id",
       "required": true,
       "type": {
         "kind": "instance_of",
@@ -27733,7 +27798,7 @@
     }
   ],
   "query": [],
-  "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L27-L89"
+  "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L28-L84"
 },
 {
   "body": {
@@ -27749,9 +27814,9 @@
   "kind": "response",
   "name": {
     "name": "Response",
-    "namespace": "inference.put_hugging_face"
+    "namespace": "inference.put_amazonbedrock"
   },
-  "specLocation": "inference/put_hugging_face/PutHuggingFaceResponse.ts#L22-L24"
+  "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockResponse.ts#L22-L24"
 },
 {
   "attachedBehaviors": [
@@ -27775,26 +27840,26 @@
     }
   },
   {
-    "description": "The type of service supported for the specified task type. In this case, `openai`.",
+    "description": "The type of service supported for the specified task type. 
In this case, `anthropic`.",
     "name": "service",
     "required": true,
     "type": {
       "kind": "instance_of",
       "type": {
         "name": "ServiceType",
-        "namespace": "inference.put_openai"
+        "namespace": "inference.put_anthropic"
       }
     }
   },
   {
-    "description": "Settings used to install the inference model. These settings are specific to the `openai` service.",
+    "description": "Settings used to install the inference model. These settings are specific to the `anthropic` service.",
     "name": "service_settings",
     "required": true,
     "type": {
       "kind": "instance_of",
       "type": {
-        "name": "OpenAIServiceSettings",
-        "namespace": "inference.put_openai"
+        "name": "AnthropicServiceSettings",
+        "namespace": "inference.put_anthropic"
       }
     }
   },
@@ -27805,24 +27870,18 @@
     "type": {
       "kind": "instance_of",
       "type": {
-        "name": "OpenAITaskSettings",
-        "namespace": "inference.put_openai"
+        "name": "AnthropicTaskSettings",
+        "namespace": "inference.put_anthropic"
       }
     }
   }
 ]
 },
-"description": "Create an OpenAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `openai` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
+"description": "Create an Anthropic inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `anthropic` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
 "examples": {
-  "PutOpenAiRequestExample1": {
-    "description": "Run `PUT _inference/text_embedding/openai-embeddings` to create an inference endpoint that performs a `text_embedding` task. 
The embeddings created by requests to this endpoint will have 128 dimensions.", - "summary": "A text embedding task", - "value": "{\n \"service\": \"openai\",\n \"service_settings\": {\n \"api_key\": \"OpenAI-API-Key\",\n \"model_id\": \"text-embedding-3-small\",\n \"dimensions\": 128\n }\n}" - }, - "PutOpenAiRequestExample2": { - "description": "Run `PUT _inference/completion/openai-completion` to create an inference endpoint to perform a completion task type.", - "summary": "A completion task", - "value": "{\n \"service\": \"openai\",\n \"service_settings\": {\n \"api_key\": \"OpenAI-API-Key\",\n \"model_id\": \"gpt-3.5-turbo\"\n }\n}" + "PutAnthropicRequestExample1": { + "description": "Run `PUT _inference/completion/anthropic_completion` to create an inference endpoint that performs a completion task.", + "value": "{\n \"service\": \"anthropic\",\n \"service_settings\": {\n \"api_key\": \"Anthropic-Api-Key\",\n \"model_id\": \"Model-ID\"\n },\n \"task_settings\": {\n \"max_tokens\": 1024\n }\n}" } }, "inherits": { @@ -27834,24 +27893,24 @@ "kind": "request", "name": { "name": "Request", - "namespace": "inference.put_openai" + "namespace": "inference.put_anthropic" }, "path": [ { - "description": "The type of the inference task that the model will perform.\nNOTE: The `chat_completion` task type only supports streaming and only through the _stream API.", + "description": "The task type.\nThe only valid task type for the model to perform is `completion`.", "name": "task_type", "required": true, "type": { "kind": "instance_of", "type": { - "name": "OpenAITaskType", - "namespace": "inference.put_openai" + "name": "AnthropicTaskType", + "namespace": "inference.put_anthropic" } } }, { "description": "The unique identifier of the inference endpoint.", - "name": "openai_inference_id", + "name": "anthropic_inference_id", "required": true, "type": { "kind": "instance_of", @@ -27863,7 +27922,7 @@ } ], "query": [], - "specLocation": "inference/put_openai/PutOpenAiRequest.ts#L28-L82" + "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L28-L82" }, { "body": { @@ -27879,9 +27938,475 @@ "kind": "response", "name": { "name": "Response", - "namespace": "inference.put_openai" + "namespace": "inference.put_anthropic" }, - "specLocation": "inference/put_openai/PutOpenAiResponse.ts#L22-L24" + "specLocation": "inference/put_anthropic/PutAnthropicResponse.ts#L22-L24" + }, + { + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, + { + "description": "The type of service supported for the specified task type. 
In this case, `cohere`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_cohere" + } + } + }, + { + "description": "Settings used to install the inference model.\nThese settings are specific to the `cohere` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "CohereServiceSettings", + "namespace": "inference.put_cohere" + } + } + }, + { + "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "CohereTaskSettings", + "namespace": "inference.put_cohere" + } + } + } + ] + }, + "description": "Create a Cohere inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `cohere` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "examples": { + "PutCohereRequestExample1": { + "description": "Run `PUT _inference/text_embedding/cohere-embeddings` to create an inference endpoint that performs a text embedding task.", + "summary": "A text embedding task", + "value": "{\n \"service\": \"cohere\",\n \"service_settings\": {\n \"api_key\": \"Cohere-Api-key\",\n \"model_id\": \"embed-english-light-v3.0\",\n \"embedding_type\": \"byte\"\n }\n}" + }, + "PutCohereRequestExample2": { + "description": "Run `PUT _inference/rerank/cohere-rerank` to create an inference endpoint that performs a rerank task.", + "summary": "A rerank task", + "value": "{\n \"service\": \"cohere\",\n \"service_settings\": {\n \"api_key\": \"Cohere-API-key\",\n \"model_id\": \"rerank-english-v3.0\"\n },\n \"task_settings\": {\n \"top_n\": 10,\n \"return_documents\": true\n }\n}" + } + }, + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "kind": "request", + "name": { + "name": "Request", + "namespace": "inference.put_cohere" + }, + "path": [ + { + "description": "The type of the inference task that the model will perform.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "CohereTaskType", + "namespace": "inference.put_cohere" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "cohere_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_cohere/PutCohereRequest.ts#L28-L82" + }, + { + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "kind": "response", + "name": { + "name": "Response", + "namespace": "inference.put_cohere" + }, + "specLocation": "inference/put_cohere/PutCohereResponse.ts#L22-L24" + }, + { + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + 
"kind": "properties", + "properties": [ + { +>>>>>>> f5eaaab24 (Add Amazon Bedrock inference API (#4022)) + "description": "The type of service supported for the specified task type. In this case, `elastic`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_eis" + } + } + }, + { + "description": "Settings used to install the inference model. These settings are specific to the `elastic` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "EisServiceSettings", + "namespace": "inference.put_eis" + } + } + } + ] + }, + "description": "Create an Elastic Inference Service (EIS) inference endpoint.\n\nCreate an inference endpoint to perform an inference task through the Elastic Inference Service (EIS).", + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "kind": "request", + "name": { + "name": "Request", + "namespace": "inference.put_eis" + }, + "path": [ + { + "description": "The type of the inference task that the model will perform.\nNOTE: The `chat_completion` task type only supports streaming and only through the _stream API.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "EisTaskType", + "namespace": "inference.put_eis" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "eis_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_eis/PutEisRequest.ts#L24-L62" + }, + { + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "kind": "response", + "name": { + "name": "Response", + "namespace": "inference.put_eis" + }, + "specLocation": "inference/put_eis/PutEisResponse.ts#L22-L24" + }, + { + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, + { + "description": "The type of service supported for the specified task type. In this case, `hugging_face`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_hugging_face" + } + } + }, + { + "description": "Settings used to install the inference model. 
These settings are specific to the `hugging_face` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "HuggingFaceServiceSettings", + "namespace": "inference.put_hugging_face" + } + } + } + ] + }, + "description": "Create a Hugging Face inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `hugging_face` service.\n\nYou must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.\nSelect the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section.\nCreate the endpoint and copy the URL after the endpoint initialization has been finished.\n\nThe following models are recommended for the Hugging Face service:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "examples": { + "PutHuggingFaceRequestExample1": { + "description": "Run `PUT _inference/text_embedding/hugging-face-embeddings` to create an inference endpoint that performs a `text_embedding` task type.", + "summary": "A text embedding task", + "value": "{\n \"service\": \"hugging_face\",\n \"service_settings\": {\n \"api_key\": \"hugging-face-access-token\", \n \"url\": \"url-endpoint\" \n }\n}" + } + }, + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "kind": "request", + "name": { + "name": "Request", + "namespace": "inference.put_hugging_face" + }, + "path": [ + { + "description": "The type of the inference task that the model will perform.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "HuggingFaceTaskType", + "namespace": "inference.put_hugging_face" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "huggingface_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L27-L89" + }, + { + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "kind": "response", + "name": { + "name": "Response", + "namespace": "inference.put_hugging_face" + }, + "specLocation": "inference/put_hugging_face/PutHuggingFaceResponse.ts#L22-L24" + }, + { + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", + "name": "chunking_settings", + "required": 
false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, + { + "description": "The type of service supported for the specified task type. In this case, `openai`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_openai" + } + } + }, + { + "description": "Settings used to install the inference model. These settings are specific to the `openai` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "OpenAIServiceSettings", + "namespace": "inference.put_openai" + } + } + }, + { + "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "OpenAITaskSettings", + "namespace": "inference.put_openai" + } + } + } + ] + }, + "description": "Create an OpenAI inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `openai` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "examples": { + "PutOpenAiRequestExample1": { + "description": "Run `PUT _inference/text_embedding/openai-embeddings` to create an inference endpoint that performs a `text_embedding` task. 
The embeddings created by requests to this endpoint will have 128 dimensions.",
+      "summary": "A text embedding task",
+      "value": "{\n \"service\": \"openai\",\n \"service_settings\": {\n \"api_key\": \"OpenAI-API-Key\",\n \"model_id\": \"text-embedding-3-small\",\n \"dimensions\": 128\n }\n}"
+    },
+    "PutOpenAiRequestExample2": {
+      "description": "Run `PUT _inference/completion/openai-completion` to create an inference endpoint to perform a completion task type.",
+      "summary": "A completion task",
+      "value": "{\n \"service\": \"openai\",\n \"service_settings\": {\n \"api_key\": \"OpenAI-API-Key\",\n \"model_id\": \"gpt-3.5-turbo\"\n }\n}"
+    }
+  },
+  "inherits": {
+    "type": {
+      "name": "RequestBase",
+      "namespace": "_types"
+    }
+  },
+  "kind": "request",
+  "name": {
+    "name": "Request",
+    "namespace": "inference.put_openai"
+  },
+  "path": [
+    {
+      "description": "The type of the inference task that the model will perform.\nNOTE: The `chat_completion` task type only supports streaming and only through the _stream API.",
+      "name": "task_type",
+      "required": true,
+      "type": {
+        "kind": "instance_of",
+        "type": {
+          "name": "OpenAITaskType",
+          "namespace": "inference.put_openai"
+        }
+      }
+    },
+    {
+      "description": "The unique identifier of the inference endpoint.",
+      "name": "openai_inference_id",
+      "required": true,
+      "type": {
+        "kind": "instance_of",
+        "type": {
+          "name": "Id",
+          "namespace": "_types"
+        }
+      }
+    }
+  ],
+  "query": [],
+  "specLocation": "inference/put_openai/PutOpenAiRequest.ts#L28-L82"
+},
+{
+  "body": {
+    "kind": "value",
+    "value": {
+      "kind": "instance_of",
+      "type": {
+        "name": "InferenceEndpointInfo",
+        "namespace": "inference._types"
+      }
+    }
+  },
+  "kind": "response",
+  "name": {
+    "name": "Response",
+    "namespace": "inference.put_openai"
+  },
+  "specLocation": "inference/put_openai/PutOpenAiResponse.ts#L22-L24"
 },
 {
   "attachedBehaviors": [
@@ -100576,6 +101101,175 @@
   "kind": "enum",
   "members": [
     {
+      "name": "completion"
+    },
+    {
+      "name": "text_embedding"
+    }
+  ],
+  "name": {
+    "name": "AmazonBedrockTaskType",
+    "namespace": "inference.put_amazonbedrock"
+  },
+  "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L86-L89"
+},
+{
+  "kind": "enum",
+  "members": [
+    {
+      "name": "amazonbedrock"
+    }
+  ],
+  "name": {
+    "name": "ServiceType",
+    "namespace": "inference.put_amazonbedrock"
+  },
+  "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L91-L93"
+},
+{
+  "kind": "enum",
+  "members": [
+    {
+      "name": "completion"
+    }
+  ],
+  "name": {
+    "name": "AnthropicTaskType",
+    "namespace": "inference.put_anthropic"
+  },
+  "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L84-L86"
+},
+{
+  "kind": "enum",
+  "members": [
+    {
+      "name": "anthropic"
+    }
+  ],
+  "name": {
+    "name": "ServiceType",
+    "namespace": "inference.put_anthropic"
+  },
+  "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L88-L90"
+},
+{
+  "kind": "enum",
+  "members": [
+    {
+      "name": "completion"
+    },
+    {
+      "name": "rerank"
+    },
+    {
+      "name": "text_embedding"
+    }
+  ],
+  "name": {
+    "name": "CohereTaskType",
+    "namespace": "inference.put_cohere"
+  },
+  "specLocation": "inference/put_cohere/PutCohereRequest.ts#L84-L88"
+},
+{
+  "kind": "enum",
+  "members": [
+    {
+      "name": "byte"
+    },
+    {
+      "name": "float"
+    },
+    {
+      "name": "int8"
+    }
+  ],
+  "name": {
+    "name": "EmbeddingType",
+    "namespace": 
"inference.put_cohere" + }, + "specLocation": "inference/put_cohere/PutCohereRequest.ts#L94-L98" + }, + { + "kind": "enum", + "members": [ + { + "name": "classification" + }, + { + "name": "clustering" + }, + { + "name": "ingest" + }, + { + "name": "search" + } + ], + "name": { + "name": "InputType", + "namespace": "inference.put_cohere" + }, + "specLocation": "inference/put_cohere/PutCohereRequest.ts#L100-L105" + }, + { + "kind": "enum", + "members": [ + { + "name": "cohere" + } + ], + "name": { + "name": "ServiceType", + "namespace": "inference.put_cohere" + }, + "specLocation": "inference/put_cohere/PutCohereRequest.ts#L90-L92" + }, + { + "kind": "enum", + "members": [ + { + "name": "cosine" + }, + { + "name": "dot_product" + }, + { + "name": "l2_norm" + } + ], + "name": { + "name": "SimilarityType", + "namespace": "inference.put_cohere" + }, + "specLocation": "inference/put_cohere/PutCohereRequest.ts#L107-L111" + }, + { + "kind": "enum", + "members": [ + { + "name": "END" + }, + { + "name": "NONE" + }, + { + "name": "START" + } + ], + "name": { + "name": "TruncateType", + "namespace": "inference.put_cohere" + }, + "specLocation": "inference/put_cohere/PutCohereRequest.ts#L113-L117" + }, + { + "kind": "enum", + "members": [ + { +>>>>>>> f5eaaab24 (Add Amazon Bedrock inference API (#4022)) "name": "chat_completion" } ], @@ -121148,6 +121842,416 @@ { "kind": "interface", "name": { +<<<<<<< HEAD +======= + "name": "AmazonBedrockServiceSettings", + "namespace": "inference.put_amazonbedrock" + }, + "properties": [ + { + "description": "A valid AWS access key that has permissions to use Amazon Bedrock and access to models for inference requests.", + "name": "access_key", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The base model ID or an ARN to a custom model based on a foundational model.\nThe base model IDs can be found in the Amazon Bedrock documentation.\nNote that the model ID must be available for the provider chosen and your IAM user must have access to the model.", + "extDocId": "amazonbedrock-models", + "extDocUrl": "https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html", + "name": "model", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The model provider for your deployment.\nNote that some providers may support only certain task types.\nSupported providers include:\n\n* `amazontitan` - available for `text_embedding` and `completion` task types\n* `anthropic` - available for `completion` task type only\n* `ai21labs` - available for `completion` task type only\n* `cohere` - available for `text_embedding` and `completion` task types\n* `meta` - available for `completion` task type only\n* `mistral` - available for `completion` task type only", + "name": "provider", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The region that your model or ARN is deployed in.\nThe list of available regions per model can be found in the Amazon Bedrock documentation.", + "extDocId": "amazonbedrock-models", + "extDocUrl": "https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html", + "name": "region", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This 
setting helps to minimize the number of rate limit errors returned from Amazon Bedrock.\nBy default, the `amazonbedrock` service sets the number of requests allowed per minute to 240.",
+      "name": "rate_limit",
+      "required": false,
+      "type": {
+        "kind": "instance_of",
+        "type": {
+          "name": "RateLimitSetting",
+          "namespace": "inference._types"
+        }
+      }
+    },
+    {
+      "description": "A valid AWS secret key that is paired with the `access_key`.\nFor information about creating and managing access and secret keys, refer to the AWS documentation.",
+      "extDocId": "amazonbedrock-secret-keys",
+      "extDocUrl": "https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html",
+      "name": "secret_key",
+      "required": true,
+      "type": {
+        "kind": "instance_of",
+        "type": {
+          "name": "string",
+          "namespace": "_builtins"
+        }
+      }
+    }
+  ],
+  "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L95-L137"
+},
+{
+  "kind": "interface",
+  "name": {
+    "name": "RateLimitSetting",
+    "namespace": "inference._types"
+  },
+  "properties": [
+    {
+      "description": "The number of requests allowed per minute.",
+      "name": "requests_per_minute",
+      "required": false,
+      "type": {
+        "kind": "instance_of",
+        "type": {
+          "name": "integer",
+          "namespace": "_types"
+        }
+      }
+    }
+  ],
+  "specLocation": "inference/_types/Services.ts#L95-L100"
+},
+{
+  "kind": "interface",
+  "name": {
+    "name": "AmazonBedrockTaskSettings",
+    "namespace": "inference.put_amazonbedrock"
+  },
+  "properties": [
+    {
+      "description": "For a `completion` task, it sets the maximum number for the output tokens to be generated.",
+      "name": "max_new_tokens",
+      "required": false,
+      "serverDefault": 64,
+      "type": {
+        "kind": "instance_of",
+        "type": {
+          "name": "integer",
+          "namespace": "_types"
+        }
+      }
+    },
+    {
+      "description": "For a `completion` task, it is a number between 0.0 and 1.0 that controls the apparent creativity of the results.\nAt temperature 0.0 the model is most deterministic, at temperature 1.0 most random.\nIt should not be used if `top_p` or `top_k` is specified.",
+      "name": "temperature",
+      "required": false,
+      "type": {
+        "kind": "instance_of",
+        "type": {
+          "name": "float",
+          "namespace": "_types"
+        }
+      }
+    },
+    {
+      "description": "For a `completion` task, it limits samples to the top-K most likely words, balancing coherence and variability.\nIt is only available for anthropic, cohere, and mistral providers.\nIt is an alternative to `temperature`; it should not be used if `temperature` is specified.",
+      "name": "top_k",
+      "required": false,
+      "type": {
+        "kind": "instance_of",
+        "type": {
+          "name": "float",
+          "namespace": "_types"
+        }
+      }
+    },
+    {
+      "description": "For a `completion` task, it is a number in the range of 0.0 to 1.0, to eliminate low-probability tokens.\nTop-p uses nucleus sampling to select top tokens whose sum of likelihoods does not exceed a certain value, ensuring both variety and coherence.\nIt is an alternative to `temperature`; it should not be used if `temperature` is specified.",
+      "name": "top_p",
+      "required": false,
+      "type": {
+        "kind": "instance_of",
+        "type": {
+          "name": "float",
+          "namespace": "_types"
+        }
+      }
+    }
+  ],
+  "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L139-L163"
+},
+{
+  "kind": "interface",
+  "name": {
+    "name": "AnthropicServiceSettings",
+    "namespace": "inference.put_anthropic"
+  },
+  "properties": [
+    {
+      "description": "A valid API key for the Anthropic API.",
+      "name": "api_key",
+      "required": true,
+      "type": {
+        "kind": "instance_of",
+        "type": 
{
+          "name": "string",
+          "namespace": "_builtins"
+        }
+      }
+    },
+    {
+      "description": "The name of the model to use for the inference task.\nRefer to the Anthropic documentation for the list of supported models.",
+      "extDocId": "anthropic-models",
+      "name": "model_id",
+      "required": true,
+      "type": {
+        "kind": "instance_of",
+        "type": {
+          "name": "string",
+          "namespace": "_builtins"
+        }
+      }
+    },
+    {
+      "description": "This setting helps to minimize the number of rate limit errors returned from Anthropic.\nBy default, the `anthropic` service sets the number of requests allowed per minute to 50.",
+      "name": "rate_limit",
+      "required": false,
+      "type": {
+        "kind": "instance_of",
+        "type": {
+          "name": "RateLimitSetting",
+          "namespace": "inference._types"
+        }
+      }
+    }
+  ],
+  "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L92-L108"
+},
+{
+  "kind": "interface",
+  "name": {
+    "name": "AnthropicTaskSettings",
+    "namespace": "inference.put_anthropic"
+  },
+  "properties": [
+    {
+      "description": "For a `completion` task, it is the maximum number of tokens to generate before stopping.",
+      "name": "max_tokens",
+      "required": true,
+      "type": {
+        "kind": "instance_of",
+        "type": {
+          "name": "integer",
+          "namespace": "_types"
+        }
+      }
+    },
+    {
+      "description": "For a `completion` task, it is the amount of randomness injected into the response.\nFor more details about the supported range, refer to Anthropic documentation.",
+      "extDocId": "anthropic-messages",
+      "extDocUrl": "https://docs.anthropic.com/en/api/messages",
+      "name": "temperature",
+      "required": false,
+      "type": {
+        "kind": "instance_of",
+        "type": {
+          "name": "float",
+          "namespace": "_types"
+        }
+      }
+    },
+    {
+      "description": "For a `completion` task, it specifies to only sample from the top K options for each subsequent token.\nIt is recommended for advanced use cases only.\nYou usually only need to use `temperature`.",
+      "name": "top_k",
+      "required": false,
+      "type": {
+        "kind": "instance_of",
+        "type": {
+          "name": "integer",
+          "namespace": "_types"
+        }
+      }
+    },
+    {
+      "description": "For a `completion` task, it specifies to use Anthropic's nucleus sampling.\nIn nucleus sampling, Anthropic computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches the specified probability.\nYou should either alter `temperature` or `top_p`, but not both.\nIt is recommended for advanced use cases only.\nYou usually only need to use `temperature`.",
+      "name": "top_p",
+      "required": false,
+      "type": {
+        "kind": "instance_of",
+        "type": {
+          "name": "float",
+          "namespace": "_types"
+        }
+      }
+    }
+  ],
+  "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L110-L135"
+},
+{
+  "kind": "interface",
+  "name": {
+    "name": "CohereServiceSettings",
+    "namespace": "inference.put_cohere"
+  },
+  "properties": [
+    {
+      "description": "A valid API key for your Cohere account.\nYou can find or create your Cohere API keys on the Cohere API key settings page.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.",
+      "extDocId": "cohere-api-keys",
+      "extDocUrl": "https://dashboard.cohere.com/api-keys",
+      "name": "api_key",
+      "required": true,
+      "type": {
+        "kind": 
"instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "For a `text_embedding` task, the types of embeddings you want to get back.\nUse `byte` for signed int8 embeddings (this is a synonym of `int8`).\nUse `float` for the default float embeddings.\nUse `int8` for signed int8 embeddings.", + "name": "embedding_type", + "required": false, + "serverDefault": "float", + "type": { + "kind": "instance_of", + "type": { + "name": "EmbeddingType", + "namespace": "inference.put_cohere" + } + } + }, + { + "description": "For a `completion`, `rerank`, or `text_embedding` task, the name of the model to use for the inference task.\n\n* For the available `completion` models, refer to the [Cohere command docs](https://docs.cohere.com/docs/models#command).\n* For the available `rerank` models, refer to the [Cohere rerank docs](https://docs.cohere.com/reference/rerank-1).\n* For the available `text_embedding` models, refer to [Cohere embed docs](https://docs.cohere.com/reference/embed).\n\nThe default value for a text embedding task is `embed-english-v2.0`.", + "name": "model_id", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from Cohere.\nBy default, the `cohere` service sets the number of requests allowed per minute to 10000.", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + }, + { + "description": "The similarity measure.\nIf the `embedding_type` is `float`, the default value is `dot_product`.\nIf the `embedding_type` is `int8` or `byte`, the default value is `cosine`.", + "name": "similarity", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "SimilarityType", + "namespace": "inference.put_cohere" + } + } + } + ], + "specLocation": "inference/put_cohere/PutCohereRequest.ts#L119-L160" + }, + { + "kind": "interface", + "name": { + "name": "CohereTaskSettings", + "namespace": "inference.put_cohere" + }, + "properties": [ + { + "description": "For a `text_embedding` task, the type of input passed to the model.\nValid values are:\n\n* `classification`: Use it for embeddings passed through a text classifier.\n* `clustering`: Use it for the embeddings run through a clustering algorithm.\n* `ingest`: Use it for storing document embeddings in a vector database.\n* `search`: Use it for storing embeddings of search queries run against a vector database to find relevant documents.\n\nIMPORTANT: The `input_type` field is required when using embedding models `v3` and higher.", + "name": "input_type", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InputType", + "namespace": "inference.put_cohere" + } + } + }, + { + "description": "For a `rerank` task, return doc text within the results.", + "name": "return_documents", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "boolean", + "namespace": "_builtins" + } + } + }, + { + "description": "For a `rerank` task, the number of most relevant documents to return.\nIt defaults to the number of the documents.\nIf this inference endpoint is used in a `text_similarity_reranker` retriever query and `top_n` is set, it must be greater than or equal to `rank_window_size` in the query.", + "name": "top_n", + "required": false, + "type": { + 
"kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "For a `text_embedding` task, the method to handle inputs longer than the maximum token length.\nValid values are:\n\n* `END`: When the input exceeds the maximum input token length, the end of the input is discarded.\n* `NONE`: When the input exceeds the maximum input token length, an error is returned.\n* `START`: When the input exceeds the maximum input token length, the start of the input is discarded.", + "name": "truncate", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "TruncateType", + "namespace": "inference.put_cohere" + } + } + } + ], + "specLocation": "inference/put_cohere/PutCohereRequest.ts#L162-L194" + }, + { + "kind": "interface", + "name": { +>>>>>>> f5eaaab24 (Add Amazon Bedrock inference API (#4022)) "name": "EisServiceSettings", "namespace": "inference.put_eis" }, diff --git a/output/schema/schema.json b/output/schema/schema.json index a92498f4fb..fd532a1c89 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -9352,6 +9352,51 @@ } ] }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.12.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Create an Amazon Bedrock inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `amazonbedrock` service.\n\n>info\n> You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "docId": "inference-api-amazonbedrock", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-amazon-bedrock.html", + "name": "inference.put_amazonbedrock", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.put_amazonbedrock" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.put_amazonbedrock" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "PUT" + ], + "path": "/_inference/{task_type}/{amazonbedrock_inference_id}" + } + ] + }, { "availability": { "serverless": { @@ -150843,6 +150888,312 @@ }, "specLocation": "inference/put/PutResponse.ts#L22-L24" }, + { + "kind": "interface", + "name": { + "name": "AmazonBedrockServiceSettings", + "namespace": "inference.put_amazonbedrock" + }, + "properties": [ + { + "description": "A valid AWS access key that has permissions to use Amazon Bedrock and access to models for inference requests.", + 
"name": "access_key", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The base model ID or an ARN to a custom model based on a foundational model.\nThe base model IDs can be found in the Amazon Bedrock documentation.\nNote that the model ID must be available for the provider chosen and your IAM user must have access to the model.", + "extDocId": "amazonbedrock-models", + "extDocUrl": "https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html", + "name": "model", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The model provider for your deployment.\nNote that some providers may support only certain task types.\nSupported providers include:\n\n* `amazontitan` - available for `text_embedding` and `completion` task types\n* `anthropic` - available for `completion` task type only\n* `ai21labs` - available for `completion` task type only\n* `cohere` - available for `text_embedding` and `completion` task types\n* `meta` - available for `completion` task type only\n* `mistral` - available for `completion` task type only", + "name": "provider", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The region that your model or ARN is deployed in.\nThe list of available regions per model can be found in the Amazon Bedrock documentation.", + "extDocId": "amazonbedrock-models", + "extDocUrl": "https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html", + "name": "region", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from Watsonx.\nBy default, the `watsonxai` service sets the number of requests allowed per minute to 120.", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + }, + { + "description": "A valid AWS secret key that is paired with the `access_key`.\nFor informationg about creating and managing access and secret keys, refer to the AWS documentation.", + "extDocId": "amazonbedrock-secret-keys", + "extDocUrl": "https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html", + "name": "secret_key", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L95-L137" + }, + { + "kind": "interface", + "name": { + "name": "AmazonBedrockTaskSettings", + "namespace": "inference.put_amazonbedrock" + }, + "properties": [ + { + "description": "For a `completion` task, it sets the maximum number for the output tokens to be generated.", + "name": "max_new_tokens", + "required": false, + "serverDefault": 64, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "For a `completion` task, it is a number between 0.0 and 1.0 that controls the apparent creativity of the results.\nAt temperature 0.0 the model is most deterministic, at temperature 1.0 most random.\nIt should not be used if `top_p` or `top_k` is specified.", + "name": "temperature", + 
"required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + }, + { + "description": "For a `completion` task, it limits samples to the top-K most likely words, balancing coherence and variability.\nIt is only available for anthropic, cohere, and mistral providers.\nIt is an alternative to `temperature`; it should not be used if `temperature` is specified.", + "name": "top_k", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + }, + { + "description": "For a `completion` task, it is a number in the range of 0.0 to 1.0, to eliminate low-probability tokens.\nTop-p uses nucleus sampling to select top tokens whose sum of likelihoods does not exceed a certain value, ensuring both variety and coherence.\nIt is an alternative to `temperature`; it should not be used if `temperature` is specified.", + "name": "top_p", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L139-L163" + }, + { + "kind": "enum", + "members": [ + { + "name": "completion" + }, + { + "name": "text_embedding" + } + ], + "name": { + "name": "AmazonBedrockTaskType", + "namespace": "inference.put_amazonbedrock" + }, + "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L86-L89" + }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/inference-apis.html#infer-chunking-config", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, + { + "description": "The type of service supported for the specified task type. In this case, `amazonbedrock`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_amazonbedrock" + } + } + }, + { + "description": "Settings used to install the inference model. These settings are specific to the `amazonbedrock` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "AmazonBedrockServiceSettings", + "namespace": "inference.put_amazonbedrock" + } + } + }, + { + "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "AmazonBedrockTaskSettings", + "namespace": "inference.put_amazonbedrock" + } + } + } + ] + }, + "description": "Create an Amazon Bedrock inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `amazonbedrock` service.\n\n>info\n> You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. 
If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "examples": { + "PutAmazonBedrockRequestExample1": { + "description": "Run `PUT _inference/text_embedding/amazon_bedrock_embeddings` to create an inference endpoint that performs a text embedding task.", + "summary": "A text embedding task", + "value": "{\n \"service\": \"amazonbedrock\",\n \"service_settings\": {\n \"access_key\": \"AWS-access-key\",\n \"secret_key\": \"AWS-secret-key\",\n \"region\": \"us-east-1\",\n \"provider\": \"amazontitan\",\n \"model\": \"amazon.titan-embed-text-v2:0\"\n }\n}" + }, + "PutAmazonBedrockRequestExample2": { + "description": "Run `PUT _inference/completion/amazon_bedrock_completion` to create an inference endpoint to perform a completion task.", + "summary": "A completion task", + "value": "{\n \"service\": \"amazonbedrock\",\n \"service_settings\": {\n \"access_key\": \"AWS-access-key\",\n \"secret_key\": \"AWS-secret-key\",\n \"region\": \"us-east-1\",\n \"provider\": \"amazontitan\",\n \"model\": \"amazon.titan-text-premier-v1:0\"\n }\n}" + } + }, + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.put_amazonbedrock" + }, + "path": [ + { + "description": "The type of the inference task that the model will perform.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "AmazonBedrockTaskType", + "namespace": "inference.put_amazonbedrock" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "amazonbedrock_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L28-L84" + }, + { + "kind": "response", + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "name": { + "name": "Response", + "namespace": "inference.put_amazonbedrock" + }, + "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockResponse.ts#L22-L24" + }, + { + "kind": "enum", + "members": [ + { + "name": "amazonbedrock" + } + ], + "name": { + "name": "ServiceType", + "namespace": "inference.put_amazonbedrock" + }, + "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L91-L93" + }, { "kind": "interface", "name": { @@ -153349,9 +153700,9 @@ "value": "{\n \"service\": \"openai\",\n \"service_settings\": {\n \"api_key\": \"OpenAI-API-Key\",\n \"model_id\": \"text-embedding-3-small\",\n \"dimensions\": 128\n }\n}" }, "PutOpenAiRequestExample2": { - "description": "Run `PUT _inference/completion/openai-completion` to create an inference endpoint to perform a completion task type.", + "description": "Run `PUT _inference/completion/amazon_bedrock_completion` to create an inference endpoint to perform a completion task.", "summary": "A
completion task", - "value": "{\n \"service\": \"openai\",\n \"service_settings\": {\n \"api_key\": \"OpenAI-API-Key\",\n \"model_id\": \"gpt-3.5-turbo\"\n }\n}" + "value": "{\n \"service\": \"amazonbedrock\",\n \"service_settings\": {\n \"access_key\": \"AWS-access-key\",\n \"secret_key\": \"AWS-secret-key\",\n \"region\": \"us-east-1\",\n \"provider\": \"amazontitan\",\n \"model\": \"amazon.titan-text-premier-v1:0\"\n }\n}" } }, "inherits": { diff --git a/output/typescript/types.ts b/output/typescript/types.ts index e2873f0fd8..b65c3edb0e 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -13252,6 +13252,39 @@ export interface InferencePutRequest extends RequestBase { export type InferencePutResponse = InferenceInferenceEndpointInfo +export interface InferencePutAmazonbedrockAmazonBedrockServiceSettings { + access_key: string + model: string + provider?: string + region: string + rate_limit?: InferenceRateLimitSetting + secret_key: string +} + +export interface InferencePutAmazonbedrockAmazonBedrockTaskSettings { + max_new_tokens?: integer + temperature?: float + top_k?: float + top_p?: float +} + +export type InferencePutAmazonbedrockAmazonBedrockTaskType = 'completion' | 'text_embedding' + +export interface InferencePutAmazonbedrockRequest extends RequestBase { + task_type: InferencePutAmazonbedrockAmazonBedrockTaskType + amazonbedrock_inference_id: Id + body?: { + chunking_settings?: InferenceInferenceChunkingSettings + service: InferencePutAmazonbedrockServiceType + service_settings: InferencePutAmazonbedrockAmazonBedrockServiceSettings + task_settings?: InferencePutAmazonbedrockAmazonBedrockTaskSettings + } +} + +export type InferencePutAmazonbedrockResponse = InferenceInferenceEndpointInfo + +export type InferencePutAmazonbedrockServiceType = 'amazonbedrock' + export interface InferencePutAnthropicAnthropicServiceSettings { api_key: string model_id: string diff --git a/package-lock.json b/package-lock.json index df19b685a5..4388b85a6e 100644 --- a/package-lock.json +++ b/package-lock.json @@ -5,7 +5,7 @@ "packages": { "": { "dependencies": { - "@redocly/cli": "^1.33.1", + "@redocly/cli": "^1.34.0", "@stoplight/spectral-cli": "^6.14.2" } }, @@ -489,9 +489,9 @@ } }, "node_modules/@redocly/cli": { - "version": "1.33.1", - "resolved": "https://registry.npmjs.org/@redocly/cli/-/cli-1.33.1.tgz", - "integrity": "sha512-co+Vr/RfH9Nca3eiYuYvbLxI+5RVOyJ+l56B0SmU5UHfticTUXirO0vxtFmkHmch6YIFVU6BCF4tFbj7ssF8iQ==", + "version": "1.34.0", + "resolved": "https://registry.npmjs.org/@redocly/cli/-/cli-1.34.0.tgz", + "integrity": "sha512-Kg/t9zMjZB5cyb0YQLa+gne5E5Rz6wZP/goug1+2qaR17UqeupidBzwqDdr3lszEK3q2A37g4+W7pvdBOkiGQA==", "license": "MIT", "dependencies": { "@opentelemetry/api": "1.9.0", @@ -500,8 +500,8 @@ "@opentelemetry/sdk-trace-node": "1.26.0", "@opentelemetry/semantic-conventions": "1.27.0", "@redocly/config": "^0.22.0", - "@redocly/openapi-core": "1.33.1", - "@redocly/respect-core": "1.33.1", + "@redocly/openapi-core": "1.34.0", + "@redocly/respect-core": "1.34.0", "abort-controller": "^3.0.0", "chokidar": "^3.5.1", "colorette": "^1.2.0", @@ -564,9 +564,9 @@ "license": "MIT" }, "node_modules/@redocly/openapi-core": { - "version": "1.33.1", - "resolved": "https://registry.npmjs.org/@redocly/openapi-core/-/openapi-core-1.33.1.tgz", - "integrity": "sha512-tL3v8FVwdcCAcruOZV77uxH2ZFtnY3DRPG+rgmlm9hsu5uoatofVSJIJHUroz54KJ8ryeo28wQHhOr8iReGGEQ==", + "version": "1.34.0", + "resolved": "https://registry.npmjs.org/@redocly/openapi-core/-/openapi-core-1.34.0.tgz", + 
"integrity": "sha512-Ji00EiLQRXq0pJIz5pAjGF9MfQvQVsQehc6uIis6sqat8tG/zh25Zi64w6HVGEDgJEzUeq/CuUlD0emu3Hdaqw==", "license": "MIT", "dependencies": { "@redocly/ajv": "^8.11.2", @@ -606,14 +606,14 @@ } }, "node_modules/@redocly/respect-core": { - "version": "1.33.1", - "resolved": "https://registry.npmjs.org/@redocly/respect-core/-/respect-core-1.33.1.tgz", - "integrity": "sha512-Sh6TahtuvSzvejkfu74KErdMX6VtrNNRJAtwH9A6R1Igo8WVmrdoFE99uAp/dOL9bpAQPg4oKtrTF60avN7YYA==", + "version": "1.34.0", + "resolved": "https://registry.npmjs.org/@redocly/respect-core/-/respect-core-1.34.0.tgz", + "integrity": "sha512-CO2XxJ0SUYHKixKPTQm2U6QrGLnNhQy88CnX20llCxXDKd485cSioRMZ8MMNhHrnDsUlprSuM3ui2z5JGf1ftw==", "license": "MIT", "dependencies": { "@faker-js/faker": "^7.6.0", "@redocly/ajv": "8.11.2", - "@redocly/openapi-core": "1.33.1", + "@redocly/openapi-core": "1.34.0", "better-ajv-errors": "^1.2.0", "colorette": "^2.0.20", "concat-stream": "^2.0.0", diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv index bdb8e80fda..71900fc66c 100644 --- a/specification/_doc_ids/table.csv +++ b/specification/_doc_ids/table.csv @@ -2,6 +2,8 @@ apis,https://www.elastic.co/docs/api/doc/elasticsearch/v8 add-nodes,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/add-elasticsearch-nodes.html alias-update,https://www.elastic.co/docs/api/doc/elasticsearch/v8/operation/operation-indices-put-alias aliases-update,https://www.elastic.co/docs/api/doc/elasticsearch/v8/operation/operation-indices-update-aliases +amazonbedrock-models,https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html +amazonbedrock-secret-keys,https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html analysis-analyzers,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/analysis-analyzers.html analysis-charfilters,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/analysis-charfilters.html analysis-normalizers,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/analysis-normalizers.html @@ -323,6 +325,7 @@ indices-templates,https://www.elastic.co/guide/en/elasticsearch/reference/{branc indices-update-settings,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/indices-update-settings.html infer-trained-model,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-trained-model.html infer-trained-model-deployment,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-trained-model-deployment.html +inference-api-amazonbedrock,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-amazon-bedrock.html inference-api-anthropic,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-anthropic.html inference-api-delete,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/delete-inference-api.html inference-api-get,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/get-inference-api.html diff --git a/specification/_json_spec/inference.put_amazonbedrock.json b/specification/_json_spec/inference.put_amazonbedrock.json new file mode 100644 index 0000000000..266a1800a3 --- /dev/null +++ b/specification/_json_spec/inference.put_amazonbedrock.json @@ -0,0 +1,35 @@ +{ + "inference.put_amazonbedrock": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-amazon-bedrock.html", + "description": "Configure an Amazon Bedrock inference endpoint" + }, + "stability": "stable", + "visibility": 
"public", + "headers": { + "accept": ["application/json"], + "content_type": ["application/json"] + }, + "url": { + "paths": [ + { + "path": "/_inference/{task_type}/{amazonbedrock_inference_id}", + "methods": ["PUT"], + "parts": { + "task_type": { + "type": "string", + "description": "The task type" + }, + "amazonbedrock_inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The inference endpoint's task and service settings" + } + } +} diff --git a/specification/inference/put_amazonbedrock/PutAmazonBedrockRequest.ts b/specification/inference/put_amazonbedrock/PutAmazonBedrockRequest.ts new file mode 100644 index 0000000000..8ac3d0262f --- /dev/null +++ b/specification/inference/put_amazonbedrock/PutAmazonBedrockRequest.ts @@ -0,0 +1,163 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { + InferenceChunkingSettings, + RateLimitSetting +} from '@inference/_types/Services' +import { RequestBase } from '@_types/Base' +import { Id } from '@_types/common' +import { float, integer } from '@_types/Numeric' + +/** + * Create an Amazon Bedrock inference endpoint. + * + * Creates an inference endpoint to perform an inference task with the `amazonbedrock` service. + * + * >info + * > You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys. + * + * When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + * After creating the endpoint, wait for the model deployment to complete before using it. + * To verify the deployment status, use the get trained model statistics API. + * Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`. + * Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources. + * @rest_spec_name inference.put_amazonbedrock + * @availability stack since=8.12.0 stability=stable visibility=public + * @availability serverless stability=stable visibility=public + * @cluster_privileges manage_inference + * @doc_id inference-api-amazonbedrock + */ +export interface Request extends RequestBase { + urls: [ + { + path: '/_inference/{task_type}/{amazonbedrock_inference_id}' + methods: ['PUT'] + } + ] + path_parts: { + /** + * The type of the inference task that the model will perform. 
+ */ + task_type: AmazonBedrockTaskType + /** + * The unique identifier of the inference endpoint. + */ + amazonbedrock_inference_id: Id + } + body: { + /** + * The chunking configuration object. + * @ext_doc_id inference-chunking + */ + chunking_settings?: InferenceChunkingSettings + /** + * The type of service supported for the specified task type. In this case, `amazonbedrock`. + */ + service: ServiceType + /** + * Settings used to install the inference model. These settings are specific to the `amazonbedrock` service. + */ + service_settings: AmazonBedrockServiceSettings + /** + * Settings to configure the inference task. + * These settings are specific to the task type you specified. + */ + task_settings?: AmazonBedrockTaskSettings + } +} + +export enum AmazonBedrockTaskType { + completion, + text_embedding +} + +export enum ServiceType { + amazonbedrock +} + +export class AmazonBedrockServiceSettings { + /** + * A valid AWS access key that has permissions to use Amazon Bedrock and access to models for inference requests. + */ + access_key: string + /** + * The base model ID or an ARN to a custom model based on a foundational model. + * The base model IDs can be found in the Amazon Bedrock documentation. + * Note that the model ID must be available for the provider chosen and your IAM user must have access to the model. + * @ext_doc_id amazonbedrock-models + */ + model: string + /** + * The model provider for your deployment. + * Note that some providers may support only certain task types. + * Supported providers include: + * + * * `amazontitan` - available for `text_embedding` and `completion` task types + * * `anthropic` - available for `completion` task type only + * * `ai21labs` - available for `completion` task type only + * * `cohere` - available for `text_embedding` and `completion` task types + * * `meta` - available for `completion` task type only + * * `mistral` - available for `completion` task type only + */ + provider?: string + /** + * The region that your model or ARN is deployed in. + * The list of available regions per model can be found in the Amazon Bedrock documentation. + * @ext_doc_id amazonbedrock-models + */ + region: string + /** + * This setting helps to minimize the number of rate limit errors returned from Amazon Bedrock. + * By default, the `amazonbedrock` service sets the number of requests allowed per minute to 240. + */ + rate_limit?: RateLimitSetting + /** + * A valid AWS secret key that is paired with the `access_key`. + * For information about creating and managing access and secret keys, refer to the AWS documentation. + * @ext_doc_id amazonbedrock-secret-keys + */ + secret_key: string +} + +export class AmazonBedrockTaskSettings { + /** + * For a `completion` task, it sets the maximum number of output tokens to be generated. + * @server_default 64 + */ + max_new_tokens?: integer + /** + * For a `completion` task, it is a number between 0.0 and 1.0 that controls the apparent creativity of the results. + * At temperature 0.0 the model is most deterministic; at temperature 1.0 it is most random. + * It should not be used if `top_p` or `top_k` is specified. + */ + temperature?: float + /** + * For a `completion` task, it limits samples to the top-K most likely words, balancing coherence and variability. + * It is only available for anthropic, cohere, and mistral providers. + * It is an alternative to `temperature`; it should not be used if `temperature` is specified.
+ */ + top_k?: float + /** + * For a `completion` task, it is a number in the range of 0.0 to 1.0, to eliminate low-probability tokens. + * Top-p uses nucleus sampling to select top tokens whose sum of likelihoods does not exceed a certain value, ensuring both variety and coherence. + * It is an alternative to `temperature`; it should not be used if `temperature` is specified. + */ + top_p?: float +} diff --git a/specification/inference/put_amazonbedrock/PutAmazonBedrockResponse.ts b/specification/inference/put_amazonbedrock/PutAmazonBedrockResponse.ts new file mode 100644 index 0000000000..d40639b031 --- /dev/null +++ b/specification/inference/put_amazonbedrock/PutAmazonBedrockResponse.ts @@ -0,0 +1,24 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { InferenceEndpointInfo } from '@inference/_types/Services' + +export class Response { + body: InferenceEndpointInfo +} diff --git a/specification/inference/put_amazonbedrock/examples/request/PutAmazonBedrockRequestExample1.yaml b/specification/inference/put_amazonbedrock/examples/request/PutAmazonBedrockRequestExample1.yaml new file mode 100644 index 0000000000..cded037d23 --- /dev/null +++ b/specification/inference/put_amazonbedrock/examples/request/PutAmazonBedrockRequestExample1.yaml @@ -0,0 +1,15 @@ +summary: A text embedding task +description: Run `PUT _inference/text_embedding/amazon_bedrock_embeddings` to create an inference endpoint that performs a text embedding task. +# method_request: "PUT _inference/text_embedding/amazon_bedrock_embeddings" +# type: "request" +value: |- + { + "service": "amazonbedrock", + "service_settings": { + "access_key": "AWS-access-key", + "secret_key": "AWS-secret-key", + "region": "us-east-1", + "provider": "amazontitan", + "model": "amazon.titan-embed-text-v2:0" + } + } diff --git a/specification/inference/put_amazonbedrock/examples/request/PutAmazonBedrockRequestExample2.yaml b/specification/inference/put_amazonbedrock/examples/request/PutAmazonBedrockRequestExample2.yaml new file mode 100644 index 0000000000..d21fd0d2aa --- /dev/null +++ b/specification/inference/put_amazonbedrock/examples/request/PutAmazonBedrockRequestExample2.yaml @@ -0,0 +1,15 @@ +summary: A completion task +description: Run `PUT _inference/completion/amazon_bedrock_completion` to create an inference endpoint to perform a completion task.
+# method_request: "PUT _inference/completion/amazon_bedrock_completion" +# type: "request" +value: |- + { + "service": "amazonbedrock", + "service_settings": { + "access_key": "AWS-access-key", + "secret_key": "AWS-secret-key", + "region": "us-east-1", + "provider": "amazontitan", + "model": "amazon.titan-text-premier-v1:0" + } + } diff --git a/specification/inference/put_openai/examples/request/PutOpenAiRequestExample2.yaml b/specification/inference/put_openai/examples/request/PutOpenAiRequestExample2.yaml index d21fd0d2aa..4bd73086b3 100644 --- a/specification/inference/put_openai/examples/request/PutOpenAiRequestExample2.yaml +++ b/specification/inference/put_openai/examples/request/PutOpenAiRequestExample2.yaml @@ -1,12 +1,15 @@ summary: A completion task -description: Run `PUT _inference/completion/openai-completion` to create an inference endpoint to perform a completion task type. -# method_request: "PUT _inference/completion/openai-completion" +description: Run `PUT _inference/completion/amazon_bedrock_completion` to create an inference endpoint to perform a completion task. +# method_request: "PUT _inference/completion/amazon_bedrock_completion" # type: "request" value: |- { - "service": "openai", + "service": "amazonbedrock", "service_settings": { - "api_key": "OpenAI-API-Key", - "model_id": "gpt-3.5-turbo" + "access_key": "AWS-access-key", + "secret_key": "AWS-secret-key", + "region": "us-east-1", + "provider": "amazontitan", + "model": "amazon.titan-text-premier-v1:0" } }
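
For reference, the following is a minimal TypeScript sketch, not part of the patch, of how a client could call the endpoint this patch defines: it sends the request shape declared in PutAmazonBedrockRequest.ts to the path registered in inference.put_amazonbedrock.json. ES_URL, API_KEY, and putAmazonBedrockEndpoint are illustrative placeholders; the payload mirrors PutAmazonBedrockRequestExample1.yaml.

// A hedged sketch, assuming a reachable cluster at ES_URL and an API key that
// carries the `manage_inference` cluster privilege required by this endpoint.
const ES_URL = 'https://localhost:9200'
const API_KEY = '<elasticsearch-api-key>'

// Mirrors the required/optional fields of AmazonBedrockServiceSettings in the spec.
interface AmazonBedrockServiceSettings {
  access_key: string
  secret_key: string
  region: string
  provider?: string
  model: string
}

async function putAmazonBedrockEndpoint(
  taskType: 'text_embedding' | 'completion',
  inferenceId: string,
  serviceSettings: AmazonBedrockServiceSettings
): Promise<unknown> {
  // PUT /_inference/{task_type}/{amazonbedrock_inference_id}, as declared in the spec.
  const res = await fetch(`${ES_URL}/_inference/${taskType}/${encodeURIComponent(inferenceId)}`, {
    method: 'PUT',
    headers: {
      'Content-Type': 'application/json',
      Authorization: `ApiKey ${API_KEY}`
    },
    // `service` and `service_settings` are required by the request body;
    // `chunking_settings` and `task_settings` are optional and omitted here.
    body: JSON.stringify({ service: 'amazonbedrock', service_settings: serviceSettings })
  })
  if (!res.ok) {
    throw new Error(`inference.put_amazonbedrock failed with status ${res.status}`)
  }
  // The response body is an InferenceEndpointInfo object.
  return res.json()
}

// Mirrors PutAmazonBedrockRequestExample1: a text_embedding endpoint backed by Amazon Titan.
putAmazonBedrockEndpoint('text_embedding', 'amazon_bedrock_embeddings', {
  access_key: 'AWS-access-key',
  secret_key: 'AWS-secret-key',
  region: 'us-east-1',
  provider: 'amazontitan',
  model: 'amazon.titan-embed-text-v2:0'
}).then(console.log)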