From 4e5d68dae32b3cb096deefa3dc4c61cda2afd82e Mon Sep 17 00:00:00 2001 From: Lisa Cawley Date: Tue, 25 Mar 2025 09:04:52 -0700 Subject: [PATCH] Add Amazon Bedrock inference API (#4022) (cherry picked from commit f5eaaab2433911c3853e68272e8bcd5e5f7cd5f1) --- output/openapi/elasticsearch-openapi.json | 167 +++++++- .../elasticsearch-serverless-openapi.json | 167 +++++++- output/schema/schema-serverless.json | 277 +++++++++++++- output/schema/schema.json | 355 +++++++++++++++++- output/typescript/types.ts | 33 ++ specification/_doc_ids/table.csv | 3 + .../inference.put_amazonbedrock.json | 35 ++ .../PutAmazonBedrockRequest.ts | 163 ++++++++ .../PutAmazonBedrockResponse.ts | 24 ++ .../PutAmazonBedrockRequestExample1.yaml | 15 + .../PutAmazonBedrockRequestExample2.yaml | 12 + .../request/PutOpenAiRequestExample2.yaml | 13 +- 12 files changed, 1251 insertions(+), 13 deletions(-) create mode 100644 specification/_json_spec/inference.put_amazonbedrock.json create mode 100644 specification/inference/put_amazonbedrock/PutAmazonBedrockRequest.ts create mode 100644 specification/inference/put_amazonbedrock/PutAmazonBedrockResponse.ts create mode 100644 specification/inference/put_amazonbedrock/examples/request/PutAmazonBedrockRequestExample1.yaml create mode 100644 specification/inference/put_amazonbedrock/examples/request/PutAmazonBedrockRequestExample2.yaml diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 66976687ea..ad798529c6 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -17937,6 +17937,92 @@ "x-state": "Added in 8.16.0" } }, + "/_inference/{task_type}/{amazonbedrock_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create an Amazon Bedrock inference endpoint", + "description": "Creates an inference endpoint to perform an inference task with the `amazonbedrock` service.\n\n>info\n> You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. 
If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
+        "operationId": "inference-put-amazonbedrock",
+        "parameters": [
+          {
+            "in": "path",
+            "name": "task_type",
+            "description": "The type of the inference task that the model will perform.",
+            "required": true,
+            "deprecated": false,
+            "schema": {
+              "$ref": "#/components/schemas/inference.put_amazonbedrock:AmazonBedrockTaskType"
+            },
+            "style": "simple"
+          },
+          {
+            "in": "path",
+            "name": "amazonbedrock_inference_id",
+            "description": "The unique identifier of the inference endpoint.",
+            "required": true,
+            "deprecated": false,
+            "schema": {
+              "$ref": "#/components/schemas/_types:Id"
+            },
+            "style": "simple"
+          }
+        ],
+        "requestBody": {
+          "content": {
+            "application/json": {
+              "schema": {
+                "type": "object",
+                "properties": {
+                  "chunking_settings": {
+                    "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings"
+                  },
+                  "service": {
+                    "$ref": "#/components/schemas/inference.put_amazonbedrock:ServiceType"
+                  },
+                  "service_settings": {
+                    "$ref": "#/components/schemas/inference.put_amazonbedrock:AmazonBedrockServiceSettings"
+                  },
+                  "task_settings": {
+                    "$ref": "#/components/schemas/inference.put_amazonbedrock:AmazonBedrockTaskSettings"
+                  }
+                },
+                "required": [
+                  "service",
+                  "service_settings"
+                ]
+              },
+              "examples": {
+                "PutAmazonBedrockRequestExample1": {
+                  "summary": "A text embedding task",
+                  "description": "Run `PUT _inference/text_embedding/amazon_bedrock_embeddings` to create an inference endpoint that performs a text embedding task.",
+                  "value": "{\n \"service\": \"amazonbedrock\",\n \"service_settings\": {\n \"access_key\": \"AWS-access-key\",\n \"secret_key\": \"AWS-secret-key\",\n \"region\": \"us-east-1\",\n \"provider\": \"amazontitan\",\n \"model\": \"amazon.titan-embed-text-v2:0\"\n }\n}"
+                },
+                "PutAmazonBedrockRequestExample2": {
+                  "summary": "A completion task",
+                  "description": "Run `PUT _inference/completion/amazon_bedrock_completion` to create an inference endpoint to perform a completion task.",
+                  "value": "{\n \"service\": \"amazonbedrock\",\n \"service_settings\": {\n \"access_key\": \"AWS-access-key\",\n \"secret_key\": \"AWS-secret-key\",\n \"region\": \"us-east-1\",\n \"provider\": \"amazontitan\",\n \"model\": \"amazon.titan-text-premier-v1:0\"\n }\n}"
+                }
+              }
+            }
+          }
+        },
+        "responses": {
+          "200": {
+            "description": "",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo"
+                }
+              }
+            }
+          }
+        },
+        "x-state": "Added in 8.12.0"
+      }
+    },
     "/_inference/{task_type}/{anthropic_inference_id}": {
       "put": {
         "tags": [
           "inference"
         ],
@@ -18928,8 +19014,8 @@
           },
           "PutOpenAiRequestExample2": {
             "summary": "A completion task",
-            "description": "Run `PUT _inference/completion/openai-completion` to create an inference endpoint to perform a completion task type.",
-            "value": "{\n \"service\": \"openai\",\n \"service_settings\": {\n \"api_key\": \"OpenAI-API-Key\",\n \"model_id\": \"gpt-3.5-turbo\"\n }\n}"
+            "description": "Run `PUT _inference/completion/openai-completion` to create an inference endpoint to perform a completion task.",
+            "value": "{\n \"service\": \"openai\",\n \"service_settings\": {\n \"api_key\": \"OpenAI-API-Key\",\n \"model_id\": \"gpt-3.5-turbo\"\n }\n}"
           }
         }
       }
@@ -78004,6 +78090,83 @@
       }
     }
   },
+    "inference.put_amazonbedrock:AmazonBedrockTaskType": {
+      "type": "string",
+      "enum": [
+        "completion",
+        "text_embedding"
+      ]
+    },
+    "inference.put_amazonbedrock:ServiceType": {
+      "type": "string",
+      "enum": [
+        "amazonbedrock"
+      ]
+    },
+    "inference.put_amazonbedrock:AmazonBedrockServiceSettings": {
+      "type": "object",
+      "properties": {
+        "access_key": {
+          "description": "A valid AWS access key that has permissions to use Amazon Bedrock and access to models for inference requests.",
+          "type": "string"
+        },
+        "model": {
+          "externalDocs": {
+            "url": "https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html"
+          },
+          "description": "The base model ID or an ARN to a custom model based on a foundational model.\nThe base model IDs can be found in the Amazon Bedrock documentation.\nNote that the model ID must be available for the provider chosen and your IAM user must have access to the model.",
+          "type": "string"
+        },
+        "provider": {
+          "description": "The model provider for your deployment.\nNote that some providers may support only certain task types.\nSupported providers include:\n\n* `amazontitan` - available for `text_embedding` and `completion` task types\n* `anthropic` - available for `completion` task type only\n* `ai21labs` - available for `completion` task type only\n* `cohere` - available for `text_embedding` and `completion` task types\n* `meta` - available for `completion` task type only\n* `mistral` - available for `completion` task type only",
+          "type": "string"
+        },
+        "region": {
+          "externalDocs": {
+            "url": "https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html"
+          },
+          "description": "The region that your model or ARN is deployed in.\nThe list of available regions per model can be found in the Amazon Bedrock documentation.",
+          "type": "string"
+        },
+        "rate_limit": {
+          "$ref": "#/components/schemas/inference._types:RateLimitSetting"
+        },
+        "secret_key": {
+          "externalDocs": {
+            "url": "https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html"
+          },
+          "description": "A valid AWS secret key that is paired with the `access_key`.\nFor information about creating and managing access and secret keys, refer to the AWS documentation.",
+          "type": "string"
+        }
+      },
+      "required": [
+        "access_key",
+        "model",
+        "region",
+        "secret_key"
+      ]
+    },
+    "inference.put_amazonbedrock:AmazonBedrockTaskSettings": {
+      "type": "object",
+      "properties": {
+        "max_new_tokens": {
+          "description": "For a `completion` task, it sets the maximum number of output tokens to be generated.",
+          "type": "number"
+        },
+        "temperature": {
+          "description": "For a `completion` task, it is a number between 0.0 and 1.0 that controls the apparent creativity of the results.\nAt temperature 0.0 the model is most deterministic; at temperature 1.0 it is most random.\nIt should not be used if `top_p` or `top_k` is specified.",
+          "type": "number"
+        },
+        "top_k": {
+          "description": "For a `completion` task, it limits samples to the top-K most likely words, balancing coherence and variability.\nIt is only available for anthropic, cohere, and mistral providers.\nIt 
is an alternative to `temperature`; it should not be used if `temperature` is specified.", + "type": "number" + }, + "top_p": { + "description": "For a `completion` task, it is a number in the range of 0.0 to 1.0, to eliminate low-probability tokens.\nTop-p uses nucleus sampling to select top tokens whose sum of likelihoods does not exceed a certain value, ensuring both variety and coherence.\nIt is an alternative to `temperature`; it should not be used if `temperature` is specified.", + "type": "number" + } + } + }, "inference.put_anthropic:AnthropicTaskType": { "type": "string", "enum": [ diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 08b556aa48..57fe2ecf41 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -9903,6 +9903,92 @@ "x-state": "Added in 8.16.0" } }, + "/_inference/{task_type}/{amazonbedrock_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create an Amazon Bedrock inference endpoint", + "description": "Creates an inference endpoint to perform an inference task with the `amazonbedrock` service.\n\n>info\n> You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "operationId": "inference-put-amazonbedrock", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The type of the inference task that the model will perform.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_amazonbedrock:AmazonBedrockTaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "amazonbedrock_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, + "service": { + "$ref": "#/components/schemas/inference.put_amazonbedrock:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_amazonbedrock:AmazonBedrockServiceSettings" + }, + "task_settings": { + "$ref": "#/components/schemas/inference.put_amazonbedrock:AmazonBedrockTaskSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + }, + "examples": { + "PutAmazonBedrockRequestExample1": { + "summary": "A text embedding task", + "description": "Run `PUT 
_inference/text_embedding/amazon_bedrock_embeddings` to create an inference endpoint that performs a text embedding task.",
+                  "value": "{\n \"service\": \"amazonbedrock\",\n \"service_settings\": {\n \"access_key\": \"AWS-access-key\",\n \"secret_key\": \"AWS-secret-key\",\n \"region\": \"us-east-1\",\n \"provider\": \"amazontitan\",\n \"model\": \"amazon.titan-embed-text-v2:0\"\n }\n}"
+                },
+                "PutAmazonBedrockRequestExample2": {
+                  "summary": "A completion task",
+                  "description": "Run `PUT _inference/completion/amazon_bedrock_completion` to create an inference endpoint to perform a completion task.",
+                  "value": "{\n \"service\": \"amazonbedrock\",\n \"service_settings\": {\n \"access_key\": \"AWS-access-key\",\n \"secret_key\": \"AWS-secret-key\",\n \"region\": \"us-east-1\",\n \"provider\": \"amazontitan\",\n \"model\": \"amazon.titan-text-premier-v1:0\"\n }\n}"
+                }
+              }
+            }
+          }
+        },
+        "responses": {
+          "200": {
+            "description": "",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo"
+                }
+              }
+            }
+          }
+        },
+        "x-state": "Added in 8.12.0"
+      }
+    },
     "/_inference/{task_type}/{anthropic_inference_id}": {
       "put": {
         "tags": [
           "inference"
         ],
@@ -10894,8 +10980,8 @@
           },
           "PutOpenAiRequestExample2": {
             "summary": "A completion task",
-            "description": "Run `PUT _inference/completion/openai-completion` to create an inference endpoint to perform a completion task type.",
-            "value": "{\n \"service\": \"openai\",\n \"service_settings\": {\n \"api_key\": \"OpenAI-API-Key\",\n \"model_id\": \"gpt-3.5-turbo\"\n }\n}"
+            "description": "Run `PUT _inference/completion/openai-completion` to create an inference endpoint to perform a completion task.",
+            "value": "{\n \"service\": \"openai\",\n \"service_settings\": {\n \"api_key\": \"OpenAI-API-Key\",\n \"model_id\": \"gpt-3.5-turbo\"\n }\n}"
           }
         }
       }
@@ -49334,6 +49420,83 @@
       }
     }
   },
+    "inference.put_amazonbedrock:AmazonBedrockTaskType": {
+      "type": "string",
+      "enum": [
+        "completion",
+        "text_embedding"
+      ]
+    },
+    "inference.put_amazonbedrock:ServiceType": {
+      "type": "string",
+      "enum": [
+        "amazonbedrock"
+      ]
+    },
+    "inference.put_amazonbedrock:AmazonBedrockServiceSettings": {
+      "type": "object",
+      "properties": {
+        "access_key": {
+          "description": "A valid AWS access key that has permissions to use Amazon Bedrock and access to models for inference requests.",
+          "type": "string"
+        },
+        "model": {
+          "externalDocs": {
+            "url": "https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html"
+          },
+          "description": "The base model ID or an ARN to a custom model based on a foundational model.\nThe base model IDs can be found in the Amazon Bedrock documentation.\nNote that the model ID must be available for the provider chosen and your IAM user must have access to the model.",
+          "type": "string"
+        },
+        "provider": {
+          "description": "The model provider for your deployment.\nNote that some providers may support only certain task types.\nSupported providers include:\n\n* `amazontitan` - available for `text_embedding` and `completion` task types\n* `anthropic` - available for `completion` task type only\n* `ai21labs` - available for `completion` task type only\n* `cohere` - available for `text_embedding` and `completion` task types\n* `meta` - available for `completion` task type only\n* `mistral` - available for `completion` task type only",
+          "type": "string"
+        },
+        "region": {
+          "externalDocs": {
+            "url": "https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html"
+          },
+          "description": "The region that your model or ARN is deployed in.\nThe list of available regions per model can be found in the Amazon Bedrock documentation.",
+          "type": "string"
+        },
+        "rate_limit": {
+          "$ref": "#/components/schemas/inference._types:RateLimitSetting"
+        },
+        "secret_key": {
+          "externalDocs": {
+            "url": "https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html"
+          },
+          "description": "A valid AWS secret key that is paired with the `access_key`.\nFor information about creating and managing access and secret keys, refer to the AWS documentation.",
+          "type": "string"
+        }
+      },
+      "required": [
+        "access_key",
+        "model",
+        "region",
+        "secret_key"
+      ]
+    },
+    "inference.put_amazonbedrock:AmazonBedrockTaskSettings": {
+      "type": "object",
+      "properties": {
+        "max_new_tokens": {
+          "description": "For a `completion` task, it sets the maximum number of output tokens to be generated.",
+          "type": "number"
+        },
+        "temperature": {
+          "description": "For a `completion` task, it is a number between 0.0 and 1.0 that controls the apparent creativity of the results.\nAt temperature 0.0 the model is most deterministic; at temperature 1.0 it is most random.\nIt should not be used if `top_p` or `top_k` is specified.",
+          "type": "number"
+        },
+        "top_k": {
+          "description": "For a `completion` task, it limits samples to the top-K most likely words, balancing coherence and variability.\nIt is only available for anthropic, cohere, and mistral providers.\nIt is an alternative to `temperature`; it should not be used if `temperature` is specified.",
+          "type": "number"
+        },
+        "top_p": {
+          "description": "For a `completion` task, it is a number in the range of 0.0 to 1.0, to eliminate low-probability tokens.\nTop-p uses nucleus sampling to select top tokens whose sum of likelihoods does not exceed a certain value, ensuring both variety and coherence.\nIt is an alternative to `temperature`; it should not be used if `temperature` is specified.",
+          "type": "number"
+        }
+      }
+    },
     "inference.put_anthropic:AnthropicTaskType": {
       "type": "string",
       "enum": [
diff --git a/output/schema/schema-serverless.json b/output/schema/schema-serverless.json
index 25f9c2e433..59e4233aab 100644
--- a/output/schema/schema-serverless.json
+++ b/output/schema/schema-serverless.json
@@ -4765,15 +4765,28 @@
         "visibility": "public"
       },
       "stack": {
+<<<<<<< HEAD
         "since": "8.16.0",
+=======
+<<<<<<< HEAD
+=======
+        "since": "8.12.0",
+>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022))
         "stability": "stable",
         "visibility": "public"
       }
     },
+<<<<<<< HEAD
     "description": "Create an AlibabaCloud AI Search inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `alibabacloud-ai-search` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
     "docId": "inference-api-put-alibabacloud",
     "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-alibabacloud-ai-search.html",
     "name": "inference.put_alibabacloud",
+=======
+    "description": "Create an Amazon Bedrock 
inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `amazonbedrock` service.\n\n>info\n> You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "docId": "inference-api-amazonbedrock", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-amazon-bedrock.html", + "name": "inference.put_amazonbedrock", +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) "privileges": { "cluster": [ "manage_inference" @@ -4781,7 +4794,11 @@ }, "request": { "name": "Request", +<<<<<<< HEAD "namespace": "inference.put_alibabacloud" +======= + "namespace": "inference.put_amazonbedrock" +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) }, "requestBodyRequired": false, "requestMediaType": [ @@ -4789,7 +4806,11 @@ ], "response": { "name": "Response", +<<<<<<< HEAD "namespace": "inference.put_alibabacloud" +======= + "namespace": "inference.put_amazonbedrock" +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) }, "responseMediaType": [ "application/json" @@ -4799,7 +4820,11 @@ "methods": [ "PUT" ], +<<<<<<< HEAD "path": "/_inference/{task_type}/{alibabacloud_inference_id}" +======= + "path": "/_inference/{task_type}/{amazonbedrock_inference_id}" +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) } ] }, @@ -4817,7 +4842,11 @@ }, "description": "Create an Anthropic inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `anthropic` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", "docId": "inference-api-anthropic", +<<<<<<< HEAD "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-anthropic.html", +======= + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-anthropic.html", +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) "name": "inference.put_anthropic", "privileges": { "cluster": [ @@ -4855,6 +4884,7 @@ "visibility": "public" }, "stack": { +<<<<<<< HEAD "since": "8.14.0", "stability": "stable", "visibility": "public" @@ -4900,6 +4930,8 @@ "visibility": "public" }, "stack": { 
+======= +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) "since": "8.13.0", "stability": "stable", "visibility": "public" @@ -4907,7 +4939,11 @@ }, "description": "Create a Cohere inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `cohere` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", "docId": "inference-api-put-cohere", +<<<<<<< HEAD "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-cohere.html", +======= + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/branch/infer-service-cohere.html", +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) "name": "inference.put_cohere", "privileges": { "cluster": [ @@ -4945,6 +4981,10 @@ "visibility": "public" }, "stack": { +<<<<<<< HEAD +======= +>>>>>>> f5eaaab24 (Add Amazon Bedrock inference API (#4022)) +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) "since": "8.12.0", "stability": "stable", "visibility": "public" @@ -27997,9 +28037,17 @@ "kind": "properties", "properties": [ { +<<<<<<< HEAD "description": "The chunking configuration object.", "extDocId": "inference-chunking", "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/inference-apis.html#infer-chunking-config", +======= +<<<<<<< HEAD +======= + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) "name": "chunking_settings", "required": false, "type": { @@ -28011,26 +28059,43 @@ } }, { +<<<<<<< HEAD "description": "The type of service supported for the specified task type. In this case, `alibabacloud-ai-search`.", +======= + "description": "The type of service supported for the specified task type. In this case, `amazonbedrock`.", +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) "name": "service", "required": true, "type": { "kind": "instance_of", "type": { "name": "ServiceType", +<<<<<<< HEAD "namespace": "inference.put_alibabacloud" +======= + "namespace": "inference.put_amazonbedrock" +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) } } }, { +<<<<<<< HEAD "description": "Settings used to install the inference model. These settings are specific to the `alibabacloud-ai-search` service.", +======= + "description": "Settings used to install the inference model. 
These settings are specific to the `amazonbedrock` service.", +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) "name": "service_settings", "required": true, "type": { "kind": "instance_of", "type": { +<<<<<<< HEAD "name": "AlibabaCloudServiceSettings", "namespace": "inference.put_alibabacloud" +======= + "name": "AmazonBedrockServiceSettings", + "namespace": "inference.put_amazonbedrock" +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) } } }, @@ -28041,13 +28106,19 @@ "type": { "kind": "instance_of", "type": { +<<<<<<< HEAD "name": "AlibabaCloudTaskSettings", "namespace": "inference.put_alibabacloud" +======= + "name": "AmazonBedrockTaskSettings", + "namespace": "inference.put_amazonbedrock" +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) } } } ] }, +<<<<<<< HEAD "description": "Create an AlibabaCloud AI Search inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `alibabacloud-ai-search` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", "examples": { "PutAlibabaCloudRequestExample1": { @@ -28069,6 +28140,19 @@ "description": "Run `PUT _inference/text_embedding/alibabacloud_ai_search_embeddings` to create an inference endpoint that performs a text embedding task.", "summary": "A text embedding task", "value": "{\n \"service\": \"alibabacloud-ai-search\",\n \"service_settings\": {\n \"api_key\": \"AlibabaCloud-API-Key\",\n \"service_id\": \"ops-text-embedding-001\",\n \"host\": \"default-j01.platform-cn-shanghai.opensearch.aliyuncs.com\",\n \"workspace\": \"default\"\n }\n}" +======= + "description": "Create an Amazon Bedrock inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `amazonbedrock` service.\n\n>info\n> You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. 
If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.",
+    "examples": {
+      "PutAmazonBedrockRequestExample1": {
+        "description": "Run `PUT _inference/text_embedding/amazon_bedrock_embeddings` to create an inference endpoint that performs a text embedding task.",
+        "summary": "A text embedding task",
+        "value": "{\n \"service\": \"amazonbedrock\",\n \"service_settings\": {\n \"access_key\": \"AWS-access-key\",\n \"secret_key\": \"AWS-secret-key\",\n \"region\": \"us-east-1\",\n \"provider\": \"amazontitan\",\n \"model\": \"amazon.titan-embed-text-v2:0\"\n }\n}"
+      },
+      "PutAmazonBedrockRequestExample2": {
+        "description": "Run `PUT _inference/completion/amazon_bedrock_completion` to create an inference endpoint to perform a completion task.",
+        "summary": "A completion task",
+        "value": "{\n \"service\": \"amazonbedrock\",\n \"service_settings\": {\n \"access_key\": \"AWS-access-key\",\n \"secret_key\": \"AWS-secret-key\",\n \"region\": \"us-east-1\",\n \"provider\": \"amazontitan\",\n \"model\": \"amazon.titan-text-premier-v1:0\"\n }\n}"
+>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022))
       }
     },
     "inherits": {
@@ -28080,7 +28164,11 @@
       "kind": "request",
       "name": {
         "name": "Request",
+<<<<<<< HEAD
         "namespace": "inference.put_alibabacloud"
+=======
+        "namespace": "inference.put_amazonbedrock"
+>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022))
       },
       "path": [
         {
@@ -28090,14 +28178,23 @@
           "type": {
             "kind": "instance_of",
             "type": {
+<<<<<<< HEAD
               "name": "AlibabaCloudTaskType",
               "namespace": "inference.put_alibabacloud"
+=======
+              "name": "AmazonBedrockTaskType",
+              "namespace": "inference.put_amazonbedrock"
+>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022))
             }
           }
         },
         {
           "description": "The unique identifier of the inference endpoint.",
+<<<<<<< HEAD
           "name": "alibabacloud_inference_id",
+=======
+          "name": "amazonbedrock_inference_id",
+>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022))
           "required": true,
           "type": {
             "kind": "instance_of",
@@ -28109,7 +28206,11 @@
         }
       ],
       "query": [],
+<<<<<<< HEAD
       "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L27-L80"
+=======
+      "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L28-L84"
+>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022))
     },
     {
       "body": {
@@ -28125,9 +28226,15 @@
       "kind": "response",
       "name": {
         "name": "Response",
+<<<<<<< HEAD
         "namespace": "inference.put_alibabacloud"
       },
       "specLocation": "inference/put_alibabacloud/PutAlibabaCloudResponse.ts#L22-L24"
+=======
+        "namespace": "inference.put_amazonbedrock"
+      },
+      "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockResponse.ts#L22-L24"
+>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022))
     },
     {
       "attachedBehaviors": [
@@ -28139,7 +28246,11 @@
       {
         "description": "The chunking configuration object.",
         "extDocId": "inference-chunking",
+<<<<<<< HEAD
         "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/inference-apis.html#infer-chunking-config",
+=======
+        "extDocUrl": 
"https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) "name": "chunking_settings", "required": false, "type": { @@ -28263,6 +28374,7 @@ { "description": "The chunking configuration object.", "extDocId": "inference-chunking", +<<<<<<< HEAD "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/inference-apis.html#infer-chunking-config", "name": "chunking_settings", "required": false, @@ -28394,6 +28506,9 @@ "description": "The chunking configuration object.", "extDocId": "inference-chunking", "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/inference-apis.html#infer-chunking-config", +======= + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) "name": "chunking_settings", "required": false, "type": { @@ -28521,6 +28636,10 @@ "kind": "properties", "properties": [ { +<<<<<<< HEAD +======= +>>>>>>> f5eaaab24 (Add Amazon Bedrock inference API (#4022)) +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) "description": "The type of service supported for the specified task type. In this case, `elastic`.", "name": "service", "required": true, @@ -29554,9 +29673,9 @@ "value": "{\n \"service\": \"openai\",\n \"service_settings\": {\n \"api_key\": \"OpenAI-API-Key\",\n \"model_id\": \"text-embedding-3-small\",\n \"dimensions\": 128\n }\n}" }, "PutOpenAiRequestExample2": { - "description": "Run `PUT _inference/completion/openai-completion` to create an inference endpoint to perform a completion task type.", + "description": "Run `PUT _inference/completion/amazon_bedrock_completion` to create an inference endpoint to perform a completion task.", "summary": "A completion task", - "value": "{\n \"service\": \"openai\",\n \"service_settings\": {\n \"api_key\": \"OpenAI-API-Key\",\n \"model_id\": \"gpt-3.5-turbo\"\n }\n}" + "value": "{\n \"service\": \"amazonbedrock\",\n \"service_settings\": {\n \"access_key\": \"AWS-access-key\",\n \"secret_key\": \"AWS-secret-key\",\n \"region\": \"us-east-1\",\n \"provider\": \"amazontitan\",\n \"model\": \"amazon.titan-text-premier-v1:0\"\n }\n}" } }, "inherits": { @@ -102310,6 +102429,7 @@ "kind": "enum", "members": [ { +<<<<<<< HEAD "name": "completion" }, { @@ -102319,27 +102439,51 @@ "name": "space_embedding" }, { +======= +<<<<<<< HEAD +======= + "name": "completion" + }, + { +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) "name": "text_embedding" } ], "name": { +<<<<<<< HEAD "name": "AlibabaCloudTaskType", "namespace": "inference.put_alibabacloud" }, "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L82-L87" +======= + "name": "AmazonBedrockTaskType", + "namespace": "inference.put_amazonbedrock" + }, + "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L86-L89" +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) }, { "kind": "enum", "members": [ { +<<<<<<< HEAD "name": "alibabacloud-ai-search" +======= + "name": "amazonbedrock" +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) } ], "name": { "name": "ServiceType", +<<<<<<< HEAD "namespace": "inference.put_alibabacloud" }, "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L89-L91" +======= + "namespace": "inference.put_amazonbedrock" + }, + "specLocation": 
"inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L91-L93" +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) }, { "kind": "enum", @@ -102374,6 +102518,7 @@ "name": "completion" }, { +<<<<<<< HEAD "name": "text_embedding" } ], @@ -102403,6 +102548,8 @@ "name": "completion" }, { +======= +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) "name": "rerank" }, { @@ -102511,6 +102658,10 @@ "kind": "enum", "members": [ { +<<<<<<< HEAD +======= +>>>>>>> f5eaaab24 (Add Amazon Bedrock inference API (#4022)) +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) "name": "chat_completion" } ], @@ -123295,6 +123446,7 @@ { "kind": "interface", "name": { +<<<<<<< HEAD "name": "AlibabaCloudServiceSettings", "namespace": "inference.put_alibabacloud" }, @@ -123302,6 +123454,17 @@ { "description": "A valid API key for the AlibabaCloud AI Search API.", "name": "api_key", +======= +<<<<<<< HEAD +======= + "name": "AmazonBedrockServiceSettings", + "namespace": "inference.put_amazonbedrock" + }, + "properties": [ + { + "description": "A valid AWS access key that has permissions to use Amazon Bedrock and access to models for inference requests.", + "name": "access_key", +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) "required": true, "type": { "kind": "instance_of", @@ -123312,10 +123475,17 @@ } }, { +<<<<<<< HEAD "description": "The name of the host address used for the inference task.\nYou can find the host address in the API keys section of the documentation.", "extDocId": "alibabacloud-api-keys", "extDocUrl": "https://opensearch.console.aliyun.com/cn-shanghai/rag/api-key", "name": "host", +======= + "description": "The base model ID or an ARN to a custom model based on a foundational model.\nThe base model IDs can be found in the Amazon Bedrock documentation.\nNote that the model ID must be available for the provider chosen and your IAM user must have access to the model.", + "extDocId": "amazonbedrock-models", + "extDocUrl": "https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html", + "name": "model", +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) "required": true, "type": { "kind": "instance_of", @@ -123326,7 +123496,37 @@ } }, { +<<<<<<< HEAD "description": "This setting helps to minimize the number of rate limit errors returned from AlibabaCloud AI Search.\nBy default, the `alibabacloud-ai-search` service sets the number of requests allowed per minute to `1000`.", +======= + "description": "The model provider for your deployment.\nNote that some providers may support only certain task types.\nSupported providers include:\n\n* `amazontitan` - available for `text_embedding` and `completion` task types\n* `anthropic` - available for `completion` task type only\n* `ai21labs` - available for `completion` task type only\n* `cohere` - available for `text_embedding` and `completion` task types\n* `meta` - available for `completion` task type only\n* `mistral` - available for `completion` task type only", + "name": "provider", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The region that your model or ARN is deployed in.\nThe list of available regions per model can be found in the Amazon Bedrock documentation.", + "extDocId": "amazonbedrock-models", + "extDocUrl": "https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html", + "name": "region", + "required": true, + "type": { + "kind": "instance_of", + "type": 
{
+            "name": "string",
+            "namespace": "_builtins"
+          }
+        }
+      },
+      {
+        "description": "This setting helps to minimize the number of rate limit errors returned from Amazon Bedrock.\nBy default, the `amazonbedrock` service sets the number of requests allowed per minute to 240.",
+>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022))
       "name": "rate_limit",
       "required": false,
       "type": {
@@ -123338,6 +123538,7 @@
       }
     },
     {
+<<<<<<< HEAD
       "description": "The name of the model service to use for the inference task.\nThe following service IDs are available for the `completion` task:\n\n* `ops-qwen-turbo`\n* `qwen-turbo`\n* `qwen-plus`\n* `qwen-max ÷ qwen-max-longcontext`\n\nThe following service ID is available for the `rerank` task:\n\n* `ops-bge-reranker-larger`\n\nThe following service ID is available for the `sparse_embedding` task:\n\n* `ops-text-sparse-embedding-001`\n\nThe following service IDs are available for the `text_embedding` task:\n\n`ops-text-embedding-001`\n`ops-text-embedding-zh-001`\n`ops-text-embedding-en-001`\n`ops-text-embedding-002`",
       "name": "service_id",
       "required": true,
@@ -123352,6 +123553,12 @@
     {
       "description": "The name of the workspace used for the inference task.",
       "name": "workspace",
+=======
+      "description": "A valid AWS secret key that is paired with the `access_key`.\nFor information about creating and managing access and secret keys, refer to the AWS documentation.",
+      "extDocId": "amazonbedrock-secret-keys",
+      "extDocUrl": "https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html",
+      "name": "secret_key",
+>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022))
       "required": true,
       "type": {
         "kind": "instance_of",
         "type": {
           "name": "string",
           "namespace": "_builtins"
         }
       }
     }
   ],
+<<<<<<< HEAD
   "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L93-L138"
+=======
+  "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L95-L137"
+>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022))
 },
 {
   "kind": "interface",
   "name": {
@@ -123389,6 +123600,7 @@
   {
     "kind": "interface",
     "name": {
+<<<<<<< HEAD
       "name": "AlibabaCloudTaskSettings",
       "namespace": "inference.put_alibabacloud"
     },
@@ -123402,23 +123614,77 @@
       "type": {
         "name": "string",
         "namespace": "_builtins"
+=======
+      "name": "AmazonBedrockTaskSettings",
+      "namespace": "inference.put_amazonbedrock"
+    },
+    "properties": [
+      {
+        "description": "For a `completion` task, it sets the maximum number of output tokens to be generated.",
+        "name": "max_new_tokens",
+        "required": false,
+        "serverDefault": 64,
+        "type": {
+          "kind": "instance_of",
+          "type": {
+            "name": "integer",
+            "namespace": "_types"
+>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022))
       }
     }
   },
   {
+<<<<<<< HEAD
     "description": "For a `sparse_embedding` task, it affects whether the token name will be returned in the response.\nIt defaults to `false`, which means only the token ID will be returned in the response.",
     "name": "return_token",
+=======
+    "description": "For a `completion` task, it is a number between 0.0 and 1.0 that controls the apparent creativity of the results.\nAt temperature 0.0 the model is most deterministic; at temperature 1.0 it is most random.\nIt should not be used if `top_p` or `top_k` is specified.",
+    "name": "temperature",
+>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022))
     "required": false,
     "type": {
       "kind": "instance_of",
       "type": {
+<<<<<<< HEAD
         "name": "boolean",
         "namespace": "_builtins"
+=======
+        "name": "float",
+        "namespace": "_types"
+      }
+    }
+  },
+  {
+    "description": "For a `completion` task, it 
limits samples to the top-K most likely words, balancing coherence and variability.\nIt is only available for anthropic, cohere, and mistral providers.\nIt is an alternative to `temperature`; it should not be used if `temperature` is specified.", + "name": "top_k", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + }, + { + "description": "For a `completion` task, it is a number in the range of 0.0 to 1.0, to eliminate low-probability tokens.\nTop-p uses nucleus sampling to select top tokens whose sum of likelihoods does not exceed a certain value, ensuring both variety and coherence.\nIt is an alternative to `temperature`; it should not be used if `temperature` is specified.", + "name": "top_p", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) } } } ], +<<<<<<< HEAD "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L140-L154" +======= + "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L139-L163" +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) }, { "kind": "interface", @@ -123530,6 +123796,7 @@ { "kind": "interface", "name": { +<<<<<<< HEAD "name": "AzureOpenAIServiceSettings", "namespace": "inference.put_azureopenai" }, @@ -123644,6 +123911,8 @@ { "kind": "interface", "name": { +======= +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) "name": "CohereServiceSettings", "namespace": "inference.put_cohere" }, @@ -123775,6 +124044,10 @@ { "kind": "interface", "name": { +<<<<<<< HEAD +======= +>>>>>>> f5eaaab24 (Add Amazon Bedrock inference API (#4022)) +>>>>>>> 2282fffba (Add Amazon Bedrock inference API (#4022)) "name": "EisServiceSettings", "namespace": "inference.put_eis" }, diff --git a/output/schema/schema.json b/output/schema/schema.json index 33f5f263d6..ac6eee9aaa 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -9396,6 +9396,51 @@ } ] }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.12.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Create an Amazon Bedrock inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `amazonbedrock` service.\n\n>info\n> You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. 
If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "docId": "inference-api-amazonbedrock", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-amazon-bedrock.html", + "name": "inference.put_amazonbedrock", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.put_amazonbedrock" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.put_amazonbedrock" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "PUT" + ], + "path": "/_inference/{task_type}/{amazonbedrock_inference_id}" + } + ] + }, { "availability": { "serverless": { @@ -151190,6 +151235,312 @@ }, "specLocation": "inference/put_alibabacloud/PutAlibabaCloudRequest.ts#L89-L91" }, + { + "kind": "interface", + "name": { + "name": "AmazonBedrockServiceSettings", + "namespace": "inference.put_amazonbedrock" + }, + "properties": [ + { + "description": "A valid AWS access key that has permissions to use Amazon Bedrock and access to models for inference requests.", + "name": "access_key", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The base model ID or an ARN to a custom model based on a foundational model.\nThe base model IDs can be found in the Amazon Bedrock documentation.\nNote that the model ID must be available for the provider chosen and your IAM user must have access to the model.", + "extDocId": "amazonbedrock-models", + "extDocUrl": "https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html", + "name": "model", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The model provider for your deployment.\nNote that some providers may support only certain task types.\nSupported providers include:\n\n* `amazontitan` - available for `text_embedding` and `completion` task types\n* `anthropic` - available for `completion` task type only\n* `ai21labs` - available for `completion` task type only\n* `cohere` - available for `text_embedding` and `completion` task types\n* `meta` - available for `completion` task type only\n* `mistral` - available for `completion` task type only", + "name": "provider", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The region that your model or ARN is deployed in.\nThe list of available regions per model can be found in the Amazon Bedrock documentation.", + "extDocId": "amazonbedrock-models", + "extDocUrl": "https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html", + "name": 
"region", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from Watsonx.\nBy default, the `watsonxai` service sets the number of requests allowed per minute to 120.", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + }, + { + "description": "A valid AWS secret key that is paired with the `access_key`.\nFor informationg about creating and managing access and secret keys, refer to the AWS documentation.", + "extDocId": "amazonbedrock-secret-keys", + "extDocUrl": "https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html", + "name": "secret_key", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L95-L137" + }, + { + "kind": "interface", + "name": { + "name": "AmazonBedrockTaskSettings", + "namespace": "inference.put_amazonbedrock" + }, + "properties": [ + { + "description": "For a `completion` task, it sets the maximum number for the output tokens to be generated.", + "name": "max_new_tokens", + "required": false, + "serverDefault": 64, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "For a `completion` task, it is a number between 0.0 and 1.0 that controls the apparent creativity of the results.\nAt temperature 0.0 the model is most deterministic, at temperature 1.0 most random.\nIt should not be used if `top_p` or `top_k` is specified.", + "name": "temperature", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + }, + { + "description": "For a `completion` task, it limits samples to the top-K most likely words, balancing coherence and variability.\nIt is only available for anthropic, cohere, and mistral providers.\nIt is an alternative to `temperature`; it should not be used if `temperature` is specified.", + "name": "top_k", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + }, + { + "description": "For a `completion` task, it is a number in the range of 0.0 to 1.0, to eliminate low-probability tokens.\nTop-p uses nucleus sampling to select top tokens whose sum of likelihoods does not exceed a certain value, ensuring both variety and coherence.\nIt is an alternative to `temperature`; it should not be used if `temperature` is specified.", + "name": "top_p", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L139-L163" + }, + { + "kind": "enum", + "members": [ + { + "name": "completion" + }, + { + "name": "text_embedding" + } + ], + "name": { + "name": "AmazonBedrockTaskType", + "namespace": "inference.put_amazonbedrock" + }, + "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L86-L89" + }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": 
"https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/inference-apis.html#infer-chunking-config", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, + { + "description": "The type of service supported for the specified task type. In this case, `amazonbedrock`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_amazonbedrock" + } + } + }, + { + "description": "Settings used to install the inference model. These settings are specific to the `amazonbedrock` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "AmazonBedrockServiceSettings", + "namespace": "inference.put_amazonbedrock" + } + } + }, + { + "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "AmazonBedrockTaskSettings", + "namespace": "inference.put_amazonbedrock" + } + } + } + ] + }, + "description": "Create an Amazon Bedrock inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `amazonbedrock` service.\n\n>info\n> You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "examples": { + "PutAmazonBedrockRequestExample1": { + "description": "Run `PUT _inference/text_embedding/amazon_bedrock_embeddings` to create an inference endpoint that performs a text embedding task.", + "summary": "A text embedding task", + "value": "{\n \"service\": \"amazonbedrock\",\n \"service_settings\": {\n \"access_key\": \"AWS-access-key\",\n \"secret_key\": \"AWS-secret-key\",\n \"region\": \"us-east-1\",\n \"provider\": \"amazontitan\",\n \"model\": \"amazon.titan-embed-text-v2:0\"\n }\n}" + }, + "PutAmazonBedrockRequestExample2": { + "description": "Run `PUT _inference/completion/openai-completion` to create an inference endpoint to perform a completion task type.", + "summary": "A completion task", + "value": "{\n \"service\": \"openai\",\n \"service_settings\": {\n \"api_key\": \"OpenAI-API-Key\",\n \"model_id\": \"gpt-3.5-turbo\"\n }\n}" + } + }, + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.put_amazonbedrock" + }, + "path": [ + { + "description": "The type of the inference task that the model will perform.", + "name": "task_type", + "required": true, + 
"type": { + "kind": "instance_of", + "type": { + "name": "AmazonBedrockTaskType", + "namespace": "inference.put_amazonbedrock" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "amazonbedrock_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L28-L84" + }, + { + "kind": "response", + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "name": { + "name": "Response", + "namespace": "inference.put_amazonbedrock" + }, + "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockResponse.ts#L22-L24" + }, + { + "kind": "enum", + "members": [ + { + "name": "amazonbedrock" + } + ], + "name": { + "name": "ServiceType", + "namespace": "inference.put_amazonbedrock" + }, + "specLocation": "inference/put_amazonbedrock/PutAmazonBedrockRequest.ts#L91-L93" + }, { "kind": "interface", "name": { @@ -154169,9 +154520,9 @@ "value": "{\n \"service\": \"openai\",\n \"service_settings\": {\n \"api_key\": \"OpenAI-API-Key\",\n \"model_id\": \"text-embedding-3-small\",\n \"dimensions\": 128\n }\n}" }, "PutOpenAiRequestExample2": { - "description": "Run `PUT _inference/completion/openai-completion` to create an inference endpoint to perform a completion task type.", + "description": "Run `PUT _inference/completion/amazon_bedrock_completion` to create an inference endpoint to perform a completion task.", "summary": "A completion task", - "value": "{\n \"service\": \"openai\",\n \"service_settings\": {\n \"api_key\": \"OpenAI-API-Key\",\n \"model_id\": \"gpt-3.5-turbo\"\n }\n}" + "value": "{\n \"service\": \"amazonbedrock\",\n \"service_settings\": {\n \"access_key\": \"AWS-access-key\",\n \"secret_key\": \"AWS-secret-key\",\n \"region\": \"us-east-1\",\n \"provider\": \"amazontitan\",\n \"model\": \"amazon.titan-text-premier-v1:0\"\n }\n}" } }, "inherits": { diff --git a/output/typescript/types.ts b/output/typescript/types.ts index c820c883f2..fa4b944f52 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -13279,6 +13279,39 @@ export type InferencePutAlibabacloudResponse = InferenceInferenceEndpointInfo export type InferencePutAlibabacloudServiceType = 'alibabacloud-ai-search' +export interface InferencePutAmazonbedrockAmazonBedrockServiceSettings { + access_key: string + model: string + provider?: string + region: string + rate_limit?: InferenceRateLimitSetting + secret_key: string +} + +export interface InferencePutAmazonbedrockAmazonBedrockTaskSettings { + max_new_tokens?: integer + temperature?: float + top_k?: float + top_p?: float +} + +export type InferencePutAmazonbedrockAmazonBedrockTaskType = 'completion' | 'text_embedding' + +export interface InferencePutAmazonbedrockRequest extends RequestBase { + task_type: InferencePutAmazonbedrockAmazonBedrockTaskType + amazonbedrock_inference_id: Id + body?: { + chunking_settings?: InferenceInferenceChunkingSettings + service: InferencePutAmazonbedrockServiceType + service_settings: InferencePutAmazonbedrockAmazonBedrockServiceSettings + task_settings?: InferencePutAmazonbedrockAmazonBedrockTaskSettings + } +} + +export type InferencePutAmazonbedrockResponse = InferenceInferenceEndpointInfo + +export type InferencePutAmazonbedrockServiceType = 'amazonbedrock' + export interface 
   api_key: string
   model_id: string
diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv
index 6400e5e4b6..7190d187e9 100644
--- a/specification/_doc_ids/table.csv
+++ b/specification/_doc_ids/table.csv
@@ -3,6 +3,8 @@ add-nodes,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/add-e
 alias-update,https://www.elastic.co/docs/api/doc/elasticsearch/v8/operation/operation-indices-put-alias
 aliases-update,https://www.elastic.co/docs/api/doc/elasticsearch/v8/operation/operation-indices-update-aliases
 alibabacloud-api-keys,https://opensearch.console.aliyun.com/cn-shanghai/rag/api-key
+amazonbedrock-models,https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html
+amazonbedrock-secret-keys,https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html
 analysis-analyzers,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/analysis-analyzers.html
 analysis-charfilters,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/analysis-charfilters.html
 analysis-normalizers,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/analysis-normalizers.html
@@ -329,6 +331,7 @@ indices-update-settings,https://www.elastic.co/guide/en/elasticsearch/reference/
 infer-trained-model,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-trained-model.html
 infer-trained-model-deployment,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-trained-model-deployment.html
+inference-api-amazonbedrock,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-amazon-bedrock.html
 inference-api-anthropic,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-anthropic.html
 inference-api-delete,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/delete-inference-api.html
 inference-api-get,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/get-inference-api.html
 inference-api-post,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/post-inference-api.html
diff --git a/specification/_json_spec/inference.put_amazonbedrock.json b/specification/_json_spec/inference.put_amazonbedrock.json
new file mode 100644
index 0000000000..266a1800a3
--- /dev/null
+++ b/specification/_json_spec/inference.put_amazonbedrock.json
@@ -0,0 +1,35 @@
+{
+  "inference.put_amazonbedrock": {
+    "documentation": {
+      "url": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-amazon-bedrock.html",
+      "description": "Configure an Amazon Bedrock inference endpoint"
+    },
+    "stability": "stable",
+    "visibility": "public",
+    "headers": {
+      "accept": ["application/json"],
+      "content_type": ["application/json"]
+    },
+    "url": {
+      "paths": [
+        {
+          "path": "/_inference/{task_type}/{amazonbedrock_inference_id}",
+          "methods": ["PUT"],
+          "parts": {
+            "task_type": {
+              "type": "string",
+              "description": "The task type"
+            },
+            "amazonbedrock_inference_id": {
+              "type": "string",
+              "description": "The inference Id"
+            }
+          }
+        }
+      ]
+    },
+    "body": {
+      "description": "The inference endpoint's task and service settings"
+    }
+  }
+}
diff --git a/specification/inference/put_amazonbedrock/PutAmazonBedrockRequest.ts b/specification/inference/put_amazonbedrock/PutAmazonBedrockRequest.ts
new file mode 100644
index 0000000000..8ac3d0262f
--- /dev/null
+++ b/specification/inference/put_amazonbedrock/PutAmazonBedrockRequest.ts
@@ -0,0 +1,163 @@
+/*
+ * Licensed to Elasticsearch B.V. under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import {
+  InferenceChunkingSettings,
+  RateLimitSetting
+} from '@inference/_types/Services'
+import { RequestBase } from '@_types/Base'
+import { Id } from '@_types/common'
+import { float, integer } from '@_types/Numeric'
+
+/**
+ * Create an Amazon Bedrock inference endpoint.
+ *
+ * Creates an inference endpoint to perform an inference task with the `amazonbedrock` service.
+ *
+ * >info
+ * > You need to provide the access and secret keys only once, during the inference model creation. The get inference API does not retrieve your access or secret keys. After creating the inference model, you cannot change the associated key pairs. If you want to use a different access and secret key pair, delete the inference model and recreate it with the same name and the updated keys.
+ *
+ * When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
+ * After creating the endpoint, wait for the model deployment to complete before using it.
+ * To verify the deployment status, use the get trained model statistics API.
+ * Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`.
+ * Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.
+ * @rest_spec_name inference.put_amazonbedrock
+ * @availability stack since=8.12.0 stability=stable visibility=public
+ * @availability serverless stability=stable visibility=public
+ * @cluster_privileges manage_inference
+ * @doc_id inference-api-amazonbedrock
+ */
+export interface Request extends RequestBase {
+  urls: [
+    {
+      path: '/_inference/{task_type}/{amazonbedrock_inference_id}'
+      methods: ['PUT']
+    }
+  ]
+  path_parts: {
+    /**
+     * The type of the inference task that the model will perform.
+     */
+    task_type: AmazonBedrockTaskType
+    /**
+     * The unique identifier of the inference endpoint.
+     */
+    amazonbedrock_inference_id: Id
+  }
+  body: {
+    /**
+     * The chunking configuration object.
+     * @ext_doc_id inference-chunking
+     */
+    chunking_settings?: InferenceChunkingSettings
+    /**
+     * The type of service supported for the specified task type. In this case, `amazonbedrock`.
+     */
+    service: ServiceType
+    /**
+     * Settings used to install the inference model. These settings are specific to the `amazonbedrock` service.
+     */
+    service_settings: AmazonBedrockServiceSettings
+    /**
+     * Settings to configure the inference task.
+     * These settings are specific to the task type you specified.
+     */
+    task_settings?: AmazonBedrockTaskSettings
+  }
+}
+
+export enum AmazonBedrockTaskType {
+  completion,
+  text_embedding
+}
+
+export enum ServiceType {
+  amazonbedrock
+}
+
+export class AmazonBedrockServiceSettings {
+  /**
+   * A valid AWS access key that has permissions to use Amazon Bedrock and access to models for inference requests.
+   */
+  access_key: string
+  /**
+   * The base model ID or an ARN to a custom model based on a foundational model.
+   * The base model IDs can be found in the Amazon Bedrock documentation.
+   * Note that the model ID must be available for the provider chosen and your IAM user must have access to the model.
+   * @ext_doc_id amazonbedrock-models
+   */
+  model: string
+  /**
+   * The model provider for your deployment.
+   * Note that some providers may support only certain task types.
+   * Supported providers include:
+   *
+   * * `amazontitan` - available for `text_embedding` and `completion` task types
+   * * `anthropic` - available for `completion` task type only
+   * * `ai21labs` - available for `completion` task type only
+   * * `cohere` - available for `text_embedding` and `completion` task types
+   * * `meta` - available for `completion` task type only
+   * * `mistral` - available for `completion` task type only
+   */
+  provider?: string
+  /**
+   * The region that your model or ARN is deployed in.
+   * The list of available regions per model can be found in the Amazon Bedrock documentation.
+   * @ext_doc_id amazonbedrock-models
+   */
+  region: string
+  /**
+   * This setting helps to minimize the number of rate limit errors returned from Amazon Bedrock.
+   * By default, the `amazonbedrock` service sets the number of requests allowed per minute to 240.
+   */
+  rate_limit?: RateLimitSetting
+  /**
+   * A valid AWS secret key that is paired with the `access_key`.
+   * For information about creating and managing access and secret keys, refer to the AWS documentation.
+   * @ext_doc_id amazonbedrock-secret-keys
+   */
+  secret_key: string
+}
+
+export class AmazonBedrockTaskSettings {
+  /**
+   * For a `completion` task, it sets the maximum number of output tokens to be generated.
+   * @server_default 64
+   */
+  max_new_tokens?: integer
+  /**
+   * For a `completion` task, it is a number between 0.0 and 1.0 that controls the apparent creativity of the results.
+   * At temperature 0.0 the model is most deterministic; at temperature 1.0 it is most random.
+   * It should not be used if `top_p` or `top_k` is specified.
+   */
+  temperature?: float
+  /**
+   * For a `completion` task, it limits samples to the top-K most likely words, balancing coherence and variability.
+   * It is only available for the anthropic, cohere, and mistral providers.
+   * It is an alternative to `temperature`; it should not be used if `temperature` is specified.
+   */
+  top_k?: float
+  /**
+   * For a `completion` task, it is a number in the range of 0.0 to 1.0 that eliminates low-probability tokens.
+   * Top-p uses nucleus sampling to select the top tokens whose sum of likelihoods does not exceed a certain value, ensuring both variety and coherence.
+   * It is an alternative to `temperature`; it should not be used if `temperature` is specified.
+   */
+  top_p?: float
+}
diff --git a/specification/inference/put_amazonbedrock/PutAmazonBedrockResponse.ts b/specification/inference/put_amazonbedrock/PutAmazonBedrockResponse.ts
new file mode 100644
index 0000000000..d40639b031
--- /dev/null
+++ b/specification/inference/put_amazonbedrock/PutAmazonBedrockResponse.ts
@@ -0,0 +1,24 @@
+/*
+ * Licensed to Elasticsearch B.V. under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { InferenceEndpointInfo } from '@inference/_types/Services'
+
+export class Response {
+  body: InferenceEndpointInfo
+}
diff --git a/specification/inference/put_amazonbedrock/examples/request/PutAmazonBedrockRequestExample1.yaml b/specification/inference/put_amazonbedrock/examples/request/PutAmazonBedrockRequestExample1.yaml
new file mode 100644
index 0000000000..cded037d23
--- /dev/null
+++ b/specification/inference/put_amazonbedrock/examples/request/PutAmazonBedrockRequestExample1.yaml
@@ -0,0 +1,15 @@
+summary: A text embedding task
+description: Run `PUT _inference/text_embedding/amazon_bedrock_embeddings` to create an inference endpoint that performs a text embedding task.
+# method_request: "PUT _inference/text_embedding/amazon_bedrock_embeddings"
+# type: "request"
+value: |-
+  {
+    "service": "amazonbedrock",
+    "service_settings": {
+      "access_key": "AWS-access-key",
+      "secret_key": "AWS-secret-key",
+      "region": "us-east-1",
+      "provider": "amazontitan",
+      "model": "amazon.titan-embed-text-v2:0"
+    }
+  }
diff --git a/specification/inference/put_amazonbedrock/examples/request/PutAmazonBedrockRequestExample2.yaml b/specification/inference/put_amazonbedrock/examples/request/PutAmazonBedrockRequestExample2.yaml
new file mode 100644
index 0000000000..d21fd0d2aa
--- /dev/null
+++ b/specification/inference/put_amazonbedrock/examples/request/PutAmazonBedrockRequestExample2.yaml
@@ -0,0 +1,15 @@
+summary: A completion task
+description: Run `PUT _inference/completion/amazon_bedrock_completion` to create an inference endpoint to perform a completion task.
+# method_request: "PUT _inference/completion/amazon_bedrock_completion"
+# type: "request"
+value: |-
+  {
+    "service": "amazonbedrock",
+    "service_settings": {
+      "access_key": "AWS-access-key",
+      "secret_key": "AWS-secret-key",
+      "region": "us-east-1",
+      "provider": "amazontitan",
+      "model": "amazon.titan-text-premier-v1:0"
+    }
+  }
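
For reviewers, a brief illustrative sketch (not part of the upstream commit) of how the new endpoint would be exercised once this patch lands. The endpoint name `amazon_bedrock_completion` and the credential values are placeholders; the `task_settings` block pairs `max_new_tokens` with `temperature` and deliberately omits `top_p` and `top_k`, which the spec above documents as alternatives to `temperature`.

PUT _inference/completion/amazon_bedrock_completion
{
  "service": "amazonbedrock",
  "service_settings": {
    "access_key": "AWS-access-key",
    "secret_key": "AWS-secret-key",
    "region": "us-east-1",
    "provider": "amazontitan",
    "model": "amazon.titan-text-premier-v1:0"
  },
  "task_settings": {
    "max_new_tokens": 256,
    "temperature": 0.2
  }
}

A successful request returns the endpoint configuration as an `InferenceEndpointInfo` body, as defined in PutAmazonBedrockResponse.ts.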