diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 8b4c44e41b..1fd6405d91 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -17762,6 +17762,86 @@ "x-state": "Added in 9.0.0" } }, + "/_inference/{task_type}/{anthropic_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create an Anthropic inference endpoint", + "description": "Create an inference endpoint to perform an inference task with the `anthropic` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "operationId": "inference-put-anthropic", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The task type.\nThe only valid task type for the model to perform is `completion`.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_anthropic:AnthropicTaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "anthropic_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, + "service": { + "$ref": "#/components/schemas/inference.put_anthropic:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_anthropic:AnthropicServiceSettings" + }, + "task_settings": { + "$ref": "#/components/schemas/inference.put_anthropic:AnthropicTaskSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + }, + "examples": { + "PutAnthropicRequestExample1": { + "description": "Run `PUT _inference/completion/anthropic_completion` to create an inference endpoint that performs a completion task.", + "value": "{\n \"service\": \"anthropic\",\n \"service_settings\": {\n \"api_key\": \"Anthropic-Api-Key\",\n \"model_id\": \"Model-ID\"\n },\n \"task_settings\": {\n \"max_tokens\": 1024\n }\n}" + } + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + } + } + } + } + }, + "x-state": "Added in 8.16.0" + } + }, "/_inference/{task_type}/{cohere_inference_id}": { "put": { "tags": [ @@ -18669,7 +18749,7 @@ ] }, "examples": { - "InferenceRequestExample1": { + "PutWatsonxRequestExample1": { "description": "Run `PUT _inference/text_embedding/watsonx-embeddings` to create a Watsonx inference endpoint that performs a text embedding task.", "value": "{\n \"service\": \"watsonxai\",\n \"service_settings\": {\n \"api_key\": \"Watsonx-API-Key\", \n \"url\": \"Watsonx-URL\", \n \"model_id\": \"ibm/slate-30m-english-rtrvr\",\n \"project_id\": \"IBM-Cloud-ID\", \n \"api_version\": \"2024-03-14\"\n 
}\n}" } @@ -77618,6 +77698,74 @@ "inference._types:ServiceSettings": { "type": "object" }, + "inference.put_anthropic:AnthropicTaskType": { + "type": "string", + "enum": [ + "completion" + ] + }, + "inference.put_anthropic:ServiceType": { + "type": "string", + "enum": [ + "anthropic" + ] + }, + "inference.put_anthropic:AnthropicServiceSettings": { + "type": "object", + "properties": { + "api_key": { + "description": "A valid API key for the Anthropic API.", + "type": "string" + }, + "model_id": { + "description": "The name of the model to use for the inference task.\nRefer to the Anthropic documentation for the list of supported models.", + "type": "string" + }, + "rate_limit": { + "$ref": "#/components/schemas/inference._types:RateLimitSetting" + } + }, + "required": [ + "api_key", + "model_id" + ] + }, + "inference._types:RateLimitSetting": { + "type": "object", + "properties": { + "requests_per_minute": { + "description": "The number of requests allowed per minute.", + "type": "number" + } + } + }, + "inference.put_anthropic:AnthropicTaskSettings": { + "type": "object", + "properties": { + "max_tokens": { + "description": "For a `completion` task, it is the maximum number of tokens to generate before stopping.", + "type": "number" + }, + "temperature": { + "externalDocs": { + "url": "https://docs.anthropic.com/en/api/messages" + }, + "description": "For a `completion` task, it is the amount of randomness injected into the response.\nFor more details about the supported range, refer to Anthropic documentation.", + "type": "number" + }, + "top_k": { + "description": "For a `completion` task, it specifies to only sample from the top K options for each subsequent token.\nIt is recommended for advanced use cases only.\nYou usually only need to use `temperature`.", + "type": "number" + }, + "top_p": { + "description": "For a `completion` task, it specifies to use Anthropic's nucleus sampling.\nIn nucleus sampling, Anthropic computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches the specified probability.\nYou should either alter `temperature` or `top_p`, but not both.\nIt is recommended for advanced use cases only.\nYou usually only need to use `temperature`.", + "type": "number" + } + }, + "required": [ + "max_tokens" + ] + }, "inference.put_cohere:CohereTaskType": { "type": "string", "enum": [ @@ -77668,15 +77816,6 @@ "int8" ] }, - "inference._types:RateLimitSetting": { - "type": "object", - "properties": { - "requests_per_minute": { - "description": "The number of requests allowed per minute.", - "type": "number" - } - } - }, "inference.put_cohere:SimilarityType": { "type": "string", "enum": [ diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index ea63b99c01..99b7323b6b 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -9584,6 +9584,86 @@ "x-state": "Added in 9.0.0" } }, + "/_inference/{task_type}/{anthropic_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create an Anthropic inference endpoint", + "description": "Create an inference endpoint to perform an inference task with the `anthropic` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo 
verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "operationId": "inference-put-anthropic", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The task type.\nThe only valid task type for the model to perform is `completion`.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_anthropic:AnthropicTaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "anthropic_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, + "service": { + "$ref": "#/components/schemas/inference.put_anthropic:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_anthropic:AnthropicServiceSettings" + }, + "task_settings": { + "$ref": "#/components/schemas/inference.put_anthropic:AnthropicTaskSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + }, + "examples": { + "PutAnthropicRequestExample1": { + "description": "Run `PUT _inference/completion/anthropic_completion` to create an inference endpoint that performs a completion task.", + "value": "{\n \"service\": \"anthropic\",\n \"service_settings\": {\n \"api_key\": \"Anthropic-Api-Key\",\n \"model_id\": \"Model-ID\"\n },\n \"task_settings\": {\n \"max_tokens\": 1024\n }\n}" + } + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + } + } + } + } + }, + "x-state": "Added in 8.16.0" + } + }, "/_inference/{task_type}/{cohere_inference_id}": { "put": { "tags": [ @@ -10491,7 +10571,7 @@ ] }, "examples": { - "InferenceRequestExample1": { + "PutWatsonxRequestExample1": { "description": "Run `PUT _inference/text_embedding/watsonx-embeddings` to create a Watsonx inference endpoint that performs a text embedding task.", "value": "{\n \"service\": \"watsonxai\",\n \"service_settings\": {\n \"api_key\": \"Watsonx-API-Key\", \n \"url\": \"Watsonx-URL\", \n \"model_id\": \"ibm/slate-30m-english-rtrvr\",\n \"project_id\": \"IBM-Cloud-ID\", \n \"api_version\": \"2024-03-14\"\n }\n}" } @@ -48810,6 +48890,74 @@ "inference._types:ServiceSettings": { "type": "object" }, + "inference.put_anthropic:AnthropicTaskType": { + "type": "string", + "enum": [ + "completion" + ] + }, + "inference.put_anthropic:ServiceType": { + "type": "string", + "enum": [ + "anthropic" + ] + }, + "inference.put_anthropic:AnthropicServiceSettings": { + "type": "object", + "properties": { + "api_key": { + "description": "A valid API key for the Anthropic API.", + "type": "string" + }, + "model_id": { + "description": "The name of the model to use for the inference task.\nRefer to the Anthropic documentation for the list of supported models.", + "type": "string" + }, + "rate_limit": { + "$ref": "#/components/schemas/inference._types:RateLimitSetting" + } + }, + 
"required": [ + "api_key", + "model_id" + ] + }, + "inference._types:RateLimitSetting": { + "type": "object", + "properties": { + "requests_per_minute": { + "description": "The number of requests allowed per minute.", + "type": "number" + } + } + }, + "inference.put_anthropic:AnthropicTaskSettings": { + "type": "object", + "properties": { + "max_tokens": { + "description": "For a `completion` task, it is the maximum number of tokens to generate before stopping.", + "type": "number" + }, + "temperature": { + "externalDocs": { + "url": "https://docs.anthropic.com/en/api/messages" + }, + "description": "For a `completion` task, it is the amount of randomness injected into the response.\nFor more details about the supported range, refer to Anthropic documentation.", + "type": "number" + }, + "top_k": { + "description": "For a `completion` task, it specifies to only sample from the top K options for each subsequent token.\nIt is recommended for advanced use cases only.\nYou usually only need to use `temperature`.", + "type": "number" + }, + "top_p": { + "description": "For a `completion` task, it specifies to use Anthropic's nucleus sampling.\nIn nucleus sampling, Anthropic computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches the specified probability.\nYou should either alter `temperature` or `top_p`, but not both.\nIt is recommended for advanced use cases only.\nYou usually only need to use `temperature`.", + "type": "number" + } + }, + "required": [ + "max_tokens" + ] + }, "inference.put_cohere:CohereTaskType": { "type": "string", "enum": [ @@ -48860,15 +49008,6 @@ "int8" ] }, - "inference._types:RateLimitSetting": { - "type": "object", - "properties": { - "requests_per_minute": { - "description": "The number of requests allowed per minute.", - "type": "number" - } - } - }, "inference.put_cohere:SimilarityType": { "type": "string", "enum": [ diff --git a/output/schema/schema-serverless.json b/output/schema/schema-serverless.json index cf050f9049..38113d72ff 100644 --- a/output/schema/schema-serverless.json +++ b/output/schema/schema-serverless.json @@ -4591,6 +4591,51 @@ } ] }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.16.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Create an Anthropic inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `anthropic` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "docId": "inference-api-anthropic", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-anthropic.html", + "name": "inference.put_anthropic", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.put_anthropic" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": 
"Response", + "namespace": "inference.put_anthropic" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "PUT" + ], + "path": "/_inference/{task_type}/{anthropic_inference_id}" + } + ] + }, { "availability": { "serverless": { @@ -27338,6 +27383,130 @@ }, "specLocation": "inference/put/PutResponse.ts#L22-L24" }, + { + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, + { + "description": "The type of service supported for the specified task type. In this case, `anthropic`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_anthropic" + } + } + }, + { + "description": "Settings used to install the inference model. These settings are specific to the `watsonxai` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "AnthropicServiceSettings", + "namespace": "inference.put_anthropic" + } + } + }, + { + "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "AnthropicTaskSettings", + "namespace": "inference.put_anthropic" + } + } + } + ] + }, + "description": "Create an Anthropic inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `anthropic` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "examples": { + "PutAnthropicRequestExample1": { + "description": "Run `PUT _inference/completion/anthropic_completion` to create an inference endpoint that performs a completion task.", + "value": "{\n \"service\": \"anthropic\",\n \"service_settings\": {\n \"api_key\": \"Anthropic-Api-Key\",\n \"model_id\": \"Model-ID\"\n },\n \"task_settings\": {\n \"max_tokens\": 1024\n }\n}" + } + }, + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "kind": "request", + "name": { + "name": "Request", + "namespace": "inference.put_anthropic" + }, + "path": [ + { + "description": "The task type.\nThe only valid task type for the model to perform is `completion`.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "AnthropicTaskType", + "namespace": "inference.put_anthropic" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "anthropic_inference_id", + "required": true, + "type": { + "kind": "instance_of", + 
"type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L28-L82" + }, + { + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "kind": "response", + "name": { + "name": "Response", + "namespace": "inference.put_anthropic" + }, + "specLocation": "inference/put_anthropic/PutAnthropicResponse.ts#L22-L24" + }, { "attachedBehaviors": [ "CommonQueryParameters" @@ -28625,7 +28794,7 @@ }, "description": "Create a Watsonx inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `watsonxai` service.\nYou need an IBM Cloud Databases for Elasticsearch deployment to use the `watsonxai` inference service.\nYou can provision one through the IBM catalog, the Cloud Databases CLI plug-in, the Cloud Databases API, or Terraform.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", "examples": { - "InferenceRequestExample1": { + "PutWatsonxRequestExample1": { "description": "Run `PUT _inference/text_embedding/watsonx-embeddings` to create an Watonsx inference endpoint that performs a text embedding task.", "value": "{\n \"service\": \"watsonxai\",\n \"service_settings\": {\n \"api_key\": \"Watsonx-API-Key\", \n \"url\": \"Wastonx-URL\", \n \"model_id\": \"ibm/slate-30m-english-rtrvr\",\n \"project_id\": \"IBM-Cloud-ID\", \n \"api_version\": \"2024-03-14\"\n }\n}" } @@ -101818,6 +101987,32 @@ ], "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L59-L71" }, + { + "kind": "enum", + "members": [ + { + "name": "completion" + } + ], + "name": { + "name": "AnthropicTaskType", + "namespace": "inference.put_anthropic" + }, + "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L84-L86" + }, + { + "kind": "enum", + "members": [ + { + "name": "anthropic" + } + ], + "name": { + "name": "ServiceType", + "namespace": "inference.put_anthropic" + }, + "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L88-L90" + }, { "kind": "enum", "members": [ @@ -122637,6 +122832,135 @@ ], "specLocation": "inference/_types/Services.ts#L60-L89" }, + { + "kind": "interface", + "name": { + "name": "AnthropicServiceSettings", + "namespace": "inference.put_anthropic" + }, + "properties": [ + { + "description": "A valid API key for the Anthropic API.", + "name": "api_key", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The name of the model to use for the inference task.\nRefer to the Anthropic documentation for the list of supported models.", + "extDocId": "anothropic-models", + "name": "model_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from Anthropic.\nBy default, the `anthropic` 
service sets the number of requests allowed per minute to 50.", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + } + ], + "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L92-L108" + }, + { + "kind": "interface", + "name": { + "name": "RateLimitSetting", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "The number of requests allowed per minute.", + "name": "requests_per_minute", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/_types/Services.ts#L95-L100" + }, + { + "kind": "interface", + "name": { + "name": "AnthropicTaskSettings", + "namespace": "inference.put_anthropic" + }, + "properties": [ + { + "description": "For a `completion` task, it is the maximum number of tokens to generate before stopping.", + "name": "max_tokens", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "For a `completion` task, it is the amount of randomness injected into the response.\nFor more details about the supported range, refer to Anthropic documentation.", + "extDocId": "anthropic-messages", + "extDocUrl": "https://docs.anthropic.com/en/api/messages", + "name": "temperature", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + }, + { + "description": "For a `completion` task, it specifies to only sample from the top K options for each subsequent token.\nIt is recommended for advanced use cases only.\nYou usually only need to use `temperature`.", + "name": "top_k", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "For a `completion` task, it specifies to use Anthropic's nucleus sampling.\nIn nucleus sampling, Anthropic computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches the specified probability.\nYou should either alter `temperature` or `top_p`, but not both.\nIt is recommended for advanced use cases only.\nYou usually only need to use `temperature`.", + "name": "top_p", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L110-L135" + }, { "kind": "interface", "name": { @@ -122710,28 +123034,6 @@ ], "specLocation": "inference/put_cohere/PutCohereRequest.ts#L119-L160" }, - { - "kind": "interface", - "name": { - "name": "RateLimitSetting", - "namespace": "inference._types" - }, - "properties": [ - { - "description": "The number of requests allowed per minute.", - "name": "requests_per_minute", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "integer", - "namespace": "_types" - } - } - } - ], - "specLocation": "inference/_types/Services.ts#L95-L100" - }, { "kind": "interface", "name": { diff --git a/output/schema/schema.json b/output/schema/schema.json index efe8902d41..4c3397d5fc 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -9303,6 +9303,51 @@ } ] }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": 
"8.16.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Create an Anthropic inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `anthropic` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "docId": "inference-api-anthropic", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-anthropic.html", + "name": "inference.put_anthropic", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.put_anthropic" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.put_anthropic" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "PUT" + ], + "path": "/_inference/{task_type}/{anthropic_inference_id}" + } + ] + }, { "availability": { "serverless": { @@ -150783,6 +150828,263 @@ }, "specLocation": "inference/put/PutResponse.ts#L22-L24" }, + { + "kind": "interface", + "name": { + "name": "AnthropicServiceSettings", + "namespace": "inference.put_anthropic" + }, + "properties": [ + { + "description": "A valid API key for the Anthropic API.", + "name": "api_key", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The name of the model to use for the inference task.\nRefer to the Anthropic documentation for the list of supported models.", + "extDocId": "anothropic-models", + "name": "model_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned from Anthropic.\nBy default, the `anthropic` service sets the number of requests allowed per minute to 50.", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + } + ], + "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L92-L108" + }, + { + "kind": "interface", + "name": { + "name": "AnthropicTaskSettings", + "namespace": "inference.put_anthropic" + }, + "properties": [ + { + "description": "For a `completion` task, it is the maximum number of tokens to generate before stopping.", + "name": "max_tokens", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "For a `completion` task, it is the amount of randomness injected into the response.\nFor more details about the supported range, refer to Anthropic documentation.", + "extDocId": "anthropic-messages", + "extDocUrl": "https://docs.anthropic.com/en/api/messages", + "name": "temperature", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } 
+ }, + { + "description": "For a `completion` task, it specifies to only sample from the top K options for each subsequent token.\nIt is recommended for advanced use cases only.\nYou usually only need to use `temperature`.", + "name": "top_k", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "For a `completion` task, it specifies to use Anthropic's nucleus sampling.\nIn nucleus sampling, Anthropic computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches the specified probability.\nYou should either alter `temperature` or `top_p`, but not both.\nIt is recommended for advanced use cases only.\nYou usually only need to use `temperature`.", + "name": "top_p", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L110-L135" + }, + { + "kind": "enum", + "members": [ + { + "name": "completion" + } + ], + "name": { + "name": "AnthropicTaskType", + "namespace": "inference.put_anthropic" + }, + "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L84-L86" + }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The chunking configuration object.", + "extDocId": "inference-chunking", + "extDocUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html#infer-chunking-config", + "name": "chunking_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "InferenceChunkingSettings", + "namespace": "inference._types" + } + } + }, + { + "description": "The type of service supported for the specified task type. In this case, `anthropic`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_anthropic" + } + } + }, + { + "description": "Settings used to install the inference model. 
These settings are specific to the `anthropic` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "AnthropicServiceSettings", + "namespace": "inference.put_anthropic" + } + } + }, + { + "description": "Settings to configure the inference task.\nThese settings are specific to the task type you specified.", + "name": "task_settings", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "AnthropicTaskSettings", + "namespace": "inference.put_anthropic" + } + } + } + ] + }, + "description": "Create an Anthropic inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `anthropic` service.\n\nWhen you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", + "examples": { + "PutAnthropicRequestExample1": { + "description": "Run `PUT _inference/completion/anthropic_completion` to create an inference endpoint that performs a completion task.", + "value": "{\n \"service\": \"anthropic\",\n \"service_settings\": {\n \"api_key\": \"Anthropic-Api-Key\",\n \"model_id\": \"Model-ID\"\n },\n \"task_settings\": {\n \"max_tokens\": 1024\n }\n}" + } + }, + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.put_anthropic" + }, + "path": [ + { + "description": "The task type.\nThe only valid task type for the model to perform is `completion`.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "AnthropicTaskType", + "namespace": "inference.put_anthropic" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "anthropic_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L28-L82" + }, + { + "kind": "response", + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "name": { + "name": "Response", + "namespace": "inference.put_anthropic" + }, + "specLocation": "inference/put_anthropic/PutAnthropicResponse.ts#L22-L24" + }, + { + "kind": "enum", + "members": [ + { + "name": "anthropic" + } + ], + "name": { + "name": "ServiceType", + "namespace": "inference.put_anthropic" + }, + "specLocation": "inference/put_anthropic/PutAnthropicRequest.ts#L88-L90" + }, { "kind": "interface", "name": { @@ -153426,7 +153728,7 @@ }, "description": "Create a Watsonx inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `watsonxai` service.\nYou need an IBM Cloud Databases for Elasticsearch deployment to use the `watsonxai` inference service.\nYou can provision one through the IBM catalog, the Cloud Databases CLI plug-in, the Cloud Databases API, or Terraform.\n\nWhen you create an inference endpoint, the associated machine 
learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.", "examples": { - "InferenceRequestExample1": { + "PutWatsonxRequestExample1": { "description": "Run `PUT _inference/text_embedding/watsonx-embeddings` to create a Watsonx inference endpoint that performs a text embedding task.", "value": "{\n \"service\": \"watsonxai\",\n \"service_settings\": {\n \"api_key\": \"Watsonx-API-Key\", \n \"url\": \"Watsonx-URL\", \n \"model_id\": \"ibm/slate-30m-english-rtrvr\",\n \"project_id\": \"IBM-Cloud-ID\", \n \"api_version\": \"2024-03-14\"\n }\n}" } diff --git a/output/typescript/types.ts b/output/typescript/types.ts index 07af9d9e72..945582582c 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -13252,6 +13252,36 @@ export interface InferencePutRequest extends RequestBase { export type InferencePutResponse = InferenceInferenceEndpointInfo +export interface InferencePutAnthropicAnthropicServiceSettings { + api_key: string + model_id: string + rate_limit?: InferenceRateLimitSetting +} + +export interface InferencePutAnthropicAnthropicTaskSettings { + max_tokens: integer + temperature?: float + top_k?: integer + top_p?: float +} + +export type InferencePutAnthropicAnthropicTaskType = 'completion' + +export interface InferencePutAnthropicRequest extends RequestBase { + task_type: InferencePutAnthropicAnthropicTaskType + anthropic_inference_id: Id + body?: { + chunking_settings?: InferenceInferenceChunkingSettings + service: InferencePutAnthropicServiceType + service_settings: InferencePutAnthropicAnthropicServiceSettings + task_settings?: InferencePutAnthropicAnthropicTaskSettings + } +} + +export type InferencePutAnthropicResponse = InferenceInferenceEndpointInfo + +export type InferencePutAnthropicServiceType = 'anthropic' + export interface InferencePutCohereCohereServiceSettings { api_key: string embedding_type?: InferencePutCohereEmbeddingType diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv index 4f3450eb2b..038b2b6b89 100644 --- a/specification/_doc_ids/table.csv +++ b/specification/_doc_ids/table.csv @@ -11,6 +11,8 @@ analysis-tokenizers,https://www.elastic.co/guide/en/elasticsearch/reference/curr analysis,https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis.html analyze-repository,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-snapshot-repository-analyze analyzer-anatomy,https://www.elastic.co/guide/en/elasticsearch/reference/current/analyzer-anatomy.html +anthropic-messages,https://docs.anthropic.com/en/api/messages +anthropic-models,https://docs.anthropic.com/en/docs/about-claude/models/all-models#model-names api-date-math-index-names,https://www.elastic.co/guide/en/elasticsearch/reference/current/api-conventions.html#api-date-math-index-names api-root,https://www.elastic.co/docs/api/doc/elasticsearch/group/endpoint-info append-processor,https://www.elastic.co/guide/en/elasticsearch/reference/current/append-processor.html @@ -320,6 +322,7 @@ indices-templates,https://www.elastic.co/guide/en/elasticsearch/reference/curren 
indices-update-settings,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-indices-put-settings infer-trained-model,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ml-infer-trained-model infer-trained-model-deployment,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-trained-model-deployment.html +inference-api-anthropic,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-anthropic.html inference-api-delete,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-delete inference-api-get,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-get inference-api-post,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-inference diff --git a/specification/_json_spec/inference.put_anthropic.json b/specification/_json_spec/inference.put_anthropic.json new file mode 100644 index 0000000000..dce56157c7 --- /dev/null +++ b/specification/_json_spec/inference.put_anthropic.json @@ -0,0 +1,35 @@ +{ + "inference.put_anthropic": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-anthropic.html", + "description": "Configure an Anthropic inference endpoint" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": ["application/json"], + "content_type": ["application/json"] + }, + "url": { + "paths": [ + { + "path": "/_inference/{task_type}/{anthropic_inference_id}", + "methods": ["PUT"], + "parts": { + "task_type": { + "type": "string", + "description": "The task type" + }, + "anthropic_inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The inference endpoint's task and service settings" + } + } +} diff --git a/specification/inference/put_anthropic/PutAnthropicRequest.ts b/specification/inference/put_anthropic/PutAnthropicRequest.ts new file mode 100644 index 0000000000..95d2f61d24 --- /dev/null +++ b/specification/inference/put_anthropic/PutAnthropicRequest.ts @@ -0,0 +1,135 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { + InferenceChunkingSettings, + RateLimitSetting +} from '@inference/_types/Services' +import { RequestBase } from '@_types/Base' +import { Id } from '@_types/common' +import { float, integer } from '@_types/Numeric' + +/** + * Create an Anthropic inference endpoint. + * + * Create an inference endpoint to perform an inference task with the `anthropic` service. + * + * When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running. + * After creating the endpoint, wait for the model deployment to complete before using it. 
+ * To verify the deployment status, use the get trained model statistics API. + * Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`. + * Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources. + * @rest_spec_name inference.put_anthropic + * @availability stack since=8.16.0 stability=stable visibility=public + * @availability serverless stability=stable visibility=public + * @cluster_privileges manage_inference + * @doc_id inference-api-anthropic + */ +export interface Request extends RequestBase { + urls: [ + { + path: '/_inference/{task_type}/{anthropic_inference_id}' + methods: ['PUT'] + } + ] + path_parts: { + /** + * The task type. + * The only valid task type for the model to perform is `completion`. + */ + task_type: AnthropicTaskType + /** + * The unique identifier of the inference endpoint. + */ + anthropic_inference_id: Id + } + body: { + /** + * The chunking configuration object. + * @ext_doc_id inference-chunking + */ + chunking_settings?: InferenceChunkingSettings + /** + * The type of service supported for the specified task type. In this case, `anthropic`. + */ + service: ServiceType + /** + * Settings used to install the inference model. These settings are specific to the `anthropic` service. + */ + service_settings: AnthropicServiceSettings + /** + * Settings to configure the inference task. + * These settings are specific to the task type you specified. + */ + task_settings?: AnthropicTaskSettings + } +} + +export enum AnthropicTaskType { + completion +} + +export enum ServiceType { + anthropic +} + +export class AnthropicServiceSettings { + /** + * A valid API key for the Anthropic API. + */ + api_key: string + /** + * The name of the model to use for the inference task. + * Refer to the Anthropic documentation for the list of supported models. + * @ext_doc_id anthropic-models + */ + model_id: string + /** + * This setting helps to minimize the number of rate limit errors returned from Anthropic. + * By default, the `anthropic` service sets the number of requests allowed per minute to 50. + */ + rate_limit?: RateLimitSetting +} + +export class AnthropicTaskSettings { + /** + * For a `completion` task, it is the maximum number of tokens to generate before stopping. + */ + max_tokens: integer + /** + * For a `completion` task, it is the amount of randomness injected into the response. + * For more details about the supported range, refer to Anthropic documentation. + * @ext_doc_id anthropic-messages + */ + temperature?: float + /** + * For a `completion` task, it specifies to only sample from the top K options for each subsequent token. + * It is recommended for advanced use cases only. + * You usually only need to use `temperature`. + */ + top_k?: integer + /** + * For a `completion` task, it specifies to use Anthropic's nucleus sampling. + * In nucleus sampling, Anthropic computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches the specified probability. + * You should either alter `temperature` or `top_p`, but not both. + * It is recommended for advanced use cases only. + * You usually only need to use `temperature`. 
+ */ + top_p?: float +} diff --git a/specification/inference/put_anthropic/PutAnthropicResponse.ts b/specification/inference/put_anthropic/PutAnthropicResponse.ts new file mode 100644 index 0000000000..d40639b031 --- /dev/null +++ b/specification/inference/put_anthropic/PutAnthropicResponse.ts @@ -0,0 +1,24 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { InferenceEndpointInfo } from '@inference/_types/Services' + +export class Response { + body: InferenceEndpointInfo +} diff --git a/specification/inference/put_anthropic/examples/request/PutAnthropicRequestExample1.yaml b/specification/inference/put_anthropic/examples/request/PutAnthropicRequestExample1.yaml new file mode 100644 index 0000000000..112a15f88d --- /dev/null +++ b/specification/inference/put_anthropic/examples/request/PutAnthropicRequestExample1.yaml @@ -0,0 +1,15 @@ +# summary: +description: Run `PUT _inference/completion/anthropic_completion` to create an inference endpoint that performs a completion task. +# method_request: "PUT _inference/completion/anthropic_completion" +# type: "request" +value: |- + { + "service": "anthropic", + "service_settings": { + "api_key": "Anthropic-Api-Key", + "model_id": "Model-ID" + }, + "task_settings": { + "max_tokens": 1024 + } + } diff --git a/specification/inference/put_watsonx/examples/request/InferenceRequestExample1.yaml b/specification/inference/put_watsonx/examples/request/PutWatsonxRequestExample1.yaml similarity index 100% rename from specification/inference/put_watsonx/examples/request/InferenceRequestExample1.yaml rename to specification/inference/put_watsonx/examples/request/PutWatsonxRequestExample1.yaml