diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index a6324ca4e1..d83d418931 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -17669,6 +17669,74 @@ "x-state": "Added in 8.11.0" } }, + "/_inference/{task_type}/{eis_inference_id}": { + "put": { + "tags": [ + "inference" + ], + "summary": "Create an Elastic Inference Service (EIS) inference endpoint", + "description": "Create an inference endpoint to perform an inference task through the Elastic Inference Service (EIS).", + "operationId": "inference-put-eis", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The type of the inference task that the model will perform.\nNOTE: The `chat_completion` task type only supports streaming and only through the _stream API.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_eis:EisTaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "eis_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "service": { + "$ref": "#/components/schemas/inference.put_eis:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_eis:EisServiceSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + } + } + } + } + }, + "x-state": "Added in 8.12.0" + } + }, "/_inference/{task_type}/{openai_inference_id}": { "put": { "tags": [ @@ -76883,6 +76951,42 @@ "inference._types:ServiceSettings": { "type": "object" }, + "inference.put_eis:EisTaskType": { + "type": "string", + "enum": [ + "chat_completion" + ] + }, + "inference.put_eis:ServiceType": { + "type": "string", + "enum": [ + "elastic" + ] + }, + "inference.put_eis:EisServiceSettings": { + "type": "object", + "properties": { + "model_id": { + "description": "The name of the model to use for the inference task.", + "type": "string" + }, + "rate_limit": { + "$ref": "#/components/schemas/inference._types:RateLimitSetting" + } + }, + "required": [ + "model_id" + ] + }, + "inference._types:RateLimitSetting": { + "type": "object", + "properties": { + "requests_per_minute": { + "description": "The number of requests allowed per minute.", + "type": "number" + } + } + }, "inference.put_openai:OpenAITaskType": { "type": "string", "enum": [ @@ -76935,15 +77039,6 @@ "model_id" ] }, - "inference._types:RateLimitSetting": { - "type": "object", - "properties": { - "requests_per_minute": { - "description": "The number of requests allowed per minute.", - "type": "number" - } - } - }, "inference.put_openai:OpenAITaskSettings": { "type": "object", "properties": { diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 3cbc638020..80358caf02 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -9496,6 +9496,74 @@ "x-state": "Added in 8.11.0" } }, + "/_inference/{task_type}/{eis_inference_id}": { + "put": { + "tags": [ + "inference" + ], + 
"summary": "Create an Elastic Inference Service (EIS) inference endpoint", + "description": "Create an inference endpoint to perform an inference task through the Elastic Inference Service (EIS).", + "operationId": "inference-put-eis", + "parameters": [ + { + "in": "path", + "name": "task_type", + "description": "The type of the inference task that the model will perform.\nNOTE: The `chat_completion` task type only supports streaming and only through the _stream API.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference.put_eis:EisTaskType" + }, + "style": "simple" + }, + { + "in": "path", + "name": "eis_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "service": { + "$ref": "#/components/schemas/inference.put_eis:ServiceType" + }, + "service_settings": { + "$ref": "#/components/schemas/inference.put_eis:EisServiceSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + } + } + } + } + }, + "x-state": "Added in 8.12.0" + } + }, "/_inference/{task_type}/{openai_inference_id}": { "put": { "tags": [ @@ -48080,6 +48148,42 @@ "inference._types:ServiceSettings": { "type": "object" }, + "inference.put_eis:EisTaskType": { + "type": "string", + "enum": [ + "chat_completion" + ] + }, + "inference.put_eis:ServiceType": { + "type": "string", + "enum": [ + "elastic" + ] + }, + "inference.put_eis:EisServiceSettings": { + "type": "object", + "properties": { + "model_id": { + "description": "The name of the model to use for the inference task.", + "type": "string" + }, + "rate_limit": { + "$ref": "#/components/schemas/inference._types:RateLimitSetting" + } + }, + "required": [ + "model_id" + ] + }, + "inference._types:RateLimitSetting": { + "type": "object", + "properties": { + "requests_per_minute": { + "description": "The number of requests allowed per minute.", + "type": "number" + } + } + }, "inference.put_openai:OpenAITaskType": { "type": "string", "enum": [ @@ -48132,15 +48236,6 @@ "model_id" ] }, - "inference._types:RateLimitSetting": { - "type": "object", - "properties": { - "requests_per_minute": { - "description": "The number of requests allowed per minute.", - "type": "number" - } - } - }, "inference.put_openai:OpenAITaskSettings": { "type": "object", "properties": { diff --git a/output/schema/schema-serverless.json b/output/schema/schema-serverless.json index 885af064be..8bacd50a53 100644 --- a/output/schema/schema-serverless.json +++ b/output/schema/schema-serverless.json @@ -4546,6 +4546,51 @@ } ] }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.12.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Create an Elastic Inference Service (EIS) inference endpoint.\n\nCreate an inference endpoint to perform an inference task through the Elastic Inference Service (EIS).", + "docId": "inference-api-put-eis", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-eis.html", + "name": "inference.put_eis", + "privileges": { + 
"cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.put_eis" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.put_eis" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "PUT" + ], + "path": "/_inference/{task_type}/{eis_inference_id}" + } + ] + }, { "availability": { "serverless": { @@ -26815,6 +26860,98 @@ }, "specLocation": "inference/put/PutResponse.ts#L22-L24" }, + { + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The type of service supported for the specified task type. In this case, `elastic`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_eis" + } + } + }, + { + "description": "Settings used to install the inference model. These settings are specific to the `elastic` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "EisServiceSettings", + "namespace": "inference.put_eis" + } + } + } + ] + }, + "description": "Create an Elastic Inference Service (EIS) inference endpoint.\n\nCreate an inference endpoint to perform an inference task through the Elastic Inference Service (EIS).", + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "kind": "request", + "name": { + "name": "Request", + "namespace": "inference.put_eis" + }, + "path": [ + { + "description": "The type of the inference task that the model will perform.\nNOTE: The `chat_completion` task type only supports streaming and only through the _stream API.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "EisTaskType", + "namespace": "inference.put_eis" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "eis_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_eis/PutEisRequest.ts#L24-L62" + }, + { + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "kind": "response", + "name": { + "name": "Response", + "namespace": "inference.put_eis" + }, + "specLocation": "inference/put_eis/PutEisResponse.ts#L22-L24" + }, { "attachedBehaviors": [ "CommonQueryParameters" @@ -100017,6 +100154,32 @@ ], "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L94-L106" }, + { + "kind": "enum", + "members": [ + { + "name": "chat_completion" + } + ], + "name": { + "name": "EisTaskType", + "namespace": "inference.put_eis" + }, + "specLocation": "inference/put_eis/PutEisRequest.ts#L64-L66" + }, + { + "kind": "enum", + "members": [ + { + "name": "elastic" + } + ], + "name": { + "name": "ServiceType", + "namespace": "inference.put_eis" + }, + "specLocation": "inference/put_eis/PutEisRequest.ts#L68-L70" + }, { "kind": "enum", "members": [ @@ -120334,6 +120497,62 @@ ], "specLocation": "inference/_types/Services.ts#L60-L89" }, + { + "kind": "interface", + "name": { + "name": "EisServiceSettings", + "namespace": "inference.put_eis" + }, + "properties": [ + { + "description": "The name of the model to use for the inference task.", 
+ "name": "model_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned.\nBy default, the `elastic` service sets the number of requests allowed per minute to `240` in case of `chat_completion`.", + "name": "rate_limit", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + } + ], + "specLocation": "inference/put_eis/PutEisRequest.ts#L72-L82" + }, + { + "kind": "interface", + "name": { + "name": "RateLimitSetting", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "The number of requests allowed per minute.", + "name": "requests_per_minute", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/_types/Services.ts#L95-L100" + }, { "kind": "interface", "name": { @@ -120421,28 +120640,6 @@ ], "specLocation": "inference/put_openai/PutOpenAiRequest.ts#L94-L136" }, - { - "kind": "interface", - "name": { - "name": "RateLimitSetting", - "namespace": "inference._types" - }, - "properties": [ - { - "description": "The number of requests allowed per minute.", - "name": "requests_per_minute", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "integer", - "namespace": "_types" - } - } - } - ], - "specLocation": "inference/_types/Services.ts#L95-L100" - }, { "kind": "interface", "name": { diff --git a/output/schema/schema.json b/output/schema/schema.json index 32cfd0cb11..df3d01216b 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -9258,6 +9258,51 @@ } ] }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.12.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Create an Elastic Inference Service (EIS) inference endpoint.\n\nCreate an inference endpoint to perform an inference task through the Elastic Inference Service (EIS).", + "docId": "inference-api-put-eis", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-eis.html", + "name": "inference.put_eis", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.put_eis" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.put_eis" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "PUT" + ], + "path": "/_inference/{task_type}/{eis_inference_id}" + } + ] + }, { "availability": { "serverless": { @@ -149969,6 +150014,158 @@ }, "specLocation": "inference/put/PutResponse.ts#L22-L24" }, + { + "kind": "interface", + "name": { + "name": "EisServiceSettings", + "namespace": "inference.put_eis" + }, + "properties": [ + { + "description": "The name of the model to use for the inference task.", + "name": "model_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "This setting helps to minimize the number of rate limit errors returned.\nBy default, the `elastic` service sets the number of requests allowed per minute to `240` in case of `chat_completion`.", + "name": "rate_limit", + 
"required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "RateLimitSetting", + "namespace": "inference._types" + } + } + } + ], + "specLocation": "inference/put_eis/PutEisRequest.ts#L72-L82" + }, + { + "kind": "enum", + "members": [ + { + "name": "chat_completion" + } + ], + "name": { + "name": "EisTaskType", + "namespace": "inference.put_eis" + }, + "specLocation": "inference/put_eis/PutEisRequest.ts#L64-L66" + }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The type of service supported for the specified task type. In this case, `elastic`.", + "name": "service", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ServiceType", + "namespace": "inference.put_eis" + } + } + }, + { + "description": "Settings used to install the inference model. These settings are specific to the `elastic` service.", + "name": "service_settings", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "EisServiceSettings", + "namespace": "inference.put_eis" + } + } + } + ] + }, + "description": "Create an Elastic Inference Service (EIS) inference endpoint.\n\nCreate an inference endpoint to perform an inference task through the Elastic Inference Service (EIS).", + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.put_eis" + }, + "path": [ + { + "description": "The type of the inference task that the model will perform.\nNOTE: The `chat_completion` task type only supports streaming and only through the _stream API.", + "name": "task_type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "EisTaskType", + "namespace": "inference.put_eis" + } + } + }, + { + "description": "The unique identifier of the inference endpoint.", + "name": "eis_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/put_eis/PutEisRequest.ts#L24-L62" + }, + { + "kind": "response", + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "name": { + "name": "Response", + "namespace": "inference.put_eis" + }, + "specLocation": "inference/put_eis/PutEisResponse.ts#L22-L24" + }, + { + "kind": "enum", + "members": [ + { + "name": "elastic" + } + ], + "name": { + "name": "ServiceType", + "namespace": "inference.put_eis" + }, + "specLocation": "inference/put_eis/PutEisRequest.ts#L68-L70" + }, { "kind": "interface", "name": { diff --git a/output/typescript/types.ts b/output/typescript/types.ts index 7a173a0727..d3ed64efc8 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -13228,6 +13228,26 @@ export interface InferencePutRequest extends RequestBase { export type InferencePutResponse = InferenceInferenceEndpointInfo +export interface InferencePutEisEisServiceSettings { + model_id: string + rate_limit?: InferenceRateLimitSetting +} + +export type InferencePutEisEisTaskType = 'chat_completion' + +export interface InferencePutEisRequest extends RequestBase { + task_type: InferencePutEisEisTaskType + eis_inference_id: Id + body?: { + service: InferencePutEisServiceType + service_settings: InferencePutEisEisServiceSettings + } +} + +export type InferencePutEisResponse = 
InferenceInferenceEndpointInfo + +export type InferencePutEisServiceType = 'elastic' + export interface InferencePutOpenaiOpenAIServiceSettings { api_key: string dimensions?: integer diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv index 25d42971c6..88e00f3c33 100644 --- a/specification/_doc_ids/table.csv +++ b/specification/_doc_ids/table.csv @@ -317,6 +317,7 @@ inference-api-delete,https://www.elastic.co/docs/api/doc/elasticsearch/operation inference-api-get,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-get inference-api-post,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-inference inference-api-put,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put +inference-api-put-eis,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-eis.html inference-api-put-openai,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-openai.html inference-api-put-watsonx,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-watsonx inference-api-stream,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-stream-inference diff --git a/specification/_json_spec/inference.put.eis.json b/specification/_json_spec/inference.put.eis.json new file mode 100644 index 0000000000..281758769e --- /dev/null +++ b/specification/_json_spec/inference.put.eis.json @@ -0,0 +1,35 @@ +{ + "inference.put_eis": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-eis.html", + "description": "Configure an EIS inference endpoint" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": ["application/json"], + "content_type": ["application/json"] + }, + "url": { + "paths": [ + { + "path": "/_inference/{task_type}/{eis_inference_id}", + "methods": ["PUT"], + "parts": { + "task_type": { + "type": "string", + "description": "The task type" + }, + "eis_inference_id": { + "type": "string", + "description": "The inference ID" + } + } + } + ] + }, + "body": { + "description": "The inference endpoint's service settings" + } + } +} diff --git a/specification/inference/put_eis/PutEisRequest.ts b/specification/inference/put_eis/PutEisRequest.ts new file mode 100644 index 0000000000..c788009a32 --- /dev/null +++ b/specification/inference/put_eis/PutEisRequest.ts @@ -0,0 +1,82 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { RateLimitSetting } from '@inference/_types/Services' +import { RequestBase } from '@_types/Base' +import { Id } from '@_types/common' + +/** + * Create an Elastic Inference Service (EIS) inference endpoint. 
+ *
+ * Create an inference endpoint to perform an inference task through the Elastic Inference Service (EIS).
+ * @rest_spec_name inference.put_eis
+ * @availability stack since=8.12.0 stability=stable visibility=public
+ * @availability serverless stability=stable visibility=public
+ * @cluster_privileges manage_inference
+ * @doc_id inference-api-put-eis
+ */
+export interface Request extends RequestBase {
+ urls: [
+ {
+ path: '/_inference/{task_type}/{eis_inference_id}'
+ methods: ['PUT']
+ }
+ ]
+ path_parts: {
+ /**
+ * The type of the inference task that the model will perform.
+ * NOTE: The `chat_completion` task type only supports streaming and only through the _stream API.
+ */
+ task_type: EisTaskType
+ /**
+ * The unique identifier of the inference endpoint.
+ */
+ eis_inference_id: Id
+ }
+ body: {
+ /**
+ * The type of service supported for the specified task type. In this case, `elastic`.
+ */
+ service: ServiceType
+ /**
+ * Settings used to install the inference model. These settings are specific to the `elastic` service.
+ */
+ service_settings: EisServiceSettings
+ }
+}
+
+export enum EisTaskType {
+ chat_completion
+}
+
+export enum ServiceType {
+ elastic
+}
+
+export class EisServiceSettings {
+ /**
+ * The name of the model to use for the inference task.
+ */
+ model_id: string
+ /**
+ * This setting helps to minimize the number of rate limit errors returned.
+ * By default, the `elastic` service sets the number of requests allowed per minute to `240` in case of `chat_completion`.
+ */
+ rate_limit?: RateLimitSetting
+}
diff --git a/specification/inference/put_eis/PutEisResponse.ts b/specification/inference/put_eis/PutEisResponse.ts
new file mode 100644
index 0000000000..d40639b031
--- /dev/null
+++ b/specification/inference/put_eis/PutEisResponse.ts
@@ -0,0 +1,24 @@
+/*
+ * Licensed to Elasticsearch B.V. under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { InferenceEndpointInfo } from '@inference/_types/Services'
+
+export class Response {
+ body: InferenceEndpointInfo
+}
diff --git a/specification/inference/put_eis/example/request/PutEisRequestExample.yaml b/specification/inference/put_eis/example/request/PutEisRequestExample.yaml
new file mode 100644
index 0000000000..573bc66d22
--- /dev/null
+++ b/specification/inference/put_eis/example/request/PutEisRequestExample.yaml
@@ -0,0 +1,11 @@
+summary: A chat completion task
+description: Run `PUT _inference/chat_completion/chat-completion-endpoint` to create an inference endpoint that performs a `chat_completion` task.
+# method_request: "PUT _inference/chat_completion/chat-completion-endpoint"
+# type: "request"
+value: |-
+ {
+ "service": "elastic",
+ "service_settings": {
+ "model_id": "rainbow-sprinkles"
+ }
+ }
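
Usage sketch (not part of the diff): the following TypeScript illustrates a call against the route added above, PUT /_inference/{task_type}/{eis_inference_id}, using the body shape from PutEisRequest.ts and the values from PutEisRequestExample.yaml. The cluster URL, the ApiKey authorization header, and the ES_API_KEY environment variable are assumptions for illustration only. Per the NOTE on task_type, inference through the resulting chat_completion endpoint is then performed via the _stream API.

// A minimal usage sketch. Assumptions (not part of this spec change):
// a cluster at localhost:9200, reachable with API-key auth, Node 18+ for fetch.
const ES_URL = 'http://localhost:9200'
const API_KEY = process.env.ES_API_KEY ?? ''

async function putEisEndpoint(eisInferenceId: string, modelId: string): Promise<unknown> {
  // PUT /_inference/{task_type}/{eis_inference_id}; task_type is fixed to
  // chat_completion here, the only member of EisTaskType.
  const res = await fetch(`${ES_URL}/_inference/chat_completion/${eisInferenceId}`, {
    method: 'PUT',
    headers: {
      'Content-Type': 'application/json',
      Authorization: `ApiKey ${API_KEY}`
    },
    body: JSON.stringify({
      service: 'elastic', // the only member of ServiceType
      service_settings: {
        model_id: modelId, // required by EisServiceSettings
        // rate_limit is optional; the elastic service defaults to
        // 240 requests per minute for chat_completion
        rate_limit: { requests_per_minute: 240 }
      }
    })
  })
  if (!res.ok) {
    throw new Error(`PUT failed: ${res.status} ${await res.text()}`)
  }
  return res.json() // an InferenceEndpointInfo object on a 200 response
}

putEisEndpoint('chat-completion-endpoint', 'rainbow-sprinkles')
  .then(info => console.log(info))
  .catch(console.error)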