diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 6c45fd480d..0e9cfc6281 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -17451,52 +17451,7 @@ "content": { "application/json": { "schema": { - "type": "object", - "properties": { - "messages": { - "description": "A list of objects representing the conversation.", - "type": "array", - "items": { - "$ref": "#/components/schemas/inference.chat_completion_unified:Message" - } - }, - "model": { - "description": "The ID of the model to use.", - "type": "string" - }, - "max_completion_tokens": { - "description": "The upper bound limit for the number of tokens that can be generated for a completion request.", - "type": "number" - }, - "stop": { - "description": "A sequence of strings to control when the model should stop generating additional tokens.", - "type": "array", - "items": { - "type": "string" - } - }, - "temperature": { - "description": "The sampling temperature to use.", - "type": "number" - }, - "tool_choice": { - "$ref": "#/components/schemas/inference.chat_completion_unified:CompletionToolType" - }, - "tools": { - "description": "A list of tools that the model can call.", - "type": "array", - "items": { - "$ref": "#/components/schemas/inference.chat_completion_unified:CompletionTool" - } - }, - "top_p": { - "description": "Nucleus sampling, an alternative to sampling with temperature.", - "type": "number" - } - }, - "required": [ - "messages" - ] + "type": "object" } } } @@ -17748,6 +17703,51 @@ "x-state": "Added in 8.11.0" } }, + "/_inference/chat_completion/{eis_inference_id}/_stream": { + "post": { + "tags": [ + "inference" + ], + "summary": "Perform a chat completion task through the Elastic Inference Service (EIS)", + "description": "Perform a chat completion inference task with the `elastic` service.", + "operationId": "inference-post-eis-chat-completion", + "parameters": [ + { + "in": "path", 
+ "name": "eis_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object" + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/_types:StreamResult" + } + } + } + } + }, + "x-state": "Added in 9.0.0" + } + }, "/_inference/{task_type}/{eis_inference_id}": { "put": { "tags": [ @@ -76808,176 +76808,6 @@ "valid" ] }, - "inference.chat_completion_unified:Message": { - "type": "object", - "properties": { - "content": { - "$ref": "#/components/schemas/inference.chat_completion_unified:MessageContent" - }, - "role": { - "description": "The role of the message author.", - "type": "string" - }, - "tool_call_id": { - "$ref": "#/components/schemas/_types:Id" - }, - "tool_calls": { - "description": "The tool calls generated by the model.", - "type": "array", - "items": { - "$ref": "#/components/schemas/inference.chat_completion_unified:ToolCall" - } - } - }, - "required": [ - "role" - ] - }, - "inference.chat_completion_unified:MessageContent": { - "oneOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "$ref": "#/components/schemas/inference.chat_completion_unified:ContentObject" - } - } - ] - }, - "inference.chat_completion_unified:ContentObject": { - "type": "object", - "properties": { - "text": { - "description": "The text content.", - "type": "string" - }, - "type": { - "description": "The type of content.", - "type": "string" - } - }, - "required": [ - "text", - "type" - ] - }, - "inference.chat_completion_unified:ToolCall": { - "type": "object", - "properties": { - "id": { - "$ref": "#/components/schemas/_types:Id" - }, - "function": { - "$ref": 
"#/components/schemas/inference.chat_completion_unified:ToolCallFunction" - }, - "type": { - "description": "The type of the tool call.", - "type": "string" - } - }, - "required": [ - "id", - "function", - "type" - ] - }, - "inference.chat_completion_unified:ToolCallFunction": { - "type": "object", - "properties": { - "arguments": { - "description": "The arguments to call the function with in JSON format.", - "type": "string" - }, - "name": { - "description": "The name of the function to call.", - "type": "string" - } - }, - "required": [ - "arguments", - "name" - ] - }, - "inference.chat_completion_unified:CompletionToolType": { - "oneOf": [ - { - "type": "string" - }, - { - "$ref": "#/components/schemas/inference.chat_completion_unified:CompletionToolChoice" - } - ] - }, - "inference.chat_completion_unified:CompletionToolChoice": { - "type": "object", - "properties": { - "type": { - "description": "The type of the tool.", - "type": "string" - }, - "function": { - "$ref": "#/components/schemas/inference.chat_completion_unified:CompletionToolChoiceFunction" - } - }, - "required": [ - "type", - "function" - ] - }, - "inference.chat_completion_unified:CompletionToolChoiceFunction": { - "type": "object", - "properties": { - "name": { - "description": "The name of the function to call.", - "type": "string" - } - }, - "required": [ - "name" - ] - }, - "inference.chat_completion_unified:CompletionTool": { - "type": "object", - "properties": { - "type": { - "description": "The type of tool.", - "type": "string" - }, - "function": { - "$ref": "#/components/schemas/inference.chat_completion_unified:CompletionToolFunction" - } - }, - "required": [ - "type", - "function" - ] - }, - "inference.chat_completion_unified:CompletionToolFunction": { - "type": "object", - "properties": { - "description": { - "description": "A description of what the function does.\nThis is used by the model to choose when and how to call the function.", - "type": "string" - }, - "name": { - 
"description": "The name of the function.", - "type": "string" - }, - "parameters": { - "description": "The parameters the functional accepts. This should be formatted as a JSON object.", - "type": "object" - }, - "strict": { - "description": "Whether to enable schema adherence when generating the function call.", - "type": "boolean" - } - }, - "required": [ - "name" - ] - }, "_types:StreamResult": { "type": "object" }, diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index ef165f4ddf..655bd5617d 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -9273,52 +9273,7 @@ "content": { "application/json": { "schema": { - "type": "object", - "properties": { - "messages": { - "description": "A list of objects representing the conversation.", - "type": "array", - "items": { - "$ref": "#/components/schemas/inference.chat_completion_unified:Message" - } - }, - "model": { - "description": "The ID of the model to use.", - "type": "string" - }, - "max_completion_tokens": { - "description": "The upper bound limit for the number of tokens that can be generated for a completion request.", - "type": "number" - }, - "stop": { - "description": "A sequence of strings to control when the model should stop generating additional tokens.", - "type": "array", - "items": { - "type": "string" - } - }, - "temperature": { - "description": "The sampling temperature to use.", - "type": "number" - }, - "tool_choice": { - "$ref": "#/components/schemas/inference.chat_completion_unified:CompletionToolType" - }, - "tools": { - "description": "A list of tools that the model can call.", - "type": "array", - "items": { - "$ref": "#/components/schemas/inference.chat_completion_unified:CompletionTool" - } - }, - "top_p": { - "description": "Nucleus sampling, an alternative to sampling with temperature.", - "type": "number" - } - }, - "required": [ - "messages" 
- ] + "type": "object" } } } @@ -9570,6 +9525,51 @@ "x-state": "Added in 8.11.0" } }, + "/_inference/chat_completion/{eis_inference_id}/_stream": { + "post": { + "tags": [ + "inference" + ], + "summary": "Perform a chat completion task through the Elastic Inference Service (EIS)", + "description": "Perform a chat completion inference task with the `elastic` service.", + "operationId": "inference-post-eis-chat-completion", + "parameters": [ + { + "in": "path", + "name": "eis_inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object" + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/_types:StreamResult" + } + } + } + } + }, + "x-state": "Added in 9.0.0" + } + }, "/_inference/{task_type}/{eis_inference_id}": { "put": { "tags": [ @@ -48000,176 +48000,6 @@ "valid" ] }, - "inference.chat_completion_unified:Message": { - "type": "object", - "properties": { - "content": { - "$ref": "#/components/schemas/inference.chat_completion_unified:MessageContent" - }, - "role": { - "description": "The role of the message author.", - "type": "string" - }, - "tool_call_id": { - "$ref": "#/components/schemas/_types:Id" - }, - "tool_calls": { - "description": "The tool calls generated by the model.", - "type": "array", - "items": { - "$ref": "#/components/schemas/inference.chat_completion_unified:ToolCall" - } - } - }, - "required": [ - "role" - ] - }, - "inference.chat_completion_unified:MessageContent": { - "oneOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "$ref": "#/components/schemas/inference.chat_completion_unified:ContentObject" - } - } - ] - }, - "inference.chat_completion_unified:ContentObject": { - "type": 
"object", - "properties": { - "text": { - "description": "The text content.", - "type": "string" - }, - "type": { - "description": "The type of content.", - "type": "string" - } - }, - "required": [ - "text", - "type" - ] - }, - "inference.chat_completion_unified:ToolCall": { - "type": "object", - "properties": { - "id": { - "$ref": "#/components/schemas/_types:Id" - }, - "function": { - "$ref": "#/components/schemas/inference.chat_completion_unified:ToolCallFunction" - }, - "type": { - "description": "The type of the tool call.", - "type": "string" - } - }, - "required": [ - "id", - "function", - "type" - ] - }, - "inference.chat_completion_unified:ToolCallFunction": { - "type": "object", - "properties": { - "arguments": { - "description": "The arguments to call the function with in JSON format.", - "type": "string" - }, - "name": { - "description": "The name of the function to call.", - "type": "string" - } - }, - "required": [ - "arguments", - "name" - ] - }, - "inference.chat_completion_unified:CompletionToolType": { - "oneOf": [ - { - "type": "string" - }, - { - "$ref": "#/components/schemas/inference.chat_completion_unified:CompletionToolChoice" - } - ] - }, - "inference.chat_completion_unified:CompletionToolChoice": { - "type": "object", - "properties": { - "type": { - "description": "The type of the tool.", - "type": "string" - }, - "function": { - "$ref": "#/components/schemas/inference.chat_completion_unified:CompletionToolChoiceFunction" - } - }, - "required": [ - "type", - "function" - ] - }, - "inference.chat_completion_unified:CompletionToolChoiceFunction": { - "type": "object", - "properties": { - "name": { - "description": "The name of the function to call.", - "type": "string" - } - }, - "required": [ - "name" - ] - }, - "inference.chat_completion_unified:CompletionTool": { - "type": "object", - "properties": { - "type": { - "description": "The type of tool.", - "type": "string" - }, - "function": { - "$ref": 
"#/components/schemas/inference.chat_completion_unified:CompletionToolFunction" - } - }, - "required": [ - "type", - "function" - ] - }, - "inference.chat_completion_unified:CompletionToolFunction": { - "type": "object", - "properties": { - "description": { - "description": "A description of what the function does.\nThis is used by the model to choose when and how to call the function.", - "type": "string" - }, - "name": { - "description": "The name of the function.", - "type": "string" - }, - "parameters": { - "description": "The parameters the functional accepts. This should be formatted as a JSON object.", - "type": "object" - }, - "strict": { - "description": "Whether to enable schema adherence when generating the function call.", - "type": "boolean" - } - }, - "required": [ - "name" - ] - }, "_types:StreamResult": { "type": "object" }, diff --git a/output/schema/schema-serverless.json b/output/schema/schema-serverless.json index aa66f92b23..ecaa181942 100644 --- a/output/schema/schema-serverless.json +++ b/output/schema/schema-serverless.json @@ -4495,6 +4495,51 @@ } ] }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "9.0.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Perform a chat completion task through the Elastic Inference Service (EIS).\n\nPerform a chat completion inference task with the `elastic` service.", + "docId": "inference-api-post-eis-chat-completion", + "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-post-eis-chat-completion", + "name": "inference.post_eis_chat_completion", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.post_eis_chat_completion" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.post_eis_chat_completion" 
+ }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "POST" + ], + "path": "/_inference/chat_completion/{eis_inference_id}/_stream" + } + ] + }, { "availability": { "serverless": { @@ -26490,119 +26535,13 @@ ], "body": { "kind": "properties", - "properties": [ - { - "description": "A list of objects representing the conversation.", - "name": "messages", - "required": true, - "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "Message", - "namespace": "inference.chat_completion_unified" - } - } - } - }, - { - "description": "The ID of the model to use.", - "name": "model", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The upper bound limit for the number of tokens that can be generated for a completion request.", - "name": "max_completion_tokens", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "long", - "namespace": "_types" - } - } - }, - { - "description": "A sequence of strings to control when the model should stop generating additional tokens.", - "name": "stop", - "required": false, - "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - } - }, - { - "description": "The sampling temperature to use.", - "name": "temperature", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "float", - "namespace": "_types" - } - } - }, - { - "description": "Controls which tool is called by the model.", - "name": "tool_choice", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "CompletionToolType", - "namespace": "inference.chat_completion_unified" - } - } - }, - { - "description": "A list of tools that the model can call.", - "name": "tools", - "required": false, - "type": { - "kind": "array_of", - "value": { - "kind": 
"instance_of", - "type": { - "name": "CompletionTool", - "namespace": "inference.chat_completion_unified" - } - } - } - }, - { - "description": "Nucleus sampling, an alternative to sampling with temperature.", - "name": "top_p", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "float", - "namespace": "_types" - } - } - } - ] + "properties": [] }, "description": "Perform chat completion inference", "inherits": { "type": { - "name": "RequestBase", - "namespace": "_types" + "name": "RequestChatCompletionBase", + "namespace": "inference._types" } }, "kind": "request", @@ -26639,7 +26578,7 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L26-L87" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L25-L52" }, { "body": { @@ -26936,6 +26875,61 @@ }, "specLocation": "inference/get/GetResponse.ts#L22-L26" }, + { + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [] + }, + "description": "Perform a chat completion task through the Elastic Inference Service (EIS).\n\nPerform a chat completion inference task with the `elastic` service.", + "inherits": { + "type": { + "name": "RequestChatCompletionBase", + "namespace": "inference._types" + } + }, + "kind": "request", + "name": { + "name": "Request", + "namespace": "inference.post_eis_chat_completion" + }, + "path": [ + { + "description": "The unique identifier of the inference endpoint.", + "name": "eis_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/post_eis_chat_completion/PostEisChatCompletionRequest.ts#L23-L46" + }, + { + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "StreamResult", + "namespace": "_types" + } + } + }, + "kind": "response", + "name": { + "name": "Response", + "namespace": 
"inference.post_eis_chat_completion" + }, + "specLocation": "inference/post_eis_chat_completion/PostEisChatCompletionResponse.ts#L22-L24" + }, { "attachedBehaviors": [ "CommonQueryParameters" @@ -100399,7 +100393,7 @@ "name": "CompletionToolType", "namespace": "inference.chat_completion_unified" }, - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L89-L92", + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L54-L57", "type": { "items": [ { @@ -100453,7 +100447,7 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L178-L190" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L143-L155" }, { "description": "The tool choice function.", @@ -100476,7 +100470,7 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L167-L176" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L132-L141" }, { "codegenNames": [ @@ -100488,7 +100482,7 @@ "name": "MessageContent", "namespace": "inference.chat_completion_unified" }, - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L140-L143", + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L105-L108", "type": { "items": [ { @@ -100545,7 +100539,7 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L94-L106" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L59-L71" }, { "kind": "enum", @@ -120438,6 +120432,130 @@ ], "specLocation": "indices/validate_query/IndicesValidateQueryResponse.ts#L32-L37" }, + { + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "kind": "interface", + "name": { + "name": "RequestChatCompletionBase", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "A list of objects representing the conversation.", + "name": "messages", + "required": true, + "type": { + "kind": "array_of", 
+ "value": { + "kind": "instance_of", + "type": { + "name": "Message", + "namespace": "inference.chat_completion_unified" + } + } + } + }, + { + "description": "The ID of the model to use.", + "name": "model", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The upper bound limit for the number of tokens that can be generated for a completion request.", + "name": "max_completion_tokens", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "long", + "namespace": "_types" + } + } + }, + { + "description": "A sequence of strings to control when the model should stop generating additional tokens.", + "name": "stop", + "required": false, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + }, + { + "description": "The sampling temperature to use.", + "name": "temperature", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + }, + { + "description": "Controls which tool is called by the model.", + "name": "tool_choice", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "CompletionToolType", + "namespace": "inference.chat_completion_unified" + } + } + }, + { + "description": "A list of tools that the model can call.", + "name": "tools", + "required": false, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "CompletionTool", + "namespace": "inference.chat_completion_unified" + } + } + } + }, + { + "description": "Nucleus sampling, an alternative to sampling with temperature.", + "name": "top_p", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/_types/CommonTypes.ts#L28-L61" + }, { "description": "An 
object representing part of the conversation.", "kind": "interface", @@ -120498,7 +120616,7 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L145-L165" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L110-L130" }, { "description": "A tool call generated by the model.", @@ -120545,7 +120663,7 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L122-L138" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L87-L103" }, { "description": "The function that the model called.", @@ -120580,7 +120698,7 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L108-L120" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L73-L85" }, { "description": "A list of tools that the model can call.", @@ -120615,7 +120733,7 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L215-L227" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L180-L192" }, { "description": "The completion tool function definition.", @@ -120670,7 +120788,7 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L192-L213" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L157-L178" }, { "description": "Defines the completion result.", diff --git a/output/schema/schema.json b/output/schema/schema.json index 8671596959..e84cb90589 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -9207,6 +9207,51 @@ } ] }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "9.0.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Perform a chat completion task through the Elastic Inference Service (EIS).\n\nPerform a chat completion inference task with the `elastic` service.", + "docId": "inference-api-post-eis-chat-completion", + "docUrl": 
"https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-post-eis-chat-completion", + "name": "inference.post_eis_chat_completion", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.post_eis_chat_completion" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.post_eis_chat_completion" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "POST" + ], + "path": "/_inference/chat_completion/{eis_inference_id}/_stream" + } + ] + }, { "availability": { "serverless": { @@ -149167,6 +149212,130 @@ ], "specLocation": "inference/_types/Services.ts#L95-L100" }, + { + "kind": "interface", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "RequestChatCompletionBase", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "A list of objects representing the conversation.", + "name": "messages", + "required": true, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "Message", + "namespace": "inference.chat_completion_unified" + } + } + } + }, + { + "description": "The ID of the model to use.", + "name": "model", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The upper bound limit for the number of tokens that can be generated for a completion request.", + "name": "max_completion_tokens", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "long", + "namespace": "_types" + } + } + }, + { + "description": "A sequence of strings to control when the model should stop generating additional tokens.", + "name": "stop", + "required": false, + "type": 
{ + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + }, + { + "description": "The sampling temperature to use.", + "name": "temperature", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + }, + { + "description": "Controls which tool is called by the model.", + "name": "tool_choice", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "CompletionToolType", + "namespace": "inference.chat_completion_unified" + } + } + }, + { + "description": "A list of tools that the model can call.", + "name": "tools", + "required": false, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "CompletionTool", + "namespace": "inference.chat_completion_unified" + } + } + } + }, + { + "description": "Nucleus sampling, an alternative to sampling with temperature.", + "name": "top_p", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/_types/CommonTypes.ts#L28-L61" + }, { "kind": "interface", "description": "Defines the response for a rerank request.", @@ -149445,7 +149614,7 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L215-L227" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L180-L192" }, { "kind": "interface", @@ -149480,7 +149649,7 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L178-L190" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L143-L155" }, { "kind": "interface", @@ -149503,7 +149672,7 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L167-L176" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L132-L141" }, { "kind": "interface", @@ -149558,7 +149727,7 @@ } } ], - "specLocation": 
"inference/chat_completion_unified/UnifiedRequest.ts#L192-L213" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L157-L178" }, { "kind": "type_alias", @@ -149570,7 +149739,7 @@ "name": "CompletionToolType", "namespace": "inference.chat_completion_unified" }, - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L89-L92", + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L54-L57", "type": { "kind": "union_of", "items": [ @@ -149624,7 +149793,7 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L94-L106" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L59-L71" }, { "kind": "interface", @@ -149686,7 +149855,7 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L145-L165" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L110-L130" }, { "kind": "type_alias", @@ -149698,7 +149867,7 @@ "name": "MessageContent", "namespace": "inference.chat_completion_unified" }, - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L140-L143", + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L105-L108", "type": { "kind": "union_of", "items": [ @@ -149729,119 +149898,13 @@ ], "body": { "kind": "properties", - "properties": [ - { - "description": "A list of objects representing the conversation.", - "name": "messages", - "required": true, - "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "Message", - "namespace": "inference.chat_completion_unified" - } - } - } - }, - { - "description": "The ID of the model to use.", - "name": "model", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - }, - { - "description": "The upper bound limit for the number of tokens that can be generated for a completion request.", - "name": "max_completion_tokens", - "required": false, - "type": { - 
"kind": "instance_of", - "type": { - "name": "long", - "namespace": "_types" - } - } - }, - { - "description": "A sequence of strings to control when the model should stop generating additional tokens.", - "name": "stop", - "required": false, - "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } - } - } - }, - { - "description": "The sampling temperature to use.", - "name": "temperature", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "float", - "namespace": "_types" - } - } - }, - { - "description": "Controls which tool is called by the model.", - "name": "tool_choice", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "CompletionToolType", - "namespace": "inference.chat_completion_unified" - } - } - }, - { - "description": "A list of tools that the model can call.", - "name": "tools", - "required": false, - "type": { - "kind": "array_of", - "value": { - "kind": "instance_of", - "type": { - "name": "CompletionTool", - "namespace": "inference.chat_completion_unified" - } - } - } - }, - { - "description": "Nucleus sampling, an alternative to sampling with temperature.", - "name": "top_p", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "float", - "namespace": "_types" - } - } - } - ] + "properties": [] }, "description": "Perform chat completion inference", "inherits": { "type": { - "name": "RequestBase", - "namespace": "_types" + "name": "RequestChatCompletionBase", + "namespace": "inference._types" } }, "name": { @@ -149877,7 +149940,7 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L26-L87" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L25-L52" }, { "kind": "response", @@ -149942,7 +150005,7 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L122-L138" + "specLocation": 
"inference/chat_completion_unified/UnifiedRequest.ts#L87-L103" }, { "kind": "interface", @@ -149977,7 +150040,7 @@ } } ], - "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L108-L120" + "specLocation": "inference/chat_completion_unified/UnifiedRequest.ts#L73-L85" }, { "kind": "request", @@ -150256,6 +150319,61 @@ }, "specLocation": "inference/get/GetResponse.ts#L22-L26" }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [] + }, + "description": "Perform a chat completion task through the Elastic Inference Service (EIS).\n\nPerform a chat completion inference task with the `elastic` service.", + "inherits": { + "type": { + "name": "RequestChatCompletionBase", + "namespace": "inference._types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.post_eis_chat_completion" + }, + "path": [ + { + "description": "The unique identifier of the inference endpoint.", + "name": "eis_inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [], + "specLocation": "inference/post_eis_chat_completion/PostEisChatCompletionRequest.ts#L23-L46" + }, + { + "kind": "response", + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "StreamResult", + "namespace": "_types" + } + } + }, + "name": { + "name": "Response", + "namespace": "inference.post_eis_chat_completion" + }, + "specLocation": "inference/post_eis_chat_completion/PostEisChatCompletionResponse.ts#L22-L24" + }, { "kind": "request", "attachedBehaviors": [ diff --git a/output/typescript/types.ts b/output/typescript/types.ts index ce67705ca0..a129e27fbc 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -13109,6 +13109,17 @@ export interface InferenceRateLimitSetting { requests_per_minute?: integer } +export interface InferenceRequestChatCompletionBase 
extends RequestBase { + messages: InferenceChatCompletionUnifiedMessage[] + model?: string + max_completion_tokens?: long + stop?: string[] + temperature?: float + tool_choice?: InferenceChatCompletionUnifiedCompletionToolType + tools?: InferenceChatCompletionUnifiedCompletionTool[] + top_p?: float +} + export interface InferenceRerankedInferenceResult { rerank: InferenceRankedDocument[] } @@ -13180,19 +13191,9 @@ export interface InferenceChatCompletionUnifiedMessage { export type InferenceChatCompletionUnifiedMessageContent = string | InferenceChatCompletionUnifiedContentObject[] -export interface InferenceChatCompletionUnifiedRequest extends RequestBase { +export interface InferenceChatCompletionUnifiedRequest extends InferenceRequestChatCompletionBase { inference_id: Id timeout?: Duration - body?: { - messages: InferenceChatCompletionUnifiedMessage[] - model?: string - max_completion_tokens?: long - stop?: string[] - temperature?: float - tool_choice?: InferenceChatCompletionUnifiedCompletionToolType - tools?: InferenceChatCompletionUnifiedCompletionTool[] - top_p?: float - } } export type InferenceChatCompletionUnifiedResponse = StreamResult @@ -13237,6 +13238,12 @@ export interface InferenceGetResponse { endpoints: InferenceInferenceEndpointInfo[] } +export interface InferencePostEisChatCompletionRequest extends InferenceRequestChatCompletionBase { + eis_inference_id: Id +} + +export type InferencePostEisChatCompletionResponse = StreamResult + export interface InferencePutRequest extends RequestBase { task_type?: InferenceTaskType inference_id: Id diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv index d793dd61e5..9532dedc92 100644 --- a/specification/_doc_ids/table.csv +++ b/specification/_doc_ids/table.csv @@ -316,6 +316,7 @@ infer-trained-model-deployment,https://www.elastic.co/guide/en/elasticsearch/ref inference-api-delete,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-delete 
inference-api-get,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-get inference-api-post,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-inference +inference-api-post-eis-chat-completion,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-post-eis-chat-completion inference-api-put,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put inference-api-put-eis,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-eis.html inference-api-put-openai,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-openai.html diff --git a/specification/_json_spec/inference.post_eis_chat_completion.json b/specification/_json_spec/inference.post_eis_chat_completion.json new file mode 100644 index 0000000000..e34b14913b --- /dev/null +++ b/specification/_json_spec/inference.post_eis_chat_completion.json @@ -0,0 +1,31 @@ +{ + "inference.post_eis_chat_completion": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-apis.html", + "description": "Perform a chat completion task via the Elastic Inference Service (EIS)" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": ["application/json"], + "content_type": ["application/json"] + }, + "url": { + "paths": [ + { + "path": "/_inference/chat_completion/{eis_inference_id}/_stream", + "methods": ["POST"], + "parts": { + "eis_inference_id": { + "type": "string", + "description": "The inference ID" + } + } + } + ] + }, + "body": { + "description": "The inference tasks settings to perform" + } + } +} diff --git a/specification/inference/_types/CommonTypes.ts b/specification/inference/_types/CommonTypes.ts new file mode 100644 index 0000000000..8879466eb3 --- /dev/null +++ b/specification/inference/_types/CommonTypes.ts @@ -0,0 +1,61 @@ +/* + * Licensed to Elasticsearch B.V. 
under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { + CompletionTool, + CompletionToolType, + Message +} from '@inference/chat_completion_unified/UnifiedRequest' +import { RequestBase } from '@_types/Base' +import { float, long } from '@_types/Numeric' + +export interface RequestChatCompletionBase extends RequestBase { + /** + * A list of objects representing the conversation. + */ + messages: Array + /** + * The ID of the model to use. + */ + model?: string + /** + * The upper bound limit for the number of tokens that can be generated for a completion request. + */ + max_completion_tokens?: long + /** + * A sequence of strings to control when the model should stop generating additional tokens. + */ + stop?: Array + /** + * The sampling temperature to use. + */ + temperature?: float + /** + * Controls which tool is called by the model. + */ + tool_choice?: CompletionToolType + /** + * A list of tools that the model can call. + */ + tools?: Array + /** + * Nucleus sampling, an alternative to sampling with temperature. 
+ */ + top_p?: float +} diff --git a/specification/inference/chat_completion_unified/UnifiedRequest.ts b/specification/inference/chat_completion_unified/UnifiedRequest.ts index ac460afa95..1932021931 100644 --- a/specification/inference/chat_completion_unified/UnifiedRequest.ts +++ b/specification/inference/chat_completion_unified/UnifiedRequest.ts @@ -17,10 +17,9 @@ * under the License. */ +import { RequestChatCompletionBase } from '@inference/_types/CommonTypes' import { UserDefinedValue } from '@spec_utils/UserDefinedValue' -import { RequestBase } from '@_types/Base' import { Id } from '@_types/common' -import { float, long } from '@_types/Numeric' import { Duration } from '@_types/Time' /** @@ -30,7 +29,7 @@ import { Duration } from '@_types/Time' * @availability serverless stability=stable visibility=public * @doc_id inference-api-chat-completion */ -export interface Request extends RequestBase { +export interface Request extends RequestChatCompletionBase { urls: [ { path: '/_inference/chat_completion/{inference_id}/_stream' @@ -50,40 +49,6 @@ export interface Request extends RequestBase { */ timeout?: Duration } - body: { - /** - * A list of objects representing the conversation. - */ - messages: Array - /** - * The ID of the model to use. - */ - model?: string - /** - * The upper bound limit for the number of tokens that can be generated for a completion request. - */ - max_completion_tokens?: long - /** - * A sequence of strings to control when the model should stop generating additional tokens. - */ - stop?: Array - /** - * The sampling temperature to use. - */ - temperature?: float - /** - * Controls which tool is called by the model. - */ - tool_choice?: CompletionToolType - /** - * A list of tools that the model can call. - */ - tools?: Array - /** - * Nucleus sampling, an alternative to sampling with temperature. 
- */ - top_p?: float - } } /** diff --git a/specification/inference/post_eis_chat_completion/PostEisChatCompletionRequest.ts b/specification/inference/post_eis_chat_completion/PostEisChatCompletionRequest.ts new file mode 100644 index 0000000000..6088a1b0a1 --- /dev/null +++ b/specification/inference/post_eis_chat_completion/PostEisChatCompletionRequest.ts @@ -0,0 +1,46 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { RequestChatCompletionBase } from '@inference/_types/CommonTypes' +import { Id } from '@_types/common' + +/** + * Perform a chat completion task through the Elastic Inference Service (EIS). + * + * Perform a chat completion inference task with the `elastic` service. + * @rest_spec_name inference.post_eis_chat_completion + * @availability stack since=9.0.0 stability=stable visibility=public + * @availability serverless stability=stable visibility=public + * @cluster_privileges manage_inference + * @doc_id inference-api-post-eis-chat-completion + */ +export interface Request extends RequestChatCompletionBase { + urls: [ + { + path: '/_inference/chat_completion/{eis_inference_id}/_stream' + methods: ['POST'] + } + ] + path_parts: { + /** + * The unique identifier of the inference endpoint. 
+ */ + eis_inference_id: Id + } +} diff --git a/specification/inference/post_eis_chat_completion/PostEisChatCompletionResponse.ts b/specification/inference/post_eis_chat_completion/PostEisChatCompletionResponse.ts new file mode 100644 index 0000000000..74b823bf40 --- /dev/null +++ b/specification/inference/post_eis_chat_completion/PostEisChatCompletionResponse.ts @@ -0,0 +1,24 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { StreamResult } from '@_types/Binary' + +export class Response { + body: StreamResult +} diff --git a/specification/inference/post_eis_chat_completion/examples/PostEisChatCompletionRequestExample.yaml b/specification/inference/post_eis_chat_completion/examples/PostEisChatCompletionRequestExample.yaml new file mode 100644 index 0000000000..575da1bfa6 --- /dev/null +++ b/specification/inference/post_eis_chat_completion/examples/PostEisChatCompletionRequestExample.yaml @@ -0,0 +1,17 @@ +summary: A chat completion task +description: Run `POST /_inference/chat_completion//_stream` to perform a streaming chat completion task type. 
+# method_request: "POST /_inference/chat_completion/.rainbow-sprinkles-elastic/_stream"
+# type: "request"
+value: |-
+  {
+    "messages": [
+      {
+        "role": "user",
+        "content": "Say yes if it works."
+      }
+    ],
+    "temperature": 0.7,
+    "max_completion_tokens": 300
+  }