From ea37f167c74e4b786d8372a7da555242c055a26d Mon Sep 17 00:00:00 2001 From: Jonathan Buttner Date: Mon, 16 Dec 2024 14:38:17 -0500 Subject: [PATCH 1/3] Adding the unified api --- output/schema/schema.json | 910 +++++++++++++++++- output/typescript/types.ts | 101 ++ .../inference.unified_inference.json | 45 + specification/inference/_types/Results.ts | 114 +++ .../unified_inference/UnifiedRequest.ts | 214 ++++ .../unified_inference/UnifiedResponse.ts | 24 + 6 files changed, 1407 insertions(+), 1 deletion(-) create mode 100644 specification/_json_spec/inference.unified_inference.json create mode 100644 specification/inference/unified_inference/UnifiedRequest.ts create mode 100644 specification/inference/unified_inference/UnifiedResponse.ts diff --git a/output/schema/schema.json b/output/schema/schema.json index 478520ccf2..4e3359e7a4 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -8592,6 +8592,51 @@ } ] }, + { + "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, + "stack": { + "since": "8.18.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Perform inference on the service using the Unified Schema", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/master/unified-inference-api.html", + "name": "inference.unified_inference", + "request": { + "name": "Request", + "namespace": "inference.unified_inference" + }, + "requestBodyRequired": false, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.unified_inference" + }, + "responseMediaType": [ + "text/event-stream" + ], + "urls": [ + { + "methods": [ + "POST" + ], + "path": "/_inference/{inference_id}/_unified" + }, + { + "methods": [ + "POST" + ], + "path": "/_inference/{task_type}/{inference_id}/_unified" + } + ] + }, { "availability": { "serverless": { @@ -141024,6 +141069,114 @@ }, "specLocation": "indices/validate_query/IndicesValidateQueryResponse.ts#L23-L30" }, + { + "kind": "interface", + "description": "Represent a completion choice returned from a model.", + "name": { + "name": "CompletionChoice", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "The delta generated by the model.", + "name": "delta", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "CompletionDelta", + "namespace": "inference._types" + } + } + }, + { + "description": "The reason the model stopped generating tokens.", + "name": "finish_reason", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The index of the choice in the array of choices field.", + "name": "index", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "number", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/_types/Results.ts#L143-L159" + }, + { + "kind": "interface", + "name": { + "name": "CompletionDelta", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "The contents of the chunked message.", + "name": "content", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The refusal message.", + "name": "refusal", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The role of the author of the 
message.", + "name": "role", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The tool calls made by the model.", + "name": "tool_calls", + "required": false, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "ResultToolCall", + "namespace": "inference._types" + } + } + } + } + ], + "specLocation": "inference/_types/Results.ts#L124-L141" + }, { "kind": "interface", "description": "The completion result object", @@ -141075,7 +141228,7 @@ } } ], - "specLocation": "inference/_types/Results.ts#L91-L96" + "specLocation": "inference/_types/Results.ts#L205-L210" }, { "kind": "type_alias", @@ -141331,6 +141484,99 @@ ], "specLocation": "inference/_types/Results.ts#L67-L77" }, + { + "kind": "interface", + "description": "The function the model wants to call.", + "name": { + "name": "ResultFunctionCall", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "The arguments to call the function with in that the model generated in JSON format.", + "name": "arguments", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The name of the function to call.", + "name": "name", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/_types/Results.ts#L91-L103" + }, + { + "kind": "interface", + "description": "The tool call made by the model.", + "name": { + "name": "ResultToolCall", + "namespace": "inference._types" + }, + "properties": [ + { + "name": "index", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "number", + "namespace": "_builtins" + } + } + }, + { + "description": "The identifier of the tool call.", + "name": "id", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The function the model wants to call.", + "name": "function", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "ResultFunctionCall", + "namespace": "inference._types" + } + } + }, + { + "description": "The type of the tool.", + "name": "type", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/_types/Results.ts#L105-L122" + }, { "kind": "type_alias", "name": { @@ -141467,6 +141713,127 @@ ], "specLocation": "inference/_types/Results.ts#L53-L58" }, + { + "kind": "interface", + "description": "Respresents the result format for a completion request using the Unified Inference API.", + "name": { + "name": "UnifiedInferenceResult", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "A unique identifier for the chat completion", + "name": "id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "A list of completion choices.", + "name": "choices", + "required": true, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "CompletionChoice", + "namespace": "inference._types" + } + } + } + }, + { + "description": "The model that generated the completion.", + "name": "model", + "required": true, + "type": { + "kind": "instance_of", + "type": 
{ + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The object type.", + "name": "object", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The token usage statistics for the entire request.", + "name": "usage", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "Usage", + "namespace": "inference._types" + } + } + } + ], + "specLocation": "inference/_types/Results.ts#L179-L203" + }, + { + "kind": "interface", + "description": "The token usage statistics for the entire request.", + "name": { + "name": "Usage", + "namespace": "inference._types" + }, + "properties": [ + { + "description": "The number of tokens in the generated completion.", + "name": "completion_tokens", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "number", + "namespace": "_builtins" + } + } + }, + { + "description": "The number of tokens in the prompt.", + "name": "prompt_tokens", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "number", + "namespace": "_builtins" + } + } + }, + { + "description": "The sum of completion_tokens and prompt_tokens.", + "name": "total_tokens", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "number", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/_types/Results.ts#L161-L177" + }, { "kind": "request", "attachedBehaviors": [ @@ -141842,6 +142209,547 @@ }, "specLocation": "inference/put/PutResponse.ts#L22-L24" }, + { + "kind": "interface", + "description": "A list of tools that the model can call.", + "name": { + "name": "CompletionTool", + "namespace": "inference.unified_inference" + }, + "properties": [ + { + "description": "The type of tool.", + "name": "type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The function definition.", + "name": "function", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "CompletionToolFunction", + "namespace": "inference.unified_inference" + } + } + } + ], + "specLocation": "inference/unified_inference/UnifiedRequest.ts#L142-L154" + }, + { + "kind": "interface", + "description": "Controls which tool is called by the model.", + "name": { + "name": "CompletionToolChoice", + "namespace": "inference.unified_inference" + }, + "properties": [ + { + "description": "The type of the tool.", + "name": "type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The tool choice function.", + "name": "function", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "CompletionToolChoiceFunction", + "namespace": "inference.unified_inference" + } + } + } + ], + "specLocation": "inference/unified_inference/UnifiedRequest.ts#L105-L117" + }, + { + "kind": "interface", + "description": "The tool choice function.", + "name": { + "name": "CompletionToolChoiceFunction", + "namespace": "inference.unified_inference" + }, + "properties": [ + { + "description": "The name of the function to call.", + "name": "name", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/unified_inference/UnifiedRequest.ts#L94-L103" + }, + { + "kind": "interface", + 
"description": "The completion tool function definition.", + "name": { + "name": "CompletionToolFunction", + "namespace": "inference.unified_inference" + }, + "properties": [ + { + "description": "A description of what the function does.\nThis is used by the model to choose when and how to call the function.", + "name": "description", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The name of the function.", + "name": "name", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The parameters the functional accepts. This should be formatted as a JSON object.", + "name": "parameters", + "required": false, + "type": { + "kind": "user_defined_value" + } + }, + { + "description": "Whether to enable schema adherence when generating the function call.", + "name": "strict", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "boolean", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/unified_inference/UnifiedRequest.ts#L119-L140" + }, + { + "kind": "interface", + "description": "An object style representation of a single portion of a conversation.", + "name": { + "name": "ContentObject", + "namespace": "inference.unified_inference" + }, + "properties": [ + { + "description": "The text content.", + "name": "text", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The type of content.", + "name": "type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/unified_inference/UnifiedRequest.ts#L26-L38" + }, + { + "kind": "interface", + "description": "An object representing part of the conversation.", + "name": { + "name": "Message", + "namespace": "inference.unified_inference" + }, + "properties": [ + { + "description": "The content of the message.", + "name": "content", + "required": true, + "type": { + "kind": "union_of", + "items": [ + { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + }, + { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "ContentObject", + "namespace": "inference.unified_inference" + } + } + } + ] + } + }, + { + "description": "The role of the message author.", + "name": "role", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The tool call that this message is responding to.", + "name": "tool_call_id", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The tool calls generated by the model.", + "name": "tool_calls", + "required": false, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "ToolCall", + "namespace": "inference.unified_inference" + } + } + } + } + ], + "specLocation": "inference/unified_inference/UnifiedRequest.ts#L72-L92" + }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "A list of objects representing the conversation.", + "name": "messages", + "required": true, + "type": { + 
"kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "Message", + "namespace": "inference.unified_inference" + } + } + } + }, + { + "description": "The ID of the model to use.", + "name": "model", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The upper bound limit for the number of tokens that can be generated for a completion request.", + "name": "max_completion_tokens", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "number", + "namespace": "_builtins" + } + } + }, + { + "description": "A sequence of strings to control when the model should stop generating additional tokens.", + "name": "stop", + "required": false, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + }, + { + "description": "The sampling temperature to use.", + "name": "temperature", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "number", + "namespace": "_builtins" + } + } + }, + { + "description": "Controls which tool is called by the model.", + "name": "tool_choice", + "required": false, + "type": { + "kind": "union_of", + "items": [ + { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + }, + { + "kind": "instance_of", + "type": { + "name": "CompletionToolChoice", + "namespace": "inference.unified_inference" + } + } + ] + } + }, + { + "description": "A list of tools that the model can call.", + "name": "tools", + "required": false, + "type": { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "CompletionTool", + "namespace": "inference.unified_inference" + } + } + } + }, + { + "description": "Nucleus sampling, an alternative to sampling with temperature.", + "name": "top_p", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "number", + "namespace": "_builtins" + } + } + } + ] + }, + "description": "Perform inference on the service using the Unified Schema", + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.unified_inference" + }, + "path": [ + { + "description": "The task type", + "name": "task_type", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "TaskType", + "namespace": "inference._types" + } + } + }, + { + "description": "The inference Id", + "name": "inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + } + ], + "query": [ + { + "description": "Specifies the amount of time to wait for the inference request to complete.", + "name": "timeout", + "required": false, + "serverDefault": "30s", + "type": { + "kind": "instance_of", + "type": { + "name": "Duration", + "namespace": "_types" + } + } + } + ], + "specLocation": "inference/unified_inference/UnifiedRequest.ts#L156-L214" + }, + { + "kind": "response", + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "UnifiedInferenceResult", + "namespace": "inference._types" + } + } + }, + "name": { + "name": "Response", + "namespace": "inference.unified_inference" + }, + "specLocation": "inference/unified_inference/UnifiedResponse.ts#L22-L24" + }, + { + "kind": "interface", + "description": "A tool call generated by the model.", + "name": { + 
"name": "ToolCall", + "namespace": "inference.unified_inference" + }, + "properties": [ + { + "description": "The identifier of the tool call.", + "name": "id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The function that the model called.", + "name": "function", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "ToolCallFunction", + "namespace": "inference.unified_inference" + } + } + }, + { + "description": "The type of the tool call.", + "name": "type", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/unified_inference/UnifiedRequest.ts#L54-L70" + }, + { + "kind": "interface", + "description": "The function that the model called.", + "name": { + "name": "ToolCallFunction", + "namespace": "inference.unified_inference" + }, + "properties": [ + { + "description": "The arguments to call the function with in JSON format.", + "name": "arguments", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The name of the function to call.", + "name": "name", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "inference/unified_inference/UnifiedRequest.ts#L40-L52" + }, { "kind": "interface", "inherits": { diff --git a/output/typescript/types.ts b/output/typescript/types.ts index 074d11f89d..d8e150d8c1 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -12714,6 +12714,19 @@ export interface IndicesValidateQueryResponse { error?: string } +export interface InferenceCompletionChoice { + delta: InferenceCompletionDelta + finish_reason?: string + index: number +} + +export interface InferenceCompletionDelta { + content?: string + refusal?: string + role?: string + tool_calls?: InferenceResultToolCall[] +} + export interface InferenceCompletionResult { result: string } @@ -12751,6 +12764,18 @@ export interface InferenceRankedDocument { text?: string } +export interface InferenceResultFunctionCall { + arguments?: string + name?: string +} + +export interface InferenceResultToolCall { + index: number + id?: string + function?: InferenceResultFunctionCall + type?: string +} + export type InferenceServiceSettings = any export interface InferenceSparseEmbeddingResult { @@ -12771,6 +12796,20 @@ export interface InferenceTextEmbeddingResult { embedding: InferenceDenseVector } +export interface InferenceUnifiedInferenceResult { + id: string + choices: InferenceCompletionChoice[] + model: string + object: string + usage?: InferenceUsage +} + +export interface InferenceUsage { + completion_tokens: number + prompt_tokens: number + total_tokens: number +} + export interface InferenceDeleteRequest extends RequestBase { task_type?: InferenceTaskType inference_id: Id @@ -12810,6 +12849,68 @@ export interface InferencePutRequest extends RequestBase { export type InferencePutResponse = InferenceInferenceEndpointInfo +export interface InferenceUnifiedInferenceCompletionTool { + type: string + function: InferenceUnifiedInferenceCompletionToolFunction +} + +export interface InferenceUnifiedInferenceCompletionToolChoice { + type: string + function: InferenceUnifiedInferenceCompletionToolChoiceFunction +} + +export interface 
InferenceUnifiedInferenceCompletionToolChoiceFunction {
+  name: string
+}
+
+export interface InferenceUnifiedInferenceCompletionToolFunction {
+  description?: string
+  name: string
+  parameters?: any
+  strict?: boolean
+}
+
+export interface InferenceUnifiedInferenceContentObject {
+  text: string
+  type: string
+}
+
+export interface InferenceUnifiedInferenceMessage {
+  content: string | InferenceUnifiedInferenceContentObject[]
+  role: string
+  tool_call_id?: string
+  tool_calls?: InferenceUnifiedInferenceToolCall[]
+}
+
+export interface InferenceUnifiedInferenceRequest extends RequestBase {
+  task_type?: InferenceTaskType
+  inference_id: Id
+  timeout?: Duration
+  body?: {
+    messages: InferenceUnifiedInferenceMessage[]
+    model?: string
+    max_completion_tokens?: number
+    stop?: string[]
+    temperature?: number
+    tool_choice?: string | InferenceUnifiedInferenceCompletionToolChoice
+    tools?: InferenceUnifiedInferenceCompletionTool[]
+    top_p?: number
+  }
+}
+
+export type InferenceUnifiedInferenceResponse = InferenceUnifiedInferenceResult
+
+export interface InferenceUnifiedInferenceToolCall {
+  id: string
+  function: InferenceUnifiedInferenceToolCallFunction
+  type: string
+}
+
+export interface InferenceUnifiedInferenceToolCallFunction {
+  arguments: string
+  name: string
+}
+
 export interface IngestAppendProcessor extends IngestProcessorBase {
   field: Field
   value: any | any[]
diff --git a/specification/_json_spec/inference.unified_inference.json b/specification/_json_spec/inference.unified_inference.json
new file mode 100644
index 0000000000..84182d19f8
--- /dev/null
+++ b/specification/_json_spec/inference.unified_inference.json
@@ -0,0 +1,45 @@
+{
+  "inference.unified_inference": {
+    "documentation": {
+      "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/unified-inference-api.html",
+      "description": "Perform inference using the Unified Schema"
+    },
+    "stability": "stable",
+    "visibility": "public",
+    "headers": {
+      "accept": ["text/event-stream"],
+      "content_type": ["application/json"]
+    },
+    "url": {
+      "paths": [
+        {
+          "path": "/_inference/{inference_id}/_unified",
+          "methods": ["POST"],
+          "parts": {
+            "inference_id": {
+              "type": "string",
+              "description": "The inference Id"
+            }
+          }
+        },
+        {
+          "path": "/_inference/{task_type}/{inference_id}/_unified",
+          "methods": ["POST"],
+          "parts": {
+            "task_type": {
+              "type": "string",
+              "description": "The task type"
+            },
+            "inference_id": {
+              "type": "string",
+              "description": "The inference Id"
+            }
+          }
+        }
+      ]
+    },
+    "body": {
+      "description": "The inference payload"
+    }
+  }
+}
diff --git a/specification/inference/_types/Results.ts b/specification/inference/_types/Results.ts
index 1a35289bab..83bfe8a442 100644
--- a/specification/inference/_types/Results.ts
+++ b/specification/inference/_types/Results.ts
@@ -88,6 +88,120 @@ export class InferenceResult {
   rerank?: Array<RankedDocument>
 }
 
+/**
+ * The function the model wants to call.
+ */
+export class ResultFunctionCall {
+  /**
+   * The arguments to call the function with, which the model generated in JSON format.
+   */
+  arguments?: string
+  /**
+   * The name of the function to call.
+   */
+  name?: string
+}
+
+/**
+ * The tool call made by the model.
+ */
+export class ResultToolCall {
+  index: number
+  /**
+   * The identifier of the tool call.
+   */
+  id?: string
+  /**
+   * The function the model wants to call.
+   */
+  function?: ResultFunctionCall
+  /**
+   * The type of the tool.
+   */
+  type?: string
+}
+
+export class CompletionDelta {
+  /**
+   * The contents of the chunked message.
+   */
+  content?: string
+  /**
+   * The refusal message.
+   */
+  refusal?: string
+  /**
+   * The role of the author of the message.
+   */
+  role?: string
+  /**
+   * The tool calls made by the model.
+   */
+  tool_calls?: Array<ResultToolCall>
+}
+
+/**
+ * Represent a completion choice returned from a model.
+ */
+export class CompletionChoice {
+  /**
+   * The delta generated by the model.
+   */
+  delta: CompletionDelta
+  /**
+   * The reason the model stopped generating tokens.
+   */
+  finish_reason?: string
+  /**
+   * The index of the choice in the array of choices field.
+   */
+  index: number
+}
+
+/**
+ * The token usage statistics for the entire request.
+ */
+export class Usage {
+  /**
+   * The number of tokens in the generated completion.
+   */
+  completion_tokens: number
+  /**
+   * The number of tokens in the prompt.
+   */
+  prompt_tokens: number
+  /**
+   * The sum of completion_tokens and prompt_tokens.
+   */
+  total_tokens: number
+}
+
+/**
+ * Represents the result format for a completion request using the Unified Inference API.
+ */
+export class UnifiedInferenceResult {
+  /**
+   * A unique identifier for the chat completion.
+   */
+  id: string
+  /**
+   * A list of completion choices.
+   */
+  choices: Array<CompletionChoice>
+  /**
+   * The model that generated the completion.
+   */
+  model: string
+  /**
+   * The object type.
+   */
+  object: string
+  /**
+   * The token usage statistics for the entire request.
+   */
+  usage?: Usage
+}
+
 /**
  * Acknowledged response. For dry_run, contains the list of pipelines which reference the inference endpoint
  */
diff --git a/specification/inference/unified_inference/UnifiedRequest.ts b/specification/inference/unified_inference/UnifiedRequest.ts
new file mode 100644
index 0000000000..67a48e73d0
--- /dev/null
+++ b/specification/inference/unified_inference/UnifiedRequest.ts
@@ -0,0 +1,214 @@
+/*
+ * Licensed to Elasticsearch B.V. under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { UserDefinedValue } from '@spec_utils/UserDefinedValue'
+import { RequestBase } from '@_types/Base'
+import { Id } from '@_types/common'
+import { Duration } from '@_types/Time'
+import { TaskType } from '@inference/_types/TaskType'
+
+/**
+ * An object style representation of a single portion of a conversation.
+ */
+export interface ContentObject {
+  /**
+   * The text content.
+   */
+  text: string
+  /**
+   * The type of content.
+   */
+  type: string
+}
+
+/**
+ * The function that the model called.
+ */
+export interface ToolCallFunction {
+  /**
+   * The arguments to call the function with in JSON format.
+   */
+  arguments: string
+  /**
+   * The name of the function to call.
+   */
+  name: string
+}
+
+/**
+ * A tool call generated by the model.
+ */
+export interface ToolCall {
+  /**
+   * The identifier of the tool call.
+   */
+  id: string
+  /**
+   * The function that the model called.
+   */
+  function: ToolCallFunction
+  /**
+   * The type of the tool call.
+   */
+  type: string
+}
+
+/**
+ * An object representing part of the conversation.
+ */
+export interface Message {
+  /**
+   * The content of the message.
+   */
+  content: string | Array<ContentObject>
+  /**
+   * The role of the message author.
+   */
+  role: string
+  /**
+   * The tool call that this message is responding to.
+   */
+  tool_call_id?: string
+  /**
+   * The tool calls generated by the model.
+   */
+  tool_calls?: Array<ToolCall>
+}
+
+/**
+ * The tool choice function.
+ *
+ */
+export interface CompletionToolChoiceFunction {
+  /**
+   * The name of the function to call.
+   */
+  name: string
+}
+
+/**
+ * Controls which tool is called by the model.
+ */
+export interface CompletionToolChoice {
+  /**
+   * The type of the tool.
+   */
+  type: string
+  /**
+   * The tool choice function.
+   */
+  function: CompletionToolChoiceFunction
+}
+
+/**
+ * The completion tool function definition.
+ */
+export interface CompletionToolFunction {
+  /**
+   * A description of what the function does.
+   * This is used by the model to choose when and how to call the function.
+   */
+  description?: string
+  /**
+   * The name of the function.
+   */
+  name: string
+  /**
+   * The parameters the function accepts. This should be formatted as a JSON object.
+   */
+  parameters?: UserDefinedValue
+  /**
+   * Whether to enable schema adherence when generating the function call.
+   */
+  strict?: boolean
+}
+
+/**
+ * A list of tools that the model can call.
+ */
+export interface CompletionTool {
+  /**
+   * The type of tool.
+   */
+  type: string
+  /**
+   * The function definition.
+   */
+  function: CompletionToolFunction
+}
+
+/**
+ * Perform inference on the service using the Unified Schema
+ * @rest_spec_name inference.unified_inference
+ * @availability stack since=8.18.0 stability=stable visibility=public
+ * @availability serverless stability=stable visibility=public
+ */
+export interface Request extends RequestBase {
+  path_parts: {
+    /**
+     * The task type
+     */
+    task_type?: TaskType
+    /**
+     * The inference Id
+     */
+    inference_id: Id
+  }
+  query_parameters: {
+    /**
+     * Specifies the amount of time to wait for the inference request to complete.
+     * @server_default 30s
+     */
+    timeout?: Duration
+  }
+  body: {
+    /**
+     * A list of objects representing the conversation.
+     */
+    messages: Array<Message>
+    /**
+     * The ID of the model to use.
+     */
+    model?: string
+    /**
+     * The upper bound limit for the number of tokens that can be generated for a completion request.
+     */
+    max_completion_tokens?: number
+    /**
+     * A sequence of strings to control when the model should stop generating additional tokens.
+     */
+    stop?: Array<string>
+    /**
+     * The sampling temperature to use.
+     */
+    temperature?: number
+    /**
+     * Controls which tool is called by the model.
+     */
+    tool_choice?: string | CompletionToolChoice
+    /**
+     * A list of tools that the model can call.
+     */
+    tools?: Array<CompletionTool>
+    /**
+     * Nucleus sampling, an alternative to sampling with temperature.
+     */
+    top_p?: number
+  }
+}
diff --git a/specification/inference/unified_inference/UnifiedResponse.ts b/specification/inference/unified_inference/UnifiedResponse.ts
new file mode 100644
index 0000000000..6bcd6089b1
--- /dev/null
+++ b/specification/inference/unified_inference/UnifiedResponse.ts
@@ -0,0 +1,24 @@
+/*
+ * Licensed to Elasticsearch B.V. under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import { UnifiedInferenceResult } from '@inference/_types/Results'
+
+export class Response {
+  body: UnifiedInferenceResult
+}

From 5b80fae7d8cf3fd7333ad6810c861770f9588bb0 Mon Sep 17 00:00:00 2001
From: Jonathan Buttner
Date: Mon, 16 Dec 2024 14:48:57 -0500
Subject: [PATCH 2/3] Fixing formatting

---
 specification/inference/unified_inference/UnifiedRequest.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/specification/inference/unified_inference/UnifiedRequest.ts b/specification/inference/unified_inference/UnifiedRequest.ts
index 67a48e73d0..6160e015e7 100644
--- a/specification/inference/unified_inference/UnifiedRequest.ts
+++ b/specification/inference/unified_inference/UnifiedRequest.ts
@@ -17,11 +17,11 @@
  * under the License.
  */
 
+import { TaskType } from '@inference/_types/TaskType'
 import { UserDefinedValue } from '@spec_utils/UserDefinedValue'
 import { RequestBase } from '@_types/Base'
 import { Id } from '@_types/common'
 import { Duration } from '@_types/Time'
-import { TaskType } from '@inference/_types/TaskType'
 
 /**
  * An object style representation of a single portion of a conversation.

From 90f9fd26b7b5dc9499db0133f54fb24e0245fa83 Mon Sep 17 00:00:00 2001
From: Jonathan Buttner
Date: Fri, 10 Jan 2025 14:51:08 -0500
Subject: [PATCH 3/3] Addressing feedback and removing response

---
 specification/_types/Binary.ts                |   3 +
 specification/inference/_types/Results.ts     | 114 ---------------
 .../unified_inference/UnifiedRequest.ts       | 137 ++++++++++--------
 .../unified_inference/UnifiedResponse.ts      |   4 +-
 4 files changed, 79 insertions(+), 179 deletions(-)

diff --git a/specification/_types/Binary.ts b/specification/_types/Binary.ts
index 56b792ea38..f00d8ddcd4 100644
--- a/specification/_types/Binary.ts
+++ b/specification/_types/Binary.ts
@@ -22,3 +22,6 @@ export type MapboxVectorTiles = ArrayBuffer
 
 // ES|QL columns
 export type EsqlColumns = ArrayBuffer
+
+// Streaming endpoints response
+export type StreamResult = ArrayBuffer
diff --git a/specification/inference/_types/Results.ts b/specification/inference/_types/Results.ts
index 83bfe8a442..1a35289bab 100644
--- a/specification/inference/_types/Results.ts
+++ b/specification/inference/_types/Results.ts
@@ -88,120 +88,6 @@ export class InferenceResult {
   rerank?: Array<RankedDocument>
 }
 
-/**
- * The function the model wants to call.
- */
-export class ResultFunctionCall {
-  /**
-   * The arguments to call the function with, which the model generated in JSON format.
-   */
-  arguments?: string
-  /**
-   * The name of the function to call.
-   */
-  name?: string
-}
-
-/**
- * The tool call made by the model.
- */
-export class ResultToolCall {
-  index: number
-  /**
-   * The identifier of the tool call.
-   */
-  id?: string
-  /**
-   * The function the model wants to call.
-   */
-  function?: ResultFunctionCall
-  /**
-   * The type of the tool.
-   */
-  type?: string
-}
-
-export class CompletionDelta {
-  /**
-   * The contents of the chunked message.
-   */
-  content?: string
-  /**
-   * The refusal message.
-   */
-  refusal?: string
-  /**
-   * The role of the author of the message.
-   */
-  role?: string
-  /**
-   * The tool calls made by the model.
-   */
-  tool_calls?: Array<ResultToolCall>
-}
-
-/**
- * Represent a completion choice returned from a model.
- */
-export class CompletionChoice {
-  /**
-   * The delta generated by the model.
-   */
-  delta: CompletionDelta
-  /**
-   * The reason the model stopped generating tokens.
-   */
-  finish_reason?: string
-  /**
-   * The index of the choice in the array of choices field.
-   */
-  index: number
-}
-
-/**
- * The token usage statistics for the entire request.
- */
-export class Usage {
-  /**
-   * The number of tokens in the generated completion.
-   */
-  completion_tokens: number
-  /**
-   * The number of tokens in the prompt.
-   */
-  prompt_tokens: number
-  /**
-   * The sum of completion_tokens and prompt_tokens.
-   */
-  total_tokens: number
-}
-
-/**
- * Represents the result format for a completion request using the Unified Inference API.
- */
-export class UnifiedInferenceResult {
-  /**
-   * A unique identifier for the chat completion.
-   */
-  id: string
-  /**
-   * A list of completion choices.
-   */
-  choices: Array<CompletionChoice>
-  /**
-   * The model that generated the completion.
-   */
-  model: string
-  /**
-   * The object type.
-   */
-  object: string
-  /**
-   * The token usage statistics for the entire request.
-   */
-  usage?: Usage
-}
-
 /**
  * Acknowledged response. For dry_run, contains the list of pipelines which reference the inference endpoint
  */
diff --git a/specification/inference/unified_inference/UnifiedRequest.ts b/specification/inference/unified_inference/UnifiedRequest.ts
index 6160e015e7..b646bfa255 100644
--- a/specification/inference/unified_inference/UnifiedRequest.ts
+++ b/specification/inference/unified_inference/UnifiedRequest.ts
@@ -21,8 +21,74 @@ import { TaskType } from '@inference/_types/TaskType'
 import { UserDefinedValue } from '@spec_utils/UserDefinedValue'
 import { RequestBase } from '@_types/Base'
 import { Id } from '@_types/common'
+import { float, long } from '@_types/Numeric'
 import { Duration } from '@_types/Time'
 
+/**
+ * Perform inference on the service using the Unified Schema
+ * @rest_spec_name inference.unified_inference
+ * @availability stack since=8.18.0 stability=stable visibility=public
+ * @availability serverless stability=stable visibility=public
+ */
+export interface Request extends RequestBase {
+  path_parts: {
+    /**
+     * The task type
+     */
+    task_type?: TaskType
+    /**
+     * The inference Id
+     */
+    inference_id: Id
+  }
+  query_parameters: {
+    /**
+     * Specifies the amount of time to wait for the inference request to complete.
+     * @server_default 30s
+     */
+    timeout?: Duration
+  }
+  body: {
+    /**
+     * A list of objects representing the conversation.
+     */
+    messages: Array<Message>
+    /**
+     * The ID of the model to use.
+     */
+    model?: string
+    /**
+     * The upper bound limit for the number of tokens that can be generated for a completion request.
+     */
+    max_completion_tokens?: long
+    /**
+     * A sequence of strings to control when the model should stop generating additional tokens.
+     */
+    stop?: Array<string>
+    /**
+     * The sampling temperature to use.
+     */
+    temperature?: float
+    /**
+     * Controls which tool is called by the model.
+     */
+    tool_choice?: CompletionToolType
+    /**
+     * A list of tools that the model can call.
+     */
+    tools?: Array<CompletionTool>
+    /**
+     * Nucleus sampling, an alternative to sampling with temperature.
+     */
+    top_p?: float
+  }
+}
+
+/**
+ * @codegen_names string, object
+ */
+export type CompletionToolType = string | CompletionToolChoice
+
 /**
  * An object style representation of a single portion of a conversation.
  */
@@ -58,7 +124,7 @@ export interface ToolCall {
   /**
    * The identifier of the tool call.
    */
-  id: string
+  id: Id
   /**
    * The function that the model called.
    */
@@ -69,6 +135,11 @@ export interface ToolCall {
   type: string
 }
 
+/**
+ * @codegen_names string, object
+ */
+export type MessageContent = string | Array<ContentObject>
+
 /**
  * An object representing part of the conversation.
  */
@@ -76,7 +147,7 @@ export interface Message {
   /**
    * The content of the message.
    */
-  content: string | Array<ContentObject>
+  content?: MessageContent
   /**
    * The role of the message author.
    */
@@ -84,7 +155,7 @@ export interface Message {
   /**
    * The tool call that this message is responding to.
    */
-  tool_call_id?: string
+  tool_call_id?: Id
   /**
    * The tool calls generated by the model.
    */
@@ -152,63 +223,3 @@ export interface CompletionTool {
    */
   function: CompletionToolFunction
 }
-
-/**
- * Perform inference on the service using the Unified Schema
- * @rest_spec_name inference.unified_inference
- * @availability stack since=8.18.0 stability=stable visibility=public
- * @availability serverless stability=stable visibility=public
- */
-export interface Request extends RequestBase {
-  path_parts: {
-    /**
-     * The task type
-     */
-    task_type?: TaskType
-    /**
-     * The inference Id
-     */
-    inference_id: Id
-  }
-  query_parameters: {
-    /**
-     * Specifies the amount of time to wait for the inference request to complete.
-     * @server_default 30s
-     */
-    timeout?: Duration
-  }
-  body: {
-    /**
-     * A list of objects representing the conversation.
-     */
-    messages: Array<Message>
-    /**
-     * The ID of the model to use.
-     */
-    model?: string
-    /**
-     * The upper bound limit for the number of tokens that can be generated for a completion request.
-     */
-    max_completion_tokens?: number
-    /**
-     * A sequence of strings to control when the model should stop generating additional tokens.
-     */
-    stop?: Array<string>
-    /**
-     * The sampling temperature to use.
-     */
-    temperature?: number
-    /**
-     * Controls which tool is called by the model.
-     */
-    tool_choice?: string | CompletionToolChoice
-    /**
-     * A list of tools that the model can call.
-     */
-    tools?: Array<CompletionTool>
-    /**
-     * Nucleus sampling, an alternative to sampling with temperature.
-     */
-    top_p?: number
-  }
-}
diff --git a/specification/inference/unified_inference/UnifiedResponse.ts b/specification/inference/unified_inference/UnifiedResponse.ts
index 6bcd6089b1..74b823bf40 100644
--- a/specification/inference/unified_inference/UnifiedResponse.ts
+++ b/specification/inference/unified_inference/UnifiedResponse.ts
@@ -17,8 +17,8 @@
  * under the License.
  */
 
-import { UnifiedInferenceResult } from '@inference/_types/Results'
+import { StreamResult } from '@_types/Binary'
 
 export class Response {
-  body: UnifiedInferenceResult
+  body: StreamResult
 }
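
Note for reviewers: an illustrative request against the new endpoint, assembled from the spec above. The endpoint ID "openai-chat", the model name, and the message text are invented for this example and are not part of the patch; only the path shape, body fields, and media types come from the specification. Because the response media type is text/event-stream, the reply arrives as a stream of server-sent events whose payloads patch 3 deliberately leaves untyped (StreamResult); patch 1's schema suggests each chunk carries "id", "choices" (with a "delta"), "model", "object", and an optional final "usage".

POST /_inference/completion/openai-chat/_unified
{
  "model": "gpt-4o",
  "messages": [
    { "role": "user", "content": "What is Elastic?" }
  ],
  "temperature": 0.7,
  "max_completion_tokens": 300
}

event: message
data: {"id": "...", "choices": [{"delta": {"content": "Elastic is"}, "index": 0}], "model": "gpt-4o", "object": "..."}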