From d7c3e080323e0cb0b3f7902b7df06fc2077e2c79 Mon Sep 17 00:00:00 2001 From: Jonathan Buttner Date: Wed, 5 Mar 2025 16:43:31 -0500 Subject: [PATCH 1/2] Pulling api spec changes --- .../inference.chat_completion_unified.json | 37 ++++++++++++ .../api/inference.completion.json | 37 ++++++++++++ .../rest-api-spec/api/inference.get.json | 56 ++++++++--------- .../api/inference.inference.json | 49 --------------- .../rest-api-spec/api/inference.put.json | 60 ++++++++++--------- .../rest-api-spec/api/inference.rerank.json | 37 ++++++++++++ .../api/inference.sparse_embedding.json | 37 ++++++++++++ .../api/inference.stream_completion.json | 37 ++++++++++++ .../api/inference.stream_inference.json | 49 --------------- .../api/inference.text_embedding.json | 37 ++++++++++++ .../api/inference.unified_inference.json | 45 -------------- 11 files changed, 283 insertions(+), 198 deletions(-) create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/api/inference.chat_completion_unified.json create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/api/inference.completion.json delete mode 100644 rest-api-spec/src/main/resources/rest-api-spec/api/inference.inference.json create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/api/inference.rerank.json create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/api/inference.sparse_embedding.json create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/api/inference.stream_completion.json delete mode 100644 rest-api-spec/src/main/resources/rest-api-spec/api/inference.stream_inference.json create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/api/inference.text_embedding.json delete mode 100644 rest-api-spec/src/main/resources/rest-api-spec/api/inference.unified_inference.json diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.chat_completion_unified.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.chat_completion_unified.json new file mode 100644 index 0000000000000..98854625d0471 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.chat_completion_unified.json @@ -0,0 +1,37 @@ +{ + "inference.chat_completion_unified": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/chat-completion-inference.html", + "description": "Perform chat completion inference" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": [ + "text/event-stream" + ], + "content_type": [ + "application/json" + ] + }, + "url": { + "paths": [ + { + "path": "/_inference/chat_completion/{inference_id}/_stream", + "methods": [ + "POST" + ], + "parts": { + "inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The inference payload" + } + } +} diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.completion.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.completion.json new file mode 100644 index 0000000000000..6c753e59e3434 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.completion.json @@ -0,0 +1,37 @@ +{ + "inference.completion": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html", + "description": "Perform completion inference" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": [ + "application/json" + ], + "content_type": [ + "application/json" + ] + }, + "url": { + "paths": [ + { + "path": "/_inference/completion/{inference_id}", + "methods": [ + "POST" + ], + "parts": { + "inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The inference payload" + } + } +} diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.get.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.get.json index 14e7519c3796e..8887d9d0a1ebe 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.get.json +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.get.json @@ -1,47 +1,49 @@ { - "inference.get":{ - "documentation":{ - "url":"https://www.elastic.co/guide/en/elasticsearch/reference/master/get-inference-api.html", - "description":"Get an inference endpoint" + "inference.get": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/get-inference-api.html", + "description": "Get an inference endpoint" }, - "stability":"stable", - "visibility":"public", - "headers":{ - "accept": [ "application/json"] + "stability": "stable", + "visibility": "public", + "headers": { + "accept": [ + "application/json" + ] }, - "url":{ - "paths":[ + "url": { + "paths": [ { - "path":"/_inference", - "methods":[ + "path": "/_inference", + "methods": [ "GET" ] }, { - "path":"/_inference/{inference_id}", - "methods":[ + "path": "/_inference/{inference_id}", + "methods": [ "GET" ], - "parts":{ - "inference_id":{ - "type":"string", - "description":"The inference Id" + "parts": { + "inference_id": { + "type": "string", + "description": "The inference Id" } } }, { - "path":"/_inference/{task_type}/{inference_id}", - "methods":[ + "path": "/_inference/{task_type}/{inference_id}", + "methods": [ "GET" ], - "parts":{ - "task_type":{ - "type":"string", - "description":"The task type" + "parts": { + "task_type": { + "type": "string", + "description": "The task type" }, - "inference_id":{ - "type":"string", - "description":"The inference Id" + "inference_id": { + "type": "string", + "description": "The inference Id" } } } diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.inference.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.inference.json deleted file mode 100644 index eb4c1268c28ca..0000000000000 --- a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.inference.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "inference.inference":{ - "documentation":{ - "url":"https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html", - "description":"Perform inference" - }, - "stability":"stable", - "visibility":"public", - "headers":{ - "accept": [ "application/json"], - "content_type": ["application/json"] - }, - "url":{ - "paths":[ - { - "path":"/_inference/{inference_id}", - "methods":[ - "POST" - ], - "parts":{ - "inference_id":{ - "type":"string", - "description":"The inference Id" - } - } - }, - { - "path":"/_inference/{task_type}/{inference_id}", - "methods":[ - "POST" - ], - "parts":{ - "task_type":{ - "type":"string", - "description":"The task type" - }, - "inference_id":{ - "type":"string", - "description":"The inference Id" - } - } - } - ] - }, - "body":{ - "description":"The inference payload" - } - } -} diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.put.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.put.json index 411392fe39908..4879007724450 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.put.json +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.put.json @@ -1,49 +1,53 @@ { - "inference.put":{ - "documentation":{ - "url":"https://www.elastic.co/guide/en/elasticsearch/reference/master/put-inference-api.html", - "description":"Configure an inference endpoint for use in the Inference API" + "inference.put": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/put-inference-api.html", + "description": "Configure an inference endpoint for use in the Inference API" }, - "stability":"stable", - "visibility":"public", - "headers":{ - "accept": [ "application/json"], - "content_type": ["application/json"] + "stability": "stable", + "visibility": "public", + "headers": { + "accept": [ + "application/json" + ], + "content_type": [ + "application/json" + ] }, - "url":{ - "paths":[ + "url": { + "paths": [ { - "path":"/_inference/{inference_id}", - "methods":[ + "path": "/_inference/{inference_id}", + "methods": [ "PUT" ], - "parts":{ - "inference_id":{ - "type":"string", - "description":"The inference Id" + "parts": { + "inference_id": { + "type": "string", + "description": "The inference Id" } } }, { - "path":"/_inference/{task_type}/{inference_id}", - "methods":[ + "path": "/_inference/{task_type}/{inference_id}", + "methods": [ "PUT" ], - "parts":{ - "task_type":{ - "type":"string", - "description":"The task type" + "parts": { + "task_type": { + "type": "string", + "description": "The task type" }, - "inference_id":{ - "type":"string", - "description":"The inference Id" + "inference_id": { + "type": "string", + "description": "The inference Id" } } } ] }, - "body":{ - "description":"The inference endpoint's task and service settings" + "body": { + "description": "The inference endpoint's task and service settings" } } } diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.rerank.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.rerank.json new file mode 100644 index 0000000000000..c08a51a8b9b98 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.rerank.json @@ -0,0 +1,37 @@ +{ + "inference.rerank": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html", + "description": "Perform reranking inference" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": [ + "application/json" + ], + "content_type": [ + "application/json" + ] + }, + "url": { + "paths": [ + { + "path": "/_inference/rerank/{inference_id}", + "methods": [ + "POST" + ], + "parts": { + "inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The inference payload" + } + } +} diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.sparse_embedding.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.sparse_embedding.json new file mode 100644 index 0000000000000..90ebb6e6dc4c2 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.sparse_embedding.json @@ -0,0 +1,37 @@ +{ + "inference.sparse_embedding": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html", + "description": "Perform sparse embedding inference" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": [ + "application/json" + ], + "content_type": [ + "application/json" + ] + }, + "url": { + "paths": [ + { + "path": "/_inference/sparse_embedding/{inference_id}", + "methods": [ + "POST" + ], + "parts": { + "inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The inference payload" + } + } +} diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.stream_completion.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.stream_completion.json new file mode 100644 index 0000000000000..a1d770c46305b --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.stream_completion.json @@ -0,0 +1,37 @@ +{ + "inference.stream_completion": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-stream-inference-api.html", + "description": "Perform streaming inference" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": [ + "text/event-stream" + ], + "content_type": [ + "application/json" + ] + }, + "url": { + "paths": [ + { + "path": "/_inference/completion/{inference_id}/_stream", + "methods": [ + "POST" + ], + "parts": { + "inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The inference payload" + } + } +} diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.stream_inference.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.stream_inference.json deleted file mode 100644 index 493306e10d5c7..0000000000000 --- a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.stream_inference.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "inference.stream_inference":{ - "documentation":{ - "url":"https://www.elastic.co/guide/en/elasticsearch/reference/master/post-stream-inference-api.html", - "description":"Perform streaming inference" - }, - "stability":"stable", - "visibility":"public", - "headers":{ - "accept": [ "text/event-stream"], - "content_type": ["application/json"] - }, - "url":{ - "paths":[ - { - "path":"/_inference/{inference_id}/_stream", - "methods":[ - "POST" - ], - "parts":{ - "inference_id":{ - "type":"string", - "description":"The inference Id" - } - } - }, - { - "path":"/_inference/{task_type}/{inference_id}/_stream", - "methods":[ - "POST" - ], - "parts":{ - "task_type":{ - "type":"string", - "description":"The task type" - }, - "inference_id":{ - "type":"string", - "description":"The inference Id" - } - } - } - ] - }, - "body":{ - "description":"The inference payload" - } - } -} diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.text_embedding.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.text_embedding.json new file mode 100644 index 0000000000000..309a1d80b7416 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.text_embedding.json @@ -0,0 +1,37 @@ +{ + "inference.text_embedding": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html", + "description": "Perform text embedding inference" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": [ + "application/json" + ], + "content_type": [ + "application/json" + ] + }, + "url": { + "paths": [ + { + "path": "/_inference/text_embedding/{inference_id}", + "methods": [ + "POST" + ], + "parts": { + "inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The inference payload" + } + } +} diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.unified_inference.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.unified_inference.json deleted file mode 100644 index 84182d19f8825..0000000000000 --- a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.unified_inference.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "inference.unified_inference": { - "documentation": { - "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/unified-inference-api.html", - "description": "Perform inference using the Unified Schema" - }, - "stability": "stable", - "visibility": "public", - "headers": { - "accept": ["text/event-stream"], - "content_type": ["application/json"] - }, - "url": { - "paths": [ - { - "path": "/_inference/{inference_id}/_unified", - "methods": ["POST"], - "parts": { - "inference_id": { - "type": "string", - "description": "The inference Id" - } - } - }, - { - "path": "/_inference/{task_type}/{inference_id}/_unified", - "methods": ["POST"], - "parts": { - "task_type": { - "type": "string", - "description": "The task type" - }, - "inference_id": { - "type": "string", - "description": "The inference Id" - } - } - } - ] - }, - "body": { - "description": "The inference payload" - } - } -} From aed3f176152a8075c717a0f2c4ad291335b3a554 Mon Sep 17 00:00:00 2001 From: Jonathan Buttner Date: Wed, 5 Mar 2025 16:52:17 -0500 Subject: [PATCH 2/2] Fixing test and updating code javadoc --- .../external/http/sender/ChatCompletionInput.java | 4 +++- .../external/http/sender/UnifiedChatInput.java | 3 ++- .../test/inference/inference_crud.yml | 15 --------------- 3 files changed, 5 insertions(+), 17 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/ChatCompletionInput.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/ChatCompletionInput.java index 928da95d9c2f0..58c952b9c556a 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/ChatCompletionInput.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/ChatCompletionInput.java @@ -7,6 +7,8 @@ package org.elasticsearch.xpack.inference.external.http.sender; +import org.elasticsearch.inference.TaskType; + import java.util.List; import java.util.Objects; @@ -15,7 +17,7 @@ * The main difference between this class and {@link UnifiedChatInput} is this should only be used for * {@link org.elasticsearch.inference.TaskType#COMPLETION} originating through the * {@link org.elasticsearch.inference.InferenceService#infer} code path. These are requests sent to the - * API without using the _unified route. + * API without using the {@link TaskType#CHAT_COMPLETION} task type. */ public class ChatCompletionInput extends InferenceInputs { private final List input; diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/UnifiedChatInput.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/UnifiedChatInput.java index fceec7c431182..f4f0511a4cc1b 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/UnifiedChatInput.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/UnifiedChatInput.java @@ -10,6 +10,7 @@ import org.elasticsearch.action.ActionListener; import org.elasticsearch.core.TimeValue; import org.elasticsearch.inference.Model; +import org.elasticsearch.inference.TaskType; import org.elasticsearch.inference.UnifiedCompletionRequest; import java.util.List; @@ -20,7 +21,7 @@ * The main difference between this class and {@link ChatCompletionInput} is this should only be used for * {@link org.elasticsearch.inference.TaskType#COMPLETION} originating through the * {@link org.elasticsearch.inference.InferenceService#unifiedCompletionInfer(Model, UnifiedCompletionRequest, TimeValue, ActionListener)} - * code path. These are requests sent to the API with the _unified route. + * code path. These are requests sent to the API with the _stream route and {@link TaskType#CHAT_COMPLETION}. */ public class UnifiedChatInput extends InferenceInputs { private final UnifiedCompletionRequest request; diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/inference/inference_crud.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/inference/inference_crud.yml index cdc69001d33ef..62a49422079b8 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/inference/inference_crud.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/inference/inference_crud.yml @@ -25,18 +25,3 @@ } } - match: { error.reason: "Unknown task_type [bad]" } - ---- -"Test inference with bad task type": - - do: - catch: bad_request - inference.inference: - task_type: bad - inference_id: elser_model - body: > - { - "input": "important text" - } - - match: { error.reason: "Unknown task_type [bad]" } - -