diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.chat_completion_unified.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.chat_completion_unified.json
new file mode 100644
index 0000000000000..98854625d0471
--- /dev/null
+++ b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.chat_completion_unified.json
@@ -0,0 +1,37 @@
+{
+ "inference.chat_completion_unified": {
+ "documentation": {
+ "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/chat-completion-inference.html",
+ "description": "Perform chat completion inference"
+ },
+ "stability": "stable",
+ "visibility": "public",
+ "headers": {
+ "accept": [
+ "text/event-stream"
+ ],
+ "content_type": [
+ "application/json"
+ ]
+ },
+ "url": {
+ "paths": [
+ {
+ "path": "/_inference/chat_completion/{inference_id}/_stream",
+ "methods": [
+ "POST"
+ ],
+ "parts": {
+ "inference_id": {
+ "type": "string",
+ "description": "The inference Id"
+ }
+ }
+ }
+ ]
+ },
+ "body": {
+ "description": "The inference payload"
+ }
+ }
+}
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.completion.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.completion.json
new file mode 100644
index 0000000000000..6c753e59e3434
--- /dev/null
+++ b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.completion.json
@@ -0,0 +1,37 @@
+{
+ "inference.completion": {
+ "documentation": {
+ "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html",
+ "description": "Perform completion inference"
+ },
+ "stability": "stable",
+ "visibility": "public",
+ "headers": {
+ "accept": [
+ "application/json"
+ ],
+ "content_type": [
+ "application/json"
+ ]
+ },
+ "url": {
+ "paths": [
+ {
+ "path": "/_inference/completion/{inference_id}",
+ "methods": [
+ "POST"
+ ],
+ "parts": {
+ "inference_id": {
+ "type": "string",
+ "description": "The inference Id"
+ }
+ }
+ }
+ ]
+ },
+ "body": {
+ "description": "The inference payload"
+ }
+ }
+}
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.get.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.get.json
index 14e7519c3796e..8887d9d0a1ebe 100644
--- a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.get.json
+++ b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.get.json
@@ -1,47 +1,49 @@
{
- "inference.get":{
- "documentation":{
- "url":"https://www.elastic.co/guide/en/elasticsearch/reference/master/get-inference-api.html",
- "description":"Get an inference endpoint"
+ "inference.get": {
+ "documentation": {
+ "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/get-inference-api.html",
+ "description": "Get an inference endpoint"
},
- "stability":"stable",
- "visibility":"public",
- "headers":{
- "accept": [ "application/json"]
+ "stability": "stable",
+ "visibility": "public",
+ "headers": {
+ "accept": [
+ "application/json"
+ ]
},
- "url":{
- "paths":[
+ "url": {
+ "paths": [
{
- "path":"/_inference",
- "methods":[
+ "path": "/_inference",
+ "methods": [
"GET"
]
},
{
- "path":"/_inference/{inference_id}",
- "methods":[
+ "path": "/_inference/{inference_id}",
+ "methods": [
"GET"
],
- "parts":{
- "inference_id":{
- "type":"string",
- "description":"The inference Id"
+ "parts": {
+ "inference_id": {
+ "type": "string",
+ "description": "The inference Id"
}
}
},
{
- "path":"/_inference/{task_type}/{inference_id}",
- "methods":[
+ "path": "/_inference/{task_type}/{inference_id}",
+ "methods": [
"GET"
],
- "parts":{
- "task_type":{
- "type":"string",
- "description":"The task type"
+ "parts": {
+ "task_type": {
+ "type": "string",
+ "description": "The task type"
},
- "inference_id":{
- "type":"string",
- "description":"The inference Id"
+ "inference_id": {
+ "type": "string",
+ "description": "The inference Id"
}
}
}
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.inference.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.inference.json
deleted file mode 100644
index eb4c1268c28ca..0000000000000
--- a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.inference.json
+++ /dev/null
@@ -1,49 +0,0 @@
-{
- "inference.inference":{
- "documentation":{
- "url":"https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html",
- "description":"Perform inference"
- },
- "stability":"stable",
- "visibility":"public",
- "headers":{
- "accept": [ "application/json"],
- "content_type": ["application/json"]
- },
- "url":{
- "paths":[
- {
- "path":"/_inference/{inference_id}",
- "methods":[
- "POST"
- ],
- "parts":{
- "inference_id":{
- "type":"string",
- "description":"The inference Id"
- }
- }
- },
- {
- "path":"/_inference/{task_type}/{inference_id}",
- "methods":[
- "POST"
- ],
- "parts":{
- "task_type":{
- "type":"string",
- "description":"The task type"
- },
- "inference_id":{
- "type":"string",
- "description":"The inference Id"
- }
- }
- }
- ]
- },
- "body":{
- "description":"The inference payload"
- }
- }
-}
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.put.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.put.json
index 411392fe39908..4879007724450 100644
--- a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.put.json
+++ b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.put.json
@@ -1,49 +1,53 @@
{
- "inference.put":{
- "documentation":{
- "url":"https://www.elastic.co/guide/en/elasticsearch/reference/master/put-inference-api.html",
- "description":"Configure an inference endpoint for use in the Inference API"
+ "inference.put": {
+ "documentation": {
+ "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/put-inference-api.html",
+ "description": "Configure an inference endpoint for use in the Inference API"
},
- "stability":"stable",
- "visibility":"public",
- "headers":{
- "accept": [ "application/json"],
- "content_type": ["application/json"]
+ "stability": "stable",
+ "visibility": "public",
+ "headers": {
+ "accept": [
+ "application/json"
+ ],
+ "content_type": [
+ "application/json"
+ ]
},
- "url":{
- "paths":[
+ "url": {
+ "paths": [
{
- "path":"/_inference/{inference_id}",
- "methods":[
+ "path": "/_inference/{inference_id}",
+ "methods": [
"PUT"
],
- "parts":{
- "inference_id":{
- "type":"string",
- "description":"The inference Id"
+ "parts": {
+ "inference_id": {
+ "type": "string",
+ "description": "The inference Id"
}
}
},
{
- "path":"/_inference/{task_type}/{inference_id}",
- "methods":[
+ "path": "/_inference/{task_type}/{inference_id}",
+ "methods": [
"PUT"
],
- "parts":{
- "task_type":{
- "type":"string",
- "description":"The task type"
+ "parts": {
+ "task_type": {
+ "type": "string",
+ "description": "The task type"
},
- "inference_id":{
- "type":"string",
- "description":"The inference Id"
+ "inference_id": {
+ "type": "string",
+ "description": "The inference Id"
}
}
}
]
},
- "body":{
- "description":"The inference endpoint's task and service settings"
+ "body": {
+ "description": "The inference endpoint's task and service settings"
}
}
}
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.rerank.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.rerank.json
new file mode 100644
index 0000000000000..c08a51a8b9b98
--- /dev/null
+++ b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.rerank.json
@@ -0,0 +1,37 @@
+{
+ "inference.rerank": {
+ "documentation": {
+ "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html",
+ "description": "Perform reranking inference"
+ },
+ "stability": "stable",
+ "visibility": "public",
+ "headers": {
+ "accept": [
+ "application/json"
+ ],
+ "content_type": [
+ "application/json"
+ ]
+ },
+ "url": {
+ "paths": [
+ {
+ "path": "/_inference/rerank/{inference_id}",
+ "methods": [
+ "POST"
+ ],
+ "parts": {
+ "inference_id": {
+ "type": "string",
+ "description": "The inference Id"
+ }
+ }
+ }
+ ]
+ },
+ "body": {
+ "description": "The inference payload"
+ }
+ }
+}
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.sparse_embedding.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.sparse_embedding.json
new file mode 100644
index 0000000000000..90ebb6e6dc4c2
--- /dev/null
+++ b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.sparse_embedding.json
@@ -0,0 +1,37 @@
+{
+ "inference.sparse_embedding": {
+ "documentation": {
+ "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html",
+ "description": "Perform sparse embedding inference"
+ },
+ "stability": "stable",
+ "visibility": "public",
+ "headers": {
+ "accept": [
+ "application/json"
+ ],
+ "content_type": [
+ "application/json"
+ ]
+ },
+ "url": {
+ "paths": [
+ {
+ "path": "/_inference/sparse_embedding/{inference_id}",
+ "methods": [
+ "POST"
+ ],
+ "parts": {
+ "inference_id": {
+ "type": "string",
+ "description": "The inference Id"
+ }
+ }
+ }
+ ]
+ },
+ "body": {
+ "description": "The inference payload"
+ }
+ }
+}
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.stream_completion.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.stream_completion.json
new file mode 100644
index 0000000000000..a1d770c46305b
--- /dev/null
+++ b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.stream_completion.json
@@ -0,0 +1,37 @@
+{
+ "inference.stream_completion": {
+ "documentation": {
+ "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-stream-inference-api.html",
+ "description": "Perform streaming inference"
+ },
+ "stability": "stable",
+ "visibility": "public",
+ "headers": {
+ "accept": [
+ "text/event-stream"
+ ],
+ "content_type": [
+ "application/json"
+ ]
+ },
+ "url": {
+ "paths": [
+ {
+ "path": "/_inference/completion/{inference_id}/_stream",
+ "methods": [
+ "POST"
+ ],
+ "parts": {
+ "inference_id": {
+ "type": "string",
+ "description": "The inference Id"
+ }
+ }
+ }
+ ]
+ },
+ "body": {
+ "description": "The inference payload"
+ }
+ }
+}
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.stream_inference.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.stream_inference.json
deleted file mode 100644
index 493306e10d5c7..0000000000000
--- a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.stream_inference.json
+++ /dev/null
@@ -1,49 +0,0 @@
-{
- "inference.stream_inference":{
- "documentation":{
- "url":"https://www.elastic.co/guide/en/elasticsearch/reference/master/post-stream-inference-api.html",
- "description":"Perform streaming inference"
- },
- "stability":"stable",
- "visibility":"public",
- "headers":{
- "accept": [ "text/event-stream"],
- "content_type": ["application/json"]
- },
- "url":{
- "paths":[
- {
- "path":"/_inference/{inference_id}/_stream",
- "methods":[
- "POST"
- ],
- "parts":{
- "inference_id":{
- "type":"string",
- "description":"The inference Id"
- }
- }
- },
- {
- "path":"/_inference/{task_type}/{inference_id}/_stream",
- "methods":[
- "POST"
- ],
- "parts":{
- "task_type":{
- "type":"string",
- "description":"The task type"
- },
- "inference_id":{
- "type":"string",
- "description":"The inference Id"
- }
- }
- }
- ]
- },
- "body":{
- "description":"The inference payload"
- }
- }
-}
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.text_embedding.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.text_embedding.json
new file mode 100644
index 0000000000000..309a1d80b7416
--- /dev/null
+++ b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.text_embedding.json
@@ -0,0 +1,37 @@
+{
+ "inference.text_embedding": {
+ "documentation": {
+ "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html",
+ "description": "Perform text embedding inference"
+ },
+ "stability": "stable",
+ "visibility": "public",
+ "headers": {
+ "accept": [
+ "application/json"
+ ],
+ "content_type": [
+ "application/json"
+ ]
+ },
+ "url": {
+ "paths": [
+ {
+ "path": "/_inference/text_embedding/{inference_id}",
+ "methods": [
+ "POST"
+ ],
+ "parts": {
+ "inference_id": {
+ "type": "string",
+ "description": "The inference Id"
+ }
+ }
+ }
+ ]
+ },
+ "body": {
+ "description": "The inference payload"
+ }
+ }
+}
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.unified_inference.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.unified_inference.json
deleted file mode 100644
index 84182d19f8825..0000000000000
--- a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.unified_inference.json
+++ /dev/null
@@ -1,45 +0,0 @@
-{
- "inference.unified_inference": {
- "documentation": {
- "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/unified-inference-api.html",
- "description": "Perform inference using the Unified Schema"
- },
- "stability": "stable",
- "visibility": "public",
- "headers": {
- "accept": ["text/event-stream"],
- "content_type": ["application/json"]
- },
- "url": {
- "paths": [
- {
- "path": "/_inference/{inference_id}/_unified",
- "methods": ["POST"],
- "parts": {
- "inference_id": {
- "type": "string",
- "description": "The inference Id"
- }
- }
- },
- {
- "path": "/_inference/{task_type}/{inference_id}/_unified",
- "methods": ["POST"],
- "parts": {
- "task_type": {
- "type": "string",
- "description": "The task type"
- },
- "inference_id": {
- "type": "string",
- "description": "The inference Id"
- }
- }
- }
- ]
- },
- "body": {
- "description": "The inference payload"
- }
- }
-}
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/ChatCompletionInput.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/ChatCompletionInput.java
index 928da95d9c2f0..58c952b9c556a 100644
--- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/ChatCompletionInput.java
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/ChatCompletionInput.java
@@ -7,6 +7,8 @@
package org.elasticsearch.xpack.inference.external.http.sender;
+import org.elasticsearch.inference.TaskType;
+
import java.util.List;
import java.util.Objects;
@@ -15,7 +17,7 @@
* The main difference between this class and {@link UnifiedChatInput} is this should only be used for
* {@link org.elasticsearch.inference.TaskType#COMPLETION} originating through the
* {@link org.elasticsearch.inference.InferenceService#infer} code path. These are requests sent to the
- * API without using the <code>_unified</code> route.
+ * API without using the {@link TaskType#CHAT_COMPLETION} task type.
*/
public class ChatCompletionInput extends InferenceInputs {
private final List input;
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/UnifiedChatInput.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/UnifiedChatInput.java
index fceec7c431182..f4f0511a4cc1b 100644
--- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/UnifiedChatInput.java
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/UnifiedChatInput.java
@@ -10,6 +10,7 @@
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.inference.Model;
+import org.elasticsearch.inference.TaskType;
import org.elasticsearch.inference.UnifiedCompletionRequest;
import java.util.List;
@@ -20,7 +21,7 @@
* The main difference between this class and {@link ChatCompletionInput} is this should only be used for
* {@link org.elasticsearch.inference.TaskType#COMPLETION} originating through the
* {@link org.elasticsearch.inference.InferenceService#unifiedCompletionInfer(Model, UnifiedCompletionRequest, TimeValue, ActionListener)}
- * code path. These are requests sent to the API with the <code>_unified</code> route.
+ * code path. These are requests sent to the API with the <code>_stream</code> route and {@link TaskType#CHAT_COMPLETION}.
*/
public class UnifiedChatInput extends InferenceInputs {
private final UnifiedCompletionRequest request;
diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/inference/inference_crud.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/inference/inference_crud.yml
index cdc69001d33ef..62a49422079b8 100644
--- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/inference/inference_crud.yml
+++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/inference/inference_crud.yml
@@ -25,18 +25,3 @@
}
}
- match: { error.reason: "Unknown task_type [bad]" }
-
----
-"Test inference with bad task type":
- - do:
- catch: bad_request
- inference.inference:
- task_type: bad
- inference_id: elser_model
- body: >
- {
- "input": "important text"
- }
- - match: { error.reason: "Unknown task_type [bad]" }
-
-