From 8295855249262098d8f9e01b7b03b99993f74a5e Mon Sep 17 00:00:00 2001
From: afoucret <aurelien.foucret@elastic.co>
Date: Fri, 21 Nov 2025 11:56:38 +0100
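Subject: [PATCH 1/2] Introduce a new built-in endpoint
 (.gp-llm-v2-completion)

Register a preconfigured `.gp-llm-v2-completion` endpoint (task type
`completion`) for the `gp-llm-v2` model, next to the existing
`.gp-llm-v2-chat_completion` endpoint, and cover it in the unit and
REST tests.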
---
 ...ceGetModelsWithElasticInferenceServiceIT.java |  9 ++++++++-
 .../elastic/InternalPreconfiguredEndpoints.java  | 11 +++++++++++
 .../InternalPreconfiguredEndpointsTests.java     | 16 ++++++++++++++++
 3 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceGetModelsWithElasticInferenceServiceIT.java b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceGetModelsWithElasticInferenceServiceIT.java
index b413a38a052e8..7b02f81311880 100644
--- a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceGetModelsWithElasticInferenceServiceIT.java
+++ b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceGetModelsWithElasticInferenceServiceIT.java
@@ -41,16 +41,23 @@ public static void init() {
     public void testGetDefaultEndpoints() throws IOException {
         var allModels = getAllModels();
         var chatCompletionModels = getModels("_all", TaskType.CHAT_COMPLETION);
+        var completionModels = getModels("_all", TaskType.COMPLETION);
 
-        assertThat(allModels, hasSize(8));
+        assertThat(allModels, hasSize(9));
         assertThat(chatCompletionModels, hasSize(2));
+        assertThat(completionModels, hasSize(1));
 
         for (var model : chatCompletionModels) {
             assertEquals("chat_completion", model.get("task_type"));
         }
 
+        for (var model : completionModels) {
+            assertEquals("completion", model.get("task_type"));
+        }
+
         assertInferenceIdTaskType(allModels, ".rainbow-sprinkles-elastic", TaskType.CHAT_COMPLETION);
         assertInferenceIdTaskType(allModels, ".gp-llm-v2-chat_completion", TaskType.CHAT_COMPLETION);
+        assertInferenceIdTaskType(allModels, ".gp-llm-v2-completion", TaskType.COMPLETION);
         assertInferenceIdTaskType(allModels, ".elser-2-elastic", TaskType.SPARSE_EMBEDDING);
         assertInferenceIdTaskType(allModels, ".jina-embeddings-v3", TaskType.TEXT_EMBEDDING);
         assertInferenceIdTaskType(allModels, ".elastic-rerank-v1", TaskType.RERANK);
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpoints.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpoints.java
index 8b1ee97a2840d..82cede55ef6bb 100644
--- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpoints.java
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpoints.java
@@ -36,6 +36,7 @@ public class InternalPreconfiguredEndpoints {
     // gp-llm-v2
     public static final String GP_LLM_V2_MODEL_ID = "gp-llm-v2";
     public static final String GP_LLM_V2_CHAT_COMPLETION_ENDPOINT_ID = ".gp-llm-v2-chat_completion";
+    public static final String GP_LLM_V2_COMPLETION_ENDPOINT_ID = ".gp-llm-v2-completion";
 
     // elser-2
     public static final String DEFAULT_ELSER_2_MODEL_ID = "elser_model_2";
@@ -97,6 +98,16 @@ public record MinimalModel(
                     ChunkingSettingsBuilder.DEFAULT_SETTINGS
                 ),
                 GP_LLM_V2_COMPLETION_SERVICE_SETTINGS
+            ),
+            new MinimalModel(
+                new ModelConfigurations(
+                    GP_LLM_V2_COMPLETION_ENDPOINT_ID,
+                    TaskType.COMPLETION,
+                    ElasticInferenceService.NAME,
+                    GP_LLM_V2_COMPLETION_SERVICE_SETTINGS,
+                    ChunkingSettingsBuilder.DEFAULT_SETTINGS
+                ),
+                GP_LLM_V2_COMPLETION_SERVICE_SETTINGS
             )
         ),
         DEFAULT_ELSER_2_MODEL_ID,
diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpointsTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpointsTests.java
index cfd166c7d240e..ce4c674e935f0 100644
--- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpointsTests.java
+++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpointsTests.java
@@ -7,9 +7,11 @@
 
 package org.elasticsearch.xpack.inference.services.elastic;
 
+import org.elasticsearch.inference.TaskType;
 import org.elasticsearch.test.ESTestCase;
 
 import static org.hamcrest.Matchers.hasSize;
+import static org.hamcrest.Matchers.is;
 
 public class InternalPreconfiguredEndpointsTests extends ESTestCase {
     public void testGetWithModelName_ReturnsAnEmptyList_IfNameDoesNotExist() {
@@ -20,4 +22,18 @@ public void testGetWithModelName_ReturnsChatCompletionModels() {
         var models = InternalPreconfiguredEndpoints.getWithModelName(InternalPreconfiguredEndpoints.DEFAULT_CHAT_COMPLETION_MODEL_ID_V1);
         assertThat(models, hasSize(1));
     }
+
+    public void testGetWithModelName_ReturnsGpLlmV2Models() {
+        var models = InternalPreconfiguredEndpoints.getWithModelName(InternalPreconfiguredEndpoints.GP_LLM_V2_MODEL_ID);
+        assertThat(models, hasSize(2));
+        var taskTypes = models.stream().map(m -> m.configurations().getTaskType()).toList();
+        assertTrue("Should contain CHAT_COMPLETION", taskTypes.contains(TaskType.CHAT_COMPLETION));
+        assertTrue("Should contain COMPLETION", taskTypes.contains(TaskType.COMPLETION));
+    }
+
+    public void testGetWithInferenceId_ReturnsGpLlmV2CompletionEndpoint() {
+        var model = InternalPreconfiguredEndpoints.getWithInferenceId(InternalPreconfiguredEndpoints.GP_LLM_V2_COMPLETION_ENDPOINT_ID);
+        assertThat(model.configurations().getInferenceEntityId(), is(InternalPreconfiguredEndpoints.GP_LLM_V2_COMPLETION_ENDPOINT_ID));
+        assertThat(model.configurations().getTaskType(), is(TaskType.COMPLETION));
+    }
 }

From fc28417c6798015eae888371b9fff3378282f66b Mon Sep 17 00:00:00 2001
From: afoucret <aurelien.foucret@elastic.co>
Date: Mon, 24 Nov 2025 10:16:55 +0100
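Subject: [PATCH 2/2] Remove unneeded chunking settings from preconfigured
 completion and chat_completion endpoints

Build the ModelConfigurations of the three preconfigured completion and
chat_completion endpoints touched here without an explicit
ChunkingSettingsBuilder.DEFAULT_SETTINGS argument.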
---
 .../services/elastic/InternalPreconfiguredEndpoints.java | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpoints.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpoints.java
index 82cede55ef6bb..53c1be21e7887 100644
--- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpoints.java
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpoints.java
@@ -81,8 +81,7 @@ public record MinimalModel(
                     DEFAULT_CHAT_COMPLETION_ENDPOINT_ID_V1,
                     TaskType.CHAT_COMPLETION,
                     ElasticInferenceService.NAME,
-                    COMPLETION_SERVICE_SETTINGS,
-                    ChunkingSettingsBuilder.DEFAULT_SETTINGS
+                    COMPLETION_SERVICE_SETTINGS
                 ),
                 COMPLETION_SERVICE_SETTINGS
             )
@@ -94,8 +93,7 @@ public record MinimalModel(
                     GP_LLM_V2_CHAT_COMPLETION_ENDPOINT_ID,
                     TaskType.CHAT_COMPLETION,
                     ElasticInferenceService.NAME,
-                    GP_LLM_V2_COMPLETION_SERVICE_SETTINGS,
-                    ChunkingSettingsBuilder.DEFAULT_SETTINGS
+                    GP_LLM_V2_COMPLETION_SERVICE_SETTINGS
                 ),
                 GP_LLM_V2_COMPLETION_SERVICE_SETTINGS
             ),
@@ -104,8 +102,7 @@ public record MinimalModel(
                     GP_LLM_V2_COMPLETION_ENDPOINT_ID,
                     TaskType.COMPLETION,
                     ElasticInferenceService.NAME,
-                    GP_LLM_V2_COMPLETION_SERVICE_SETTINGS,
-                    ChunkingSettingsBuilder.DEFAULT_SETTINGS
+                    GP_LLM_V2_COMPLETION_SERVICE_SETTINGS
                 ),
                 GP_LLM_V2_COMPLETION_SERVICE_SETTINGS
             )