From 8295855249262098d8f9e01b7b03b99993f74a5e Mon Sep 17 00:00:00 2001 From: afoucret Date: Fri, 21 Nov 2025 11:56:38 +0100 Subject: [PATCH 1/2] Introducing a new built-in endpoint (.gp-llm-v2-completion) --- ...ceGetModelsWithElasticInferenceServiceIT.java | 9 ++++++++- .../elastic/InternalPreconfiguredEndpoints.java | 11 +++++++++++ .../InternalPreconfiguredEndpointsTests.java | 16 ++++++++++++++++ 3 files changed, 35 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceGetModelsWithElasticInferenceServiceIT.java b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceGetModelsWithElasticInferenceServiceIT.java index b413a38a052e8..7b02f81311880 100644 --- a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceGetModelsWithElasticInferenceServiceIT.java +++ b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceGetModelsWithElasticInferenceServiceIT.java @@ -41,16 +41,23 @@ public static void init() { public void testGetDefaultEndpoints() throws IOException { var allModels = getAllModels(); var chatCompletionModels = getModels("_all", TaskType.CHAT_COMPLETION); + var completionModels = getModels("_all", TaskType.COMPLETION); - assertThat(allModels, hasSize(8)); + assertThat(allModels, hasSize(9)); assertThat(chatCompletionModels, hasSize(2)); + assertThat(completionModels, hasSize(1)); for (var model : chatCompletionModels) { assertEquals("chat_completion", model.get("task_type")); } + for (var model : completionModels) { + assertEquals("completion", model.get("task_type")); + } + assertInferenceIdTaskType(allModels, ".rainbow-sprinkles-elastic", TaskType.CHAT_COMPLETION); assertInferenceIdTaskType(allModels, ".gp-llm-v2-chat_completion", TaskType.CHAT_COMPLETION); + assertInferenceIdTaskType(allModels, ".gp-llm-v2-completion", TaskType.COMPLETION); assertInferenceIdTaskType(allModels, ".elser-2-elastic", TaskType.SPARSE_EMBEDDING); assertInferenceIdTaskType(allModels, ".jina-embeddings-v3", TaskType.TEXT_EMBEDDING); assertInferenceIdTaskType(allModels, ".elastic-rerank-v1", TaskType.RERANK); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpoints.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpoints.java index 8b1ee97a2840d..82cede55ef6bb 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpoints.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpoints.java @@ -36,6 +36,7 @@ public class InternalPreconfiguredEndpoints { // gp-llm-v2 public static final String GP_LLM_V2_MODEL_ID = "gp-llm-v2"; public static final String GP_LLM_V2_CHAT_COMPLETION_ENDPOINT_ID = ".gp-llm-v2-chat_completion"; + public static final String GP_LLM_V2_COMPLETION_ENDPOINT_ID = ".gp-llm-v2-completion"; // elser-2 public static final String DEFAULT_ELSER_2_MODEL_ID = "elser_model_2"; @@ -97,6 +98,16 @@ public record MinimalModel( ChunkingSettingsBuilder.DEFAULT_SETTINGS ), GP_LLM_V2_COMPLETION_SERVICE_SETTINGS + ), + new MinimalModel( + new ModelConfigurations( + GP_LLM_V2_COMPLETION_ENDPOINT_ID, + TaskType.COMPLETION, + ElasticInferenceService.NAME, + GP_LLM_V2_COMPLETION_SERVICE_SETTINGS, + ChunkingSettingsBuilder.DEFAULT_SETTINGS + ), + GP_LLM_V2_COMPLETION_SERVICE_SETTINGS ) ), DEFAULT_ELSER_2_MODEL_ID, diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpointsTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpointsTests.java index cfd166c7d240e..ce4c674e935f0 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpointsTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpointsTests.java @@ -7,9 +7,11 @@ package org.elasticsearch.xpack.inference.services.elastic; +import org.elasticsearch.inference.TaskType; import org.elasticsearch.test.ESTestCase; import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.is; public class InternalPreconfiguredEndpointsTests extends ESTestCase { public void testGetWithModelName_ReturnsAnEmptyList_IfNameDoesNotExist() { @@ -20,4 +22,18 @@ public void testGetWithModelName_ReturnsChatCompletionModels() { var models = InternalPreconfiguredEndpoints.getWithModelName(InternalPreconfiguredEndpoints.DEFAULT_CHAT_COMPLETION_MODEL_ID_V1); assertThat(models, hasSize(1)); } + + public void testGetWithModelName_ReturnsGpLlmV2Models() { + var models = InternalPreconfiguredEndpoints.getWithModelName(InternalPreconfiguredEndpoints.GP_LLM_V2_MODEL_ID); + assertThat(models, hasSize(2)); + var taskTypes = models.stream().map(m -> m.configurations().getTaskType()).toList(); + assertTrue("Should contain CHAT_COMPLETION", taskTypes.contains(TaskType.CHAT_COMPLETION)); + assertTrue("Should contain COMPLETION", taskTypes.contains(TaskType.COMPLETION)); + } + + public void testGetWithInferenceId_ReturnsGpLlmV2CompletionEndpoint() { + var model = InternalPreconfiguredEndpoints.getWithInferenceId(InternalPreconfiguredEndpoints.GP_LLM_V2_COMPLETION_ENDPOINT_ID); + assertThat(model.configurations().getInferenceEntityId(), is(InternalPreconfiguredEndpoints.GP_LLM_V2_COMPLETION_ENDPOINT_ID)); + assertThat(model.configurations().getTaskType(), is(TaskType.COMPLETION)); + } } From fc28417c6798015eae888371b9fff3378282f66b Mon Sep 17 00:00:00 2001 From: afoucret Date: Mon, 24 Nov 2025 10:16:55 +0100 Subject: [PATCH 2/2] Remove useless chunking settings for pre-defined completion and chat_completion endpoints. --- .../services/elastic/InternalPreconfiguredEndpoints.java | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpoints.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpoints.java index 82cede55ef6bb..53c1be21e7887 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpoints.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/InternalPreconfiguredEndpoints.java @@ -81,8 +81,7 @@ public record MinimalModel( DEFAULT_CHAT_COMPLETION_ENDPOINT_ID_V1, TaskType.CHAT_COMPLETION, ElasticInferenceService.NAME, - COMPLETION_SERVICE_SETTINGS, - ChunkingSettingsBuilder.DEFAULT_SETTINGS + COMPLETION_SERVICE_SETTINGS ), COMPLETION_SERVICE_SETTINGS ) @@ -94,8 +93,7 @@ public record MinimalModel( GP_LLM_V2_CHAT_COMPLETION_ENDPOINT_ID, TaskType.CHAT_COMPLETION, ElasticInferenceService.NAME, - GP_LLM_V2_COMPLETION_SERVICE_SETTINGS, - ChunkingSettingsBuilder.DEFAULT_SETTINGS + GP_LLM_V2_COMPLETION_SERVICE_SETTINGS ), GP_LLM_V2_COMPLETION_SERVICE_SETTINGS ), @@ -104,8 +102,7 @@ public record MinimalModel( GP_LLM_V2_COMPLETION_ENDPOINT_ID, TaskType.COMPLETION, ElasticInferenceService.NAME, - GP_LLM_V2_COMPLETION_SERVICE_SETTINGS, - ChunkingSettingsBuilder.DEFAULT_SETTINGS + GP_LLM_V2_COMPLETION_SERVICE_SETTINGS ), GP_LLM_V2_COMPLETION_SERVICE_SETTINGS )