From 5efc682a8fc2f8611dc50eb95f123304b96381df Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Fri, 3 Oct 2025 17:57:05 +0100 Subject: [PATCH] Apply source excludes early when retrieving the _inference_fields (#135897) The inference fields are automatically retrieved when `exclude_vectors` is set to false. In this change, we apply the source exclude early, in case the _inference_fields is removed, to avoid loading it entirely. We also protect against an immutable map when adding the _inference_fields in _source since we cannot ensure that the map is always mutable. --- docs/changelog/135897.yaml | 5 +++ .../index/get/ShardGetService.java | 8 ++++ .../fetch/subphase/FetchSourcePhase.java | 5 +++ .../inference/30_semantic_text_inference.yml | 38 +++++++++++++++++++ 4 files changed, 56 insertions(+) create mode 100644 docs/changelog/135897.yaml diff --git a/docs/changelog/135897.yaml b/docs/changelog/135897.yaml new file mode 100644 index 0000000000000..62a4bbdbb64bc --- /dev/null +++ b/docs/changelog/135897.yaml @@ -0,0 +1,5 @@ +pr: 135897 +summary: Apply source excludes early when retrieving the `_inference_fields` +area: Search +type: bug +issues: [] diff --git a/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java b/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java index fb9763831212d..94630d58a0ecb 100644 --- a/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java +++ b/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java @@ -426,6 +426,14 @@ private static Boolean shouldExcludeVectorsFromSourceExplicit(FetchSourceContext public static boolean shouldExcludeInferenceFieldsFromSource(IndexSettings indexSettings, FetchSourceContext fetchSourceContext) { var explicit = shouldExcludeInferenceFieldsFromSourceExplicit(fetchSourceContext); + var filter = fetchSourceContext != null ? 
fetchSourceContext.filter() : null; + if (filter != null) { + if (filter.isPathFiltered(InferenceMetadataFieldsMapper.NAME, true)) { + return true; + } else if (filter.isExplicitlyIncluded(InferenceMetadataFieldsMapper.NAME)) { + return false; + } + } return explicit != null ? explicit : INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.get(indexSettings.getSettings()); } diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/FetchSourcePhase.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/FetchSourcePhase.java index df99a718887e1..96def8dae1423 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/FetchSourcePhase.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/FetchSourcePhase.java @@ -19,6 +19,7 @@ import org.elasticsearch.search.lookup.Source; import org.elasticsearch.search.lookup.SourceFilter; +import java.util.HashMap; import java.util.Map; public final class FetchSourcePhase implements FetchSubPhase { @@ -99,6 +100,10 @@ private Source replaceInferenceMetadataFields(SearchHit hit, Source source) { return source; } var newSource = source.source(); + if (newSource instanceof HashMap == false) { + // the map is not mutable + newSource = new HashMap<>(newSource); + } newSource.put(InferenceMetadataFieldsMapper.NAME, field.getValues().get(0)); return Source.fromMap(newSource, source.sourceContentType()); } diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/30_semantic_text_inference.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/30_semantic_text_inference.yml index bb4c00acf39b7..f8e1c2a1811c3 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/30_semantic_text_inference.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/30_semantic_text_inference.yml @@ -1385,4 +1385,42 @@ setup: - match: { 
hits.hits.0._source._inference_fields.dense_field.inference.chunks.dense_field.0.start_offset: 0 } - match: { hits.hits.0._source._inference_fields.dense_field.inference.chunks.dense_field.0.end_offset: 22 } + - do: + search: + index: test-index + body: + _source: + exclude_vectors: false + excludes: ["*"] + query: + term: + _id: doc_1 + + - match: { hits.total.value: 1 } + - length: { hits.hits.0._source: 0} + + - do: + search: + index: test-index + body: + _source: + exclude_vectors: false + excludes: ["*_field"] + query: + term: + _id: doc_1 + + - match: { hits.total.value: 1 } + - length: { hits.hits.0._source: 1} + - length: { hits.hits.0._source._inference_fields.sparse_field.inference.chunks: 1 } + - length: { hits.hits.0._source._inference_fields.sparse_field.inference.chunks.sparse_field: 1 } + - exists: hits.hits.0._source._inference_fields.sparse_field.inference.chunks.sparse_field.0.embeddings + - match: { hits.hits.0._source._inference_fields.sparse_field.inference.chunks.sparse_field.0.start_offset: 0 } + - match: { hits.hits.0._source._inference_fields.sparse_field.inference.chunks.sparse_field.0.end_offset: 14 } + - length: { hits.hits.0._source._inference_fields.dense_field.inference.chunks.dense_field: 1 } + - exists: hits.hits.0._source._inference_fields.dense_field.inference.chunks.dense_field.0.embeddings + - match: { hits.hits.0._source._inference_fields.dense_field.inference.chunks.dense_field.0.start_offset: 0 } + - match: { hits.hits.0._source._inference_fields.dense_field.inference.chunks.dense_field.0.end_offset: 22 } + +