From 33bbbf1c957fc4c50c8975eef3a7a662b34f3baa Mon Sep 17 00:00:00 2001 From: Mridula Date: Thu, 12 Jun 2025 19:54:50 +0100 Subject: [PATCH 1/4] Fixed backporting --- docs/changelog/129359.yaml | 5 + .../xpack/rank/RankRRFFeatures.java | 8 +- .../rank/linear/LinearRetrieverBuilder.java | 37 +++++- .../test/linear/10_linear_retriever.yml | 109 +++++++++++++++++- 4 files changed, 152 insertions(+), 7 deletions(-) create mode 100644 docs/changelog/129359.yaml diff --git a/docs/changelog/129359.yaml b/docs/changelog/129359.yaml new file mode 100644 index 0000000000000..9b1f6234d6579 --- /dev/null +++ b/docs/changelog/129359.yaml @@ -0,0 +1,5 @@ +pr: 129359 +summary: Add min score linear retriever +area: Search +type: enhancement +issues: [] diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/RankRRFFeatures.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/RankRRFFeatures.java index 7952a7f8d7da1..752fc3bf77170 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/RankRRFFeatures.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/RankRRFFeatures.java @@ -15,6 +15,7 @@ import static org.elasticsearch.search.retriever.CompoundRetrieverBuilder.INNER_RETRIEVERS_FILTER_SUPPORT; import static org.elasticsearch.xpack.rank.linear.L2ScoreNormalizer.LINEAR_RETRIEVER_L2_NORM; +import static org.elasticsearch.xpack.rank.linear.LinearRetrieverBuilder.LINEAR_RETRIEVER_MINSCORE_FIX; import static org.elasticsearch.xpack.rank.linear.MinMaxScoreNormalizer.LINEAR_RETRIEVER_MINMAX_SINGLE_DOC_FIX; import static org.elasticsearch.xpack.rank.rrf.RRFRetrieverBuilder.RRF_RETRIEVER_COMPOSITION_SUPPORTED; @@ -32,6 +33,11 @@ public Set getFeatures() { @Override public Set getTestFeatures() { - return Set.of(INNER_RETRIEVERS_FILTER_SUPPORT, LINEAR_RETRIEVER_MINMAX_SINGLE_DOC_FIX, LINEAR_RETRIEVER_L2_NORM); + return Set.of( + INNER_RETRIEVERS_FILTER_SUPPORT, + LINEAR_RETRIEVER_MINMAX_SINGLE_DOC_FIX, + LINEAR_RETRIEVER_L2_NORM, + LINEAR_RETRIEVER_MINSCORE_FIX + ); } } diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java index 66bbbf95bc9d6..d03a8c619c3f9 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java @@ -10,6 +10,7 @@ import org.apache.lucene.search.ScoreDoc; import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.util.Maps; +import org.elasticsearch.features.NodeFeature; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.license.LicenseUtils; import org.elasticsearch.search.builder.SearchSourceBuilder; @@ -46,6 +47,7 @@ */ public final class LinearRetrieverBuilder extends CompoundRetrieverBuilder { + public static final NodeFeature LINEAR_RETRIEVER_MINSCORE_FIX = new NodeFeature("linear_retriever_minscore_fix"); public static final String NAME = "linear"; public static final ParseField RETRIEVERS_FIELD = new ParseField("retrievers"); @@ -125,12 +127,35 @@ public LinearRetrieverBuilder( this.normalizers = normalizers; } + public LinearRetrieverBuilder( + List innerRetrievers, + int rankWindowSize, + float[] weights, + ScoreNormalizer[] normalizers, + Float minScore, + String retrieverName, + List preFilterQueryBuilders + ) { + this(innerRetrievers, rankWindowSize, weights, normalizers); + this.minScore = minScore; + if (minScore != null && minScore < 0) { + throw new IllegalArgumentException("[min_score] must be greater than or equal to 0, was: [" + minScore + "]"); + } + this.retrieverName = retrieverName; + this.preFilterQueryBuilders = preFilterQueryBuilders; + } + @Override protected LinearRetrieverBuilder clone(List newChildRetrievers, List newPreFilterQueryBuilders) { - LinearRetrieverBuilder clone = new LinearRetrieverBuilder(newChildRetrievers, rankWindowSize, weights, normalizers); - clone.preFilterQueryBuilders = newPreFilterQueryBuilders; - clone.retrieverName = retrieverName; - return clone; + return new LinearRetrieverBuilder( + newChildRetrievers, + rankWindowSize, + weights, + normalizers, + minScore, + retrieverName, + newPreFilterQueryBuilders + ); } @Override @@ -181,6 +206,10 @@ protected RankDoc[] combineInnerRetrieverResults(List rankResults, b topResults[rank] = sortedResults[rank]; topResults[rank].rank = rank + 1; } + // Filter by minScore if set(inclusive) + if (minScore != null) { + topResults = Arrays.stream(topResults).filter(doc -> doc.score >= minScore).toArray(LinearRankDoc[]::new); + } return topResults; } diff --git a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml index a6f8c580aa32d..8c2aa3ba4ec70 100644 --- a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml +++ b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml @@ -319,8 +319,8 @@ setup: - match: { hits.hits.2._id: "4" } - close_to: { hits.hits.2._score: { value: 1.6, error: 0.001 } } - match: { hits.hits.3._id: "3" } - - close_to: { hits.hits.3._score: { value: 1.2, error: 0.001 } } - + - close_to: { hits.hits.3._score: { value: 1.2, error: 0.001} } + --- "should handle all zero scores in normalization": - requires: @@ -1197,6 +1197,111 @@ setup: rank_window_size: -10 - match: { status: 400 } +--- +"linear retriever respects min_score after normalization": + + - requires: + cluster_features: [ "linear_retriever_minscore_fix" ] + reason: test min_score functionality for linear retriever + + - do: + search: + index: test + body: + retriever: + linear: + retrievers: + - retriever: + standard: + query: + function_score: + query: + match_all: {} + functions: + - filter: { term: { _id: "1" } } + weight: 1 + - filter: { term: { _id: "2" } } + weight: 2 + - filter: { term: { _id: "3" } } + weight: 3 + - filter: { term: { _id: "4" } } + weight: 4 + weight: 1.0 + normalizer: "minmax" + rank_window_size: 10 + min_score: 0.8 + size: 10 + + - match: { hits.total.value: 1 } + - length: { hits.hits: 1 } + - match: { hits.hits.0._id: "4" } + +--- +"linear retriever with min_score zero includes all docs": + + - requires: + cluster_features: [ "linear_retriever_minscore_fix" ] + reason: test min score functionality for linear retriever + + - do: + search: + index: test + body: + retriever: + linear: + retrievers: [ + { + retriever: { + standard: { + query: { + match_all: {} + } + } + }, + weight: 1.0, + normalizer: "minmax" + } + ] + rank_window_size: 10 + min_score: 0 + size: 10 + + - match: { hits.total.value: 4 } + - length: { hits.hits: 4 } + +--- +"linear retriever with high min_score excludes all docs": + + - requires: + cluster_features: [ "linear_retriever_minscore_fix" ] + reason: test min score functionality for linear retriever + + - do: + search: + index: test + body: + retriever: + linear: + retrievers: [ + { + retriever: { + standard: { + query: { + match_all: {} + } + } + }, + weight: 1.0, + normalizer: "minmax" + } + ] + rank_window_size: 10 + min_score: 2.0 + size: 10 + + - match: { hits.total.value: 0 } + - length: { hits.hits: 0 } + --- "minmax normalization properly handles a single doc result set": - requires: From 5d6e53be2968b03755899b31b20c4cdc20b4eaeb Mon Sep 17 00:00:00 2001 From: Mridula Date: Thu, 12 Jun 2025 17:37:50 +0100 Subject: [PATCH 2/4] Merged textreranking --- docs/changelog/129223.yaml | 5 + .../retriever/CompoundRetrieverBuilder.java | 3 +- .../retriever/RankDocsRetrieverBuilder.java | 7 +- .../RankDocsRetrieverBuilderTests.java | 2 +- .../xpack/inference/InferenceFeatures.java | 1 + .../TextSimilarityRankRetrieverBuilder.java | 26 ++--- .../70_text_similarity_rank_retriever.yml | 108 ++++++++++++++++++ 7 files changed, 134 insertions(+), 18 deletions(-) create mode 100644 docs/changelog/129223.yaml diff --git a/docs/changelog/129223.yaml b/docs/changelog/129223.yaml new file mode 100644 index 0000000000000..ec84ec52c8cf7 --- /dev/null +++ b/docs/changelog/129223.yaml @@ -0,0 +1,5 @@ +pr: 129223 +summary: Fix text similarity reranker does not propagate min score correctly +area: Search +type: bug +issues: [] diff --git a/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java b/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java index 6cf0af0ef1541..3f9353251b920 100644 --- a/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java @@ -195,7 +195,8 @@ public void onFailure(Exception e) { RankDocsRetrieverBuilder rankDocsRetrieverBuilder = new RankDocsRetrieverBuilder( rankWindowSize, newRetrievers.stream().map(s -> s.retriever).toList(), - results::get + results::get, + this.minScore ); rankDocsRetrieverBuilder.retrieverName(retrieverName()); return rankDocsRetrieverBuilder; diff --git a/server/src/main/java/org/elasticsearch/search/retriever/RankDocsRetrieverBuilder.java b/server/src/main/java/org/elasticsearch/search/retriever/RankDocsRetrieverBuilder.java index a77f5327fbc26..0cdd5ab35adcd 100644 --- a/server/src/main/java/org/elasticsearch/search/retriever/RankDocsRetrieverBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/retriever/RankDocsRetrieverBuilder.java @@ -33,13 +33,14 @@ public class RankDocsRetrieverBuilder extends RetrieverBuilder { final List sources; final Supplier rankDocs; - public RankDocsRetrieverBuilder(int rankWindowSize, List sources, Supplier rankDocs) { + public RankDocsRetrieverBuilder(int rankWindowSize, List sources, Supplier rankDocs, Float minScore) { this.rankWindowSize = rankWindowSize; this.rankDocs = rankDocs; if (sources == null || sources.isEmpty()) { throw new IllegalArgumentException("sources must not be null or empty"); } this.sources = sources; + this.minScore = minScore; } @Override @@ -48,7 +49,7 @@ public String getName() { } private boolean sourceHasMinScore() { - return minScore != null || sources.stream().anyMatch(x -> x.minScore() != null); + return this.minScore != null || sources.stream().anyMatch(x -> x.minScore() != null); } private boolean sourceShouldRewrite(QueryRewriteContext ctx) throws IOException { @@ -132,7 +133,7 @@ public void extractToSearchSourceBuilder(SearchSourceBuilder searchSourceBuilder searchSourceBuilder.size(rankWindowSize); } if (sourceHasMinScore()) { - searchSourceBuilder.minScore(this.minScore() == null ? Float.MIN_VALUE : this.minScore()); + searchSourceBuilder.minScore(this.minScore == null ? Float.MIN_VALUE : this.minScore); } if (searchSourceBuilder.size() + searchSourceBuilder.from() > rankDocResults.length) { searchSourceBuilder.size(Math.max(0, rankDocResults.length - searchSourceBuilder.from())); diff --git a/server/src/test/java/org/elasticsearch/search/retriever/RankDocsRetrieverBuilderTests.java b/server/src/test/java/org/elasticsearch/search/retriever/RankDocsRetrieverBuilderTests.java index eafab1d25c38e..165ad9b2de183 100644 --- a/server/src/test/java/org/elasticsearch/search/retriever/RankDocsRetrieverBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/search/retriever/RankDocsRetrieverBuilderTests.java @@ -97,7 +97,7 @@ private List preFilters(QueryRewriteContext queryRewriteContext) t } private RankDocsRetrieverBuilder createRandomRankDocsRetrieverBuilder(QueryRewriteContext queryRewriteContext) throws IOException { - return new RankDocsRetrieverBuilder(randomIntBetween(1, 100), innerRetrievers(queryRewriteContext), rankDocsSupplier()); + return new RankDocsRetrieverBuilder(randomIntBetween(1, 100), innerRetrievers(queryRewriteContext), rankDocsSupplier(), null); } public void testExtractToSearchSourceBuilder() throws IOException { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java index c691abe7579bb..396b50eb7cfc6 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java @@ -61,6 +61,7 @@ public Set getTestFeatures() { SemanticInferenceMetadataFieldsMapper.EXPLICIT_NULL_FIXES, SEMANTIC_KNN_VECTOR_QUERY_REWRITE_INTERCEPTION_SUPPORTED, TextSimilarityRankRetrieverBuilder.TEXT_SIMILARITY_RERANKER_ALIAS_HANDLING_FIX, + TextSimilarityRankRetrieverBuilder.TEXT_SIMILARITY_RERANKER_MINSCORE_FIX, SemanticInferenceMetadataFieldsMapper.INFERENCE_METADATA_FIELDS_ENABLED_BY_DEFAULT, SEMANTIC_TEXT_HIGHLIGHTER_DEFAULT, SEMANTIC_KNN_FILTER_FIX, diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilder.java index 59c90897c020b..10c6df1f76a20 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilder.java @@ -24,6 +24,7 @@ import org.elasticsearch.xcontent.XContentParser; import java.io.IOException; +import java.util.ArrayList; import java.util.List; import java.util.Objects; @@ -49,6 +50,7 @@ public class TextSimilarityRankRetrieverBuilder extends CompoundRetrieverBuilder "text_similarity_reranker_alias_handling_fix", true ); + public static final NodeFeature TEXT_SIMILARITY_RERANKER_MINSCORE_FIX = new NodeFeature("text_similarity_reranker_minscore_fix"); public static final ParseField RETRIEVER_FIELD = new ParseField("retriever"); public static final ParseField INFERENCE_ID_FIELD = new ParseField("inference_id"); @@ -174,24 +176,22 @@ protected TextSimilarityRankRetrieverBuilder clone( @Override protected RankDoc[] combineInnerRetrieverResults(List rankResults, boolean explain) { assert rankResults.size() == 1; - ScoreDoc[] scoreDocs = rankResults.get(0); - TextSimilarityRankDoc[] textSimilarityRankDocs = new TextSimilarityRankDoc[scoreDocs.length]; + ScoreDoc[] scoreDocs = rankResults.getFirst(); + List filteredDocs = new ArrayList<>(); + // Filtering by min_score must be done here, after reranking. + // Applying min_score in the child retriever could prematurely exclude documents that would receive high scores from the reranker. for (int i = 0; i < scoreDocs.length; i++) { ScoreDoc scoreDoc = scoreDocs[i]; assert scoreDoc.score >= 0; - if (explain) { - textSimilarityRankDocs[i] = new TextSimilarityRankDoc( - scoreDoc.doc, - scoreDoc.score, - scoreDoc.shardIndex, - inferenceId, - field - ); - } else { - textSimilarityRankDocs[i] = new TextSimilarityRankDoc(scoreDoc.doc, scoreDoc.score, scoreDoc.shardIndex); + if (minScore == null || scoreDoc.score >= minScore) { + if (explain) { + filteredDocs.add(new TextSimilarityRankDoc(scoreDoc.doc, scoreDoc.score, scoreDoc.shardIndex, inferenceId, field)); + } else { + filteredDocs.add(new TextSimilarityRankDoc(scoreDoc.doc, scoreDoc.score, scoreDoc.shardIndex)); + } } } - return textSimilarityRankDocs; + return filteredDocs.toArray(new TextSimilarityRankDoc[0]); } @Override diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/70_text_similarity_rank_retriever.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/70_text_similarity_rank_retriever.yml index 4b00a5e4016e4..f23dea84474b5 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/70_text_similarity_rank_retriever.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/70_text_similarity_rank_retriever.yml @@ -381,3 +381,111 @@ setup: - match: { hits.total.value: 1 } - length: { hits.hits: 1 } - match: { hits.hits.0._id: "doc_1" } + +--- +"Text similarity reranker respects min_score": + + - requires: + cluster_features: "text_similarity_reranker_minscore_fix" + reason: test min score functionality + + - do: + index: + index: test-index + id: doc_2 + body: + text: "The phases of the Moon come from the position of the Moon relative to the Earth and Sun." + topic: [ "science" ] + subtopic: [ "astronomy" ] + inference_text_field: "10" + refresh: true + + - do: + search: + index: test-index + body: + track_total_hits: true + fields: [ "text", "topic" ] + retriever: + text_similarity_reranker: + retriever: + standard: + query: + bool: + should: + - constant_score: + filter: + term: { subtopic: "technology" } + boost: 10 + - constant_score: + filter: + term: { subtopic: "astronomy" } + boost: 1 + rank_window_size: 10 + inference_id: my-rerank-model + inference_text: "How often does the moon hide the sun?" + field: inference_text_field + min_score: 10 + size: 10 + + - match: { hits.total.value: 1 } + - length: { hits.hits: 1 } + - match: { hits.hits.0._id: "doc_2" } + +--- +"Text similarity reranker with min_score zero includes all docs": + + - requires: + cluster_features: "text_similarity_reranker_minscore_fix" + reason: test min score functionality + + - do: + search: + index: test-index + body: + track_total_hits: true + fields: [ "text", "topic" ] + retriever: + text_similarity_reranker: + retriever: + standard: + query: + match_all: {} + rank_window_size: 10 + inference_id: my-rerank-model + inference_text: "How often does the moon hide the sun?" + field: inference_text_field + min_score: 0 + size: 10 + + - match: { hits.total.value: 3 } + - length: { hits.hits: 3 } + +--- +"Text similarity reranker with high min_score excludes all docs": + + - requires: + cluster_features: "text_similarity_reranker_minscore_fix" + reason: test min score functionality + + - do: + search: + index: test-index + body: + track_total_hits: true + fields: [ "text", "topic" ] + retriever: + text_similarity_reranker: + retriever: + standard: + query: + match_all: {} + rank_window_size: 10 + inference_id: my-rerank-model + inference_text: "How often does the moon hide the sun?" + field: inference_text_field + min_score: 1000 + size: 10 + + - match: { hits.total.value: 0 } + - length: { hits.hits: 0 } From 94ca38aa13c0852da1a81c2a3d2acc4afefe61f1 Mon Sep 17 00:00:00 2001 From: Mridula Date: Mon, 16 Jun 2025 19:57:13 +0100 Subject: [PATCH 3/4] build successful --- .../rank/textsimilarity/TextSimilarityRankRetrieverBuilder.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilder.java index 10c6df1f76a20..2bfa0d01e7c7d 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilder.java @@ -176,7 +176,7 @@ protected TextSimilarityRankRetrieverBuilder clone( @Override protected RankDoc[] combineInnerRetrieverResults(List rankResults, boolean explain) { assert rankResults.size() == 1; - ScoreDoc[] scoreDocs = rankResults.getFirst(); + ScoreDoc[] scoreDocs = rankResults.get(0); List filteredDocs = new ArrayList<>(); // Filtering by min_score must be done here, after reranking. // Applying min_score in the child retriever could prematurely exclude documents that would receive high scores from the reranker. From 1cf32ae1c8350b20839e88f40fa2c25f74bd97f0 Mon Sep 17 00:00:00 2001 From: Mridula Date: Mon, 16 Jun 2025 19:59:40 +0100 Subject: [PATCH 4/4] Delete docs/changelog/129223.yaml --- docs/changelog/129223.yaml | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 docs/changelog/129223.yaml diff --git a/docs/changelog/129223.yaml b/docs/changelog/129223.yaml deleted file mode 100644 index ec84ec52c8cf7..0000000000000 --- a/docs/changelog/129223.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 129223 -summary: Fix text similarity reranker does not propagate min score correctly -area: Search -type: bug -issues: []