From 78fa6af7eee69eaf1fad956b356c465d07c33aba Mon Sep 17 00:00:00 2001 From: Mridula Date: Wed, 25 Jun 2025 11:38:36 +0100 Subject: [PATCH 01/51] Per component normalizer is removed --- .../rank/linear/LinearRetrieverComponent.java | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverComponent.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverComponent.java index bb0d79d3fe488..5ce1ba42ab1d3 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverComponent.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverComponent.java @@ -25,20 +25,17 @@ public class LinearRetrieverComponent implements ToXContentObject { public static final ParseField RETRIEVER_FIELD = new ParseField("retriever"); public static final ParseField WEIGHT_FIELD = new ParseField("weight"); - public static final ParseField NORMALIZER_FIELD = new ParseField("normalizer"); + public static final String DEFAULT_NORMALIZER = IdentityScoreNormalizer.INSTANCE.getName(); static final float DEFAULT_WEIGHT = 1f; - static final ScoreNormalizer DEFAULT_NORMALIZER = IdentityScoreNormalizer.INSTANCE; RetrieverBuilder retriever; float weight; - ScoreNormalizer normalizer; - public LinearRetrieverComponent(RetrieverBuilder retrieverBuilder, Float weight, ScoreNormalizer normalizer) { + public LinearRetrieverComponent(RetrieverBuilder retrieverBuilder, Float weight) { assert retrieverBuilder != null; this.retriever = retrieverBuilder; this.weight = weight == null ? DEFAULT_WEIGHT : weight; - this.normalizer = normalizer == null ? 
DEFAULT_NORMALIZER : normalizer; if (this.weight < 0) { throw new IllegalArgumentException("[weight] must be non-negative"); } @@ -48,7 +45,6 @@ public LinearRetrieverComponent(RetrieverBuilder retrieverBuilder, Float weight, public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.field(RETRIEVER_FIELD.getPreferredName(), retriever); builder.field(WEIGHT_FIELD.getPreferredName(), weight); - builder.field(NORMALIZER_FIELD.getPreferredName(), normalizer.getName()); return builder; } @@ -59,8 +55,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws args -> { RetrieverBuilder retrieverBuilder = (RetrieverBuilder) args[0]; Float weight = (Float) args[1]; - ScoreNormalizer normalizer = (ScoreNormalizer) args[2]; - return new LinearRetrieverComponent(retrieverBuilder, weight, normalizer); + return new LinearRetrieverComponent(retrieverBuilder, weight); } ); @@ -71,12 +66,6 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws return innerRetriever; }, RETRIEVER_FIELD); PARSER.declareFloat(optionalConstructorArg(), WEIGHT_FIELD); - PARSER.declareField( - optionalConstructorArg(), - (p, c) -> ScoreNormalizer.valueOf(p.text()), - NORMALIZER_FIELD, - ObjectParser.ValueType.STRING - ); } public static LinearRetrieverComponent fromXContent(XContentParser parser, RetrieverParserContext context) throws IOException { From d4b1ced34c7876e0fffca58dbd5596a5221b0b54 Mon Sep 17 00:00:00 2001 From: Mridula Date: Wed, 25 Jun 2025 11:50:26 +0100 Subject: [PATCH 02/51] Modified LinearRetrieverBuilder to propagate top level normalizer to each and every sub level --- .../rank/linear/LinearRetrieverBuilder.java | 17 +++-------------- .../rank/linear/LinearRetrieverComponent.java | 2 +- 2 files changed, 4 insertions(+), 15 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java 
b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java index c1a3f7d174487..9965199541aa2 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java @@ -43,6 +43,7 @@ import static org.elasticsearch.action.ValidateActions.addValidationError; import static org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg; import static org.elasticsearch.xpack.rank.RankRRFFeatures.LINEAR_RETRIEVER_SUPPORTED; +import static org.elasticsearch.xpack.rank.linear.LinearRetrieverComponent.DEFAULT_NORMALIZER; import static org.elasticsearch.xpack.rank.linear.LinearRetrieverComponent.DEFAULT_WEIGHT; /** @@ -82,17 +83,17 @@ public final class LinearRetrieverBuilder extends CompoundRetrieverBuilder retrieverComponents = args[0] == null ? List.of() : (List) args[0]; List fields = (List) args[1]; String query = (String) args[2]; - ScoreNormalizer normalizer = args[3] == null ? null : ScoreNormalizer.valueOf((String) args[3]); + ScoreNormalizer normalizer = args[3] == null ? DEFAULT_NORMALIZER : ScoreNormalizer.valueOf((String) args[3]); int rankWindowSize = args[4] == null ? 
RankBuilder.DEFAULT_RANK_WINDOW_SIZE : (int) args[4]; int index = 0; float[] weights = new float[retrieverComponents.size()]; ScoreNormalizer[] normalizers = new ScoreNormalizer[retrieverComponents.size()]; + Arrays.fill(normalizers, normalizer); List innerRetrievers = new ArrayList<>(); for (LinearRetrieverComponent component : retrieverComponents) { innerRetrievers.add(RetrieverSource.from(component.retriever)); weights[index] = component.weight; - normalizers[index] = component.normalizer; index++; } return new LinearRetrieverBuilder(innerRetrievers, fields, query, normalizer, rankWindowSize, weights, normalizers); @@ -221,17 +222,6 @@ public ActionRequestValidationException validate( ), validationException ); - } else if (innerRetrievers.isEmpty() == false && normalizer != null) { - validationException = addValidationError( - String.format( - Locale.ROOT, - "[%s] [%s] cannot be provided when [%s] is specified", - getName(), - NORMALIZER_FIELD.getPreferredName(), - RETRIEVERS_FIELD.getPreferredName() - ), - validationException - ); } return validationException; @@ -393,7 +383,6 @@ public void doToXContent(XContentBuilder builder, Params params) throws IOExcept builder.startObject(); builder.field(LinearRetrieverComponent.RETRIEVER_FIELD.getPreferredName(), entry.retriever()); builder.field(LinearRetrieverComponent.WEIGHT_FIELD.getPreferredName(), weights[index]); - builder.field(LinearRetrieverComponent.NORMALIZER_FIELD.getPreferredName(), normalizers[index].getName()); builder.endObject(); index++; } diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverComponent.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverComponent.java index 5ce1ba42ab1d3..30e7514e225e5 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverComponent.java +++ 
b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverComponent.java @@ -25,7 +25,7 @@ public class LinearRetrieverComponent implements ToXContentObject { public static final ParseField RETRIEVER_FIELD = new ParseField("retriever"); public static final ParseField WEIGHT_FIELD = new ParseField("weight"); - public static final String DEFAULT_NORMALIZER = IdentityScoreNormalizer.INSTANCE.getName(); + public static final ScoreNormalizer DEFAULT_NORMALIZER = IdentityScoreNormalizer.INSTANCE; static final float DEFAULT_WEIGHT = 1f; From 83763e99c44b621491b4470bbd247c89558bc4fb Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 3 Jul 2025 13:49:30 +0000 Subject: [PATCH 03/51] [CI] Auto commit changes from spotless --- .../xpack/rank/linear/LinearRetrieverComponent.java | 1 - 1 file changed, 1 deletion(-) diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverComponent.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverComponent.java index 30e7514e225e5..cbaf8b52b981b 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverComponent.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverComponent.java @@ -10,7 +10,6 @@ import org.elasticsearch.search.retriever.RetrieverBuilder; import org.elasticsearch.search.retriever.RetrieverParserContext; import org.elasticsearch.xcontent.ConstructingObjectParser; -import org.elasticsearch.xcontent.ObjectParser; import org.elasticsearch.xcontent.ParseField; import org.elasticsearch.xcontent.ToXContentObject; import org.elasticsearch.xcontent.XContentBuilder; From 7066b103a992df5ed3b89ae9694562c5fdfd6be8 Mon Sep 17 00:00:00 2001 From: Mridula Date: Wed, 9 Jul 2025 10:56:51 +0100 Subject: [PATCH 04/51] Component is modified --- .../rank/linear/LinearRetrieverComponent.java | 21 +++++++++++++++---- 1 file changed, 
17 insertions(+), 4 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverComponent.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverComponent.java index cbaf8b52b981b..24bd9a42bc1dc 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverComponent.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverComponent.java @@ -24,17 +24,20 @@ public class LinearRetrieverComponent implements ToXContentObject { public static final ParseField RETRIEVER_FIELD = new ParseField("retriever"); public static final ParseField WEIGHT_FIELD = new ParseField("weight"); - public static final ScoreNormalizer DEFAULT_NORMALIZER = IdentityScoreNormalizer.INSTANCE; - + public static final ParseField NORMALIZER_FIELD = new ParseField("normalizer"); + static final float DEFAULT_WEIGHT = 1f; + static final ScoreNormalizer DEFAULT_NORMALIZER = IdentityScoreNormalizer.INSTANCE; RetrieverBuilder retriever; float weight; + ScoreNormalizer normalizer; - public LinearRetrieverComponent(RetrieverBuilder retrieverBuilder, Float weight) { + public LinearRetrieverComponent(RetrieverBuilder retrieverBuilder, Float weight, ScoreNormalizer normalizer) { assert retrieverBuilder != null; this.retriever = retrieverBuilder; this.weight = weight == null ? DEFAULT_WEIGHT : weight; + this.normalizer = normalizer == null ? 
DEFAULT_NORMALIZER : normalizer; if (this.weight < 0) { throw new IllegalArgumentException("[weight] must be non-negative"); } @@ -44,6 +47,9 @@ public LinearRetrieverComponent(RetrieverBuilder retrieverBuilder, Float weight) public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.field(RETRIEVER_FIELD.getPreferredName(), retriever); builder.field(WEIGHT_FIELD.getPreferredName(), weight); + if (normalizer != null && !normalizer.equals(DEFAULT_NORMALIZER)) { + builder.field(NORMALIZER_FIELD.getPreferredName(), normalizer.getName()); + } return builder; } @@ -54,7 +60,8 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws args -> { RetrieverBuilder retrieverBuilder = (RetrieverBuilder) args[0]; Float weight = (Float) args[1]; - return new LinearRetrieverComponent(retrieverBuilder, weight); + ScoreNormalizer normalizer = (ScoreNormalizer) args[2]; + return new LinearRetrieverComponent(retrieverBuilder, weight, normalizer); } ); @@ -65,6 +72,12 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws return innerRetriever; }, RETRIEVER_FIELD); PARSER.declareFloat(optionalConstructorArg(), WEIGHT_FIELD); + PARSER.declareField( + optionalConstructorArg(), + (p, c) -> ScoreNormalizer.valueOf(p.text()), + NORMALIZER_FIELD, + ObjectParser.ValueType.STRING + ); } public static LinearRetrieverComponent fromXContent(XContentParser parser, RetrieverParserContext context) throws IOException { From d8de3615390bac36e44ada6e19d735ae4761c420 Mon Sep 17 00:00:00 2001 From: Mridula Date: Wed, 9 Jul 2025 11:24:02 +0100 Subject: [PATCH 05/51] Retriever builder is also modified according to the new changes: --- .../rank/linear/LinearRetrieverBuilder.java | 38 ++++++++++++++++++- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java 
b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java index 9965199541aa2..3eb1ebc670ee1 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java @@ -83,7 +83,8 @@ public final class LinearRetrieverBuilder extends CompoundRetrieverBuilder retrieverComponents = args[0] == null ? List.of() : (List) args[0]; List fields = (List) args[1]; String query = (String) args[2]; - ScoreNormalizer normalizer = args[3] == null ? DEFAULT_NORMALIZER : ScoreNormalizer.valueOf((String) args[3]); + String normalizerName = (String) args[3]; + ScoreNormalizer normalizer = normalizerName == null ? null : ScoreNormalizer.valueOf(normalizerName); int rankWindowSize = args[4] == null ? RankBuilder.DEFAULT_RANK_WINDOW_SIZE : (int) args[4]; int index = 0; @@ -94,6 +95,7 @@ public final class LinearRetrieverBuilder extends CompoundRetrieverBuilder Date: Wed, 9 Jul 2025 10:33:29 +0000 Subject: [PATCH 06/51] [CI] Auto commit changes from spotless --- .../xpack/rank/linear/LinearRetrieverBuilder.java | 3 +-- .../xpack/rank/linear/LinearRetrieverComponent.java | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java index 3eb1ebc670ee1..f9c6004ee0560 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java @@ -384,8 +384,7 @@ protected RetrieverBuilder doRewrite(QueryRewriteContext ctx) { if (normalizer != null) { ScoreNormalizer[] newNormalizers = new ScoreNormalizer[normalizers.length]; for (int i = 0; i < 
normalizers.length; i++) { - newNormalizers[i] = (normalizers[i] == null || normalizers[i].equals(DEFAULT_NORMALIZER)) - ? normalizer : normalizers[i]; + newNormalizers[i] = (normalizers[i] == null || normalizers[i].equals(DEFAULT_NORMALIZER)) ? normalizer : normalizers[i]; } return new LinearRetrieverBuilder(innerRetrievers, fields, query, null, rankWindowSize, weights, newNormalizers); } diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverComponent.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverComponent.java index 24bd9a42bc1dc..11a1123afbfef 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverComponent.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverComponent.java @@ -25,7 +25,7 @@ public class LinearRetrieverComponent implements ToXContentObject { public static final ParseField RETRIEVER_FIELD = new ParseField("retriever"); public static final ParseField WEIGHT_FIELD = new ParseField("weight"); public static final ParseField NORMALIZER_FIELD = new ParseField("normalizer"); - + static final float DEFAULT_WEIGHT = 1f; static final ScoreNormalizer DEFAULT_NORMALIZER = IdentityScoreNormalizer.INSTANCE; From 3b87c15dd64b4e61919835990912322bdd42f3e5 Mon Sep 17 00:00:00 2001 From: Mridula Date: Wed, 9 Jul 2025 11:36:17 +0100 Subject: [PATCH 07/51] Spotless check done --- .../xpack/rank/linear/LinearRetrieverComponent.java | 1 + 1 file changed, 1 insertion(+) diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverComponent.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverComponent.java index 11a1123afbfef..ee9c426f8d2bd 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverComponent.java +++ 
b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverComponent.java @@ -10,6 +10,7 @@ import org.elasticsearch.search.retriever.RetrieverBuilder; import org.elasticsearch.search.retriever.RetrieverParserContext; import org.elasticsearch.xcontent.ConstructingObjectParser; +import org.elasticsearch.xcontent.ObjectParser; import org.elasticsearch.xcontent.ParseField; import org.elasticsearch.xcontent.ToXContentObject; import org.elasticsearch.xcontent.XContentBuilder; From e3326e2508f62a53506e109af3f8fe8a3e0ac4f4 Mon Sep 17 00:00:00 2001 From: Mridula Date: Thu, 10 Jul 2025 17:42:16 +0100 Subject: [PATCH 08/51] Code changes made --- .../elasticsearch/rest-apis/retrievers.md | 25 ++-- .../rank/linear/LinearRetrieverBuilder.java | 79 +++++++++--- .../rank/linear/LinearRetrieverComponent.java | 4 +- .../test/linear/10_linear_retriever.yml | 117 ++++++++++++++++++ 4 files changed, 195 insertions(+), 30 deletions(-) diff --git a/docs/reference/elasticsearch/rest-apis/retrievers.md b/docs/reference/elasticsearch/rest-apis/retrievers.md index cbcae05e42681..7cca65e9ac604 100644 --- a/docs/reference/elasticsearch/rest-apis/retrievers.md +++ b/docs/reference/elasticsearch/rest-apis/retrievers.md @@ -263,8 +263,21 @@ A retriever that normalizes and linearly combines the scores of other retrievers A list of the sub-retrievers' configuration, that we will take into account and whose result sets we will merge through a weighted sum. Each configuration can have a different weight and normalization depending on the specified retriever. +`normalizer` +: (Optional, String) + + Specifies a normalizer to be applied to all sub-retrievers. This provides a simple way to configure normalization for all retrievers at once. + + The `normalizer` can be specified at the top level, at the per-retriever level, or both, with the following rules: + + * If only the top-level `normalizer` is specified, it applies to all sub-retrievers. 
+ * If both a top-level and a per-retriever `normalizer` are specified, the per-retriever normalizer must be identical to the top-level one. If they differ, the request will fail. + * If only per-retriever normalizers are specified, they can be different for each sub-retriever. + * If no normalizer is specified at any level, no normalization is applied. -Each entry specifies the following parameters: + Available values are: `minmax`, `l2_norm`, and `none`. Defaults to `none`. + +Each entry in the `retrievers` array specifies the following parameters: `retriever` : (Required, a `retriever` object) @@ -279,19 +292,13 @@ Each entry specifies the following parameters: `normalizer` : (Optional, String) - - Specifies how we will normalize the retriever’s scores, before applying the specified `weight`. Available values are: `minmax`, `l2_norm`, and `none`. Defaults to `none`. + Specifies how we will normalize this specific retriever’s scores, before applying the specified `weight`. If a top-level `normalizer` is also specified, this normalizer must be the same. Available values are: `minmax`, `l2_norm`, and `none`. Defaults to `none`. * `none` * `minmax` : A `MinMaxScoreNormalizer` that normalizes scores based on the following formula - - ``` - score = (score - min) / (max - min) - ``` - + `score = (score - min) / (max - min)` * `l2_norm` : An `L2ScoreNormalizer` that normalizes scores using the L2 norm of the score values. -See also [this hybrid search example](docs-content://solutions/search/retrievers-examples.md#retrievers-examples-linear-retriever) using a linear retriever on how to independently configure and apply normalizers to retrievers. 
- `rank_window_size` : (Optional, integer) diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java index f9c6004ee0560..1c5c7f038b2aa 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java @@ -83,14 +83,12 @@ public final class LinearRetrieverBuilder extends CompoundRetrieverBuilder retrieverComponents = args[0] == null ? List.of() : (List) args[0]; List fields = (List) args[1]; String query = (String) args[2]; - String normalizerName = (String) args[3]; - ScoreNormalizer normalizer = normalizerName == null ? null : ScoreNormalizer.valueOf(normalizerName); + ScoreNormalizer normalizer = args[3] == null ? null : ScoreNormalizer.valueOf((String) args[3]); int rankWindowSize = args[4] == null ? RankBuilder.DEFAULT_RANK_WINDOW_SIZE : (int) args[4]; int index = 0; float[] weights = new float[retrieverComponents.size()]; ScoreNormalizer[] normalizers = new ScoreNormalizer[retrieverComponents.size()]; - Arrays.fill(normalizers, normalizer); List innerRetrievers = new ArrayList<>(); for (LinearRetrieverComponent component : retrieverComponents) { innerRetrievers.add(RetrieverSource.from(component.retriever)); @@ -121,8 +119,7 @@ private static float[] getDefaultWeight(List innerRetrievers) { private static ScoreNormalizer[] getDefaultNormalizers(List innerRetrievers) { int size = innerRetrievers != null ? 
innerRetrievers.size() : 0; ScoreNormalizer[] normalizers = new ScoreNormalizer[size]; - Arrays.fill(normalizers, IdentityScoreNormalizer.INSTANCE); - return normalizers; + return new ScoreNormalizer[size]; } public static LinearRetrieverBuilder fromXContent(XContentParser parser, RetrieverParserContext context) throws IOException { @@ -160,17 +157,44 @@ public LinearRetrieverBuilder( // Use a mutable list for innerRetrievers so that we can use addChild super(innerRetrievers == null ? new ArrayList<>() : new ArrayList<>(innerRetrievers), rankWindowSize); if (weights.length != this.innerRetrievers.size()) { - throw new IllegalArgumentException("The number of weights must match the number of inner retrievers"); + throw new IllegalArgumentException( + "[" + + NAME + + "] the number of weights must be equal to the number of retrievers, but found [" + + weights.length + + "] weights and [" + + this.innerRetrievers.size() + + "] retrievers" + ); } if (normalizers.length != this.innerRetrievers.size()) { - throw new IllegalArgumentException("The number of normalizers must match the number of inner retrievers"); + throw new IllegalArgumentException( + "[" + + NAME + + "] the number of normalizers must be equal to the number of retrievers, but found [" + + normalizers.length + + "] normalizers and [" + + this.innerRetrievers.size() + + "] retrievers" + ); } - - this.fields = fields == null ? 
null : List.copyOf(fields); - this.query = query; - this.normalizer = normalizer; this.weights = weights; this.normalizers = normalizers; + this.fields = fields; + this.query = query; + this.normalizer = normalizer; + + if (normalizer != null) { + for (ScoreNormalizer subNormalizer : normalizers) { + if (subNormalizer != null && subNormalizer.equals(DEFAULT_NORMALIZER) == false && subNormalizer.equals(normalizer) == false) { + throw new IllegalArgumentException( + "top-level normalizer [" + + normalizer.getName() + + "] is specified and it should be the same as all sub-retriever normalizers" + ); + } + } + } } public LinearRetrieverBuilder( @@ -228,8 +252,9 @@ public ActionRequestValidationException validate( if (normalizer != null) { for (ScoreNormalizer perRetrieverNormalizer : normalizers) { - boolean isExplicitSubNormalizer = perRetrieverNormalizer != null && !perRetrieverNormalizer.equals(DEFAULT_NORMALIZER); - boolean isMismatch = isExplicitSubNormalizer && !perRetrieverNormalizer.equals(normalizer); + boolean isExplicitSubNormalizer = perRetrieverNormalizer != null + && perRetrieverNormalizer.equals(DEFAULT_NORMALIZER) == false; + boolean isMismatch = isExplicitSubNormalizer && perRetrieverNormalizer.equals(normalizer) == false; if (isMismatch) { validationException = addValidationError( String.format( @@ -383,10 +408,26 @@ protected RetrieverBuilder doRewrite(QueryRewriteContext ctx) { } if (normalizer != null) { ScoreNormalizer[] newNormalizers = new ScoreNormalizer[normalizers.length]; - for (int i = 0; i < normalizers.length; i++) { - newNormalizers[i] = (normalizers[i] == null || normalizers[i].equals(DEFAULT_NORMALIZER)) ? 
normalizer : normalizers[i]; - } - return new LinearRetrieverBuilder(innerRetrievers, fields, query, null, rankWindowSize, weights, newNormalizers); + Arrays.fill(newNormalizers, normalizer); + rewritten = new LinearRetrieverBuilder( + rewritten.innerRetrievers, + rewritten.fields, + rewritten.query, + null, + rewritten.rankWindowSize, + rewritten.weights, + newNormalizers + ); + } else { + rewritten = new LinearRetrieverBuilder( + rewritten.innerRetrievers, + rewritten.fields, + rewritten.query, + rewritten.normalizer, + rewritten.rankWindowSize, + rewritten.weights, + rewritten.normalizers + ); } return rewritten; @@ -413,7 +454,7 @@ public void doToXContent(XContentBuilder builder, Params params) throws IOExcept builder.startObject(); builder.field(LinearRetrieverComponent.RETRIEVER_FIELD.getPreferredName(), entry.retriever()); builder.field(LinearRetrieverComponent.WEIGHT_FIELD.getPreferredName(), weights[index]); - if (normalizers[index] != null && !normalizers[index].equals(DEFAULT_NORMALIZER)) { + if (normalizers[index] != null && normalizers[index].equals(DEFAULT_NORMALIZER) == false) { builder.field(LinearRetrieverComponent.NORMALIZER_FIELD.getPreferredName(), normalizers[index].getName()); } builder.endObject(); @@ -432,7 +473,7 @@ public void doToXContent(XContentBuilder builder, Params params) throws IOExcept if (query != null) { builder.field(QUERY_FIELD.getPreferredName(), query); } - if (normalizer != null && !normalizer.equals(DEFAULT_NORMALIZER)) { + if (normalizer != null && normalizer.equals(DEFAULT_NORMALIZER) == false) { builder.field(NORMALIZER_FIELD.getPreferredName(), normalizer.getName()); } diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverComponent.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverComponent.java index ee9c426f8d2bd..7da03860d9a26 100644 --- 
a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverComponent.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverComponent.java @@ -38,7 +38,7 @@ public LinearRetrieverComponent(RetrieverBuilder retrieverBuilder, Float weight, assert retrieverBuilder != null; this.retriever = retrieverBuilder; this.weight = weight == null ? DEFAULT_WEIGHT : weight; - this.normalizer = normalizer == null ? DEFAULT_NORMALIZER : normalizer; + this.normalizer = normalizer; if (this.weight < 0) { throw new IllegalArgumentException("[weight] must be non-negative"); } @@ -48,7 +48,7 @@ public LinearRetrieverComponent(RetrieverBuilder retrieverBuilder, Float weight, public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.field(RETRIEVER_FIELD.getPreferredName(), retriever); builder.field(WEIGHT_FIELD.getPreferredName(), weight); - if (normalizer != null && !normalizer.equals(DEFAULT_NORMALIZER)) { + if (normalizer != null && normalizer.equals(DEFAULT_NORMALIZER) == false) { builder.field(NORMALIZER_FIELD.getPreferredName(), normalizer.getName()); } return builder; diff --git a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml index f62c7e4987046..f659dd2126077 100644 --- a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml +++ b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml @@ -1333,3 +1333,120 @@ setup: - match: { hits.total.value: 1 } - match: { hits.hits.0._id: "1" } - close_to: { hits.hits.0._score: { value: 1.0, error: 0.001} } + +--- +"top level normalizer should apply to all sub-retrievers": + - do: + search: + index: test + body: + retriever: + linear: + normalizer: "minmax" + retrievers: [ + { + 
retriever: { + standard: { + query: { + bool: { + should: [ + { constant_score: { filter: { term: { keyword: { value: "one" } } }, boost: 10.0 } }, + { constant_score: { filter: { term: { keyword: { value: "two" } } }, boost: 9.0 } }, + { constant_score: { filter: { term: { keyword: { value: "three" } } }, boost: 5.0 } } + ] + } + } + } + }, + weight: 10.0 + }, + { + retriever: { + knn: { + field: "vector", + query_vector: [ 4 ], + k: 1, + num_candidates: 1 + } + }, + weight: 2.0 + } + ] + + - match: { hits.total.value: 4 } + - match: { hits.hits.0._id: "1" } + - close_to: { hits.hits.0._score: { value: 10.0, error: 0.001} } + - match: { hits.hits.1._id: "2" } + - close_to: { hits.hits.1._score: { value: 8.0, error: 0.001} } + - match: { hits.hits.2._id: "4" } + - close_to: { hits.hits.2._score: { value: 2.0, error: 0.001} } + - match: { hits.hits.3._id: "3" } + - close_to: { hits.hits.3._score: { value: 0.0, error: 0.001 } } + +--- +"should throw on different normalizers": + - do: + catch: /top-level normalizer \[minmax\] is specified and it should be the same as all sub-retriever normalizers/ + search: + index: test + body: + retriever: + linear: + normalizer: "minmax" + retrievers: [ + { + retriever: { + standard: { + query: { + constant_score: { + filter: { + term: { + keyword: { + value: "one" + } + } + }, + boost: 10.0 + } + } + } + }, + weight: 1.0, + normalizer: "l2_norm" + } + ] + +--- +"should not throw on same normalizers": + - do: + search: + index: test + body: + retriever: + linear: + normalizer: "minmax" + retrievers: [ + { + retriever: { + standard: { + query: { + constant_score: { + filter: { + term: { + keyword: { + value: "one" + } + } + }, + boost: 10.0 + } + } + } + }, + weight: 1.0, + normalizer: "minmax" + } + ] + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "1" } + - close_to: { hits.hits.0._score: { value: 1.0, error: 0.001} } From d36ada2f382bf29e01fbb6b4e79f28cfd9c4532c Mon Sep 17 00:00:00 2001 From: Mridula Date: Fri, 
11 Jul 2025 14:31:53 +0100 Subject: [PATCH 09/51] FIX: Cast rewritten builder in LinearRetrieverBuilder --- .../rank/linear/LinearRetrieverBuilder.java | 31 +++++++++++-------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java index 1c5c7f038b2aa..d9db1216a7a29 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java @@ -406,27 +406,32 @@ protected RetrieverBuilder doRewrite(QueryRewriteContext ctx) { rewritten = new StandardRetrieverBuilder(new MatchNoneQueryBuilder()); } } + if (rewritten instanceof LinearRetrieverBuilder == false) { + return rewritten; + } + LinearRetrieverBuilder linearRewritten = (LinearRetrieverBuilder) rewritten; + if (normalizer != null) { - ScoreNormalizer[] newNormalizers = new ScoreNormalizer[normalizers.length]; + ScoreNormalizer[] newNormalizers = new ScoreNormalizer[linearRewritten.normalizers.length]; Arrays.fill(newNormalizers, normalizer); rewritten = new LinearRetrieverBuilder( - rewritten.innerRetrievers, - rewritten.fields, - rewritten.query, + linearRewritten.innerRetrievers, + linearRewritten.fields, + linearRewritten.query, null, - rewritten.rankWindowSize, - rewritten.weights, + linearRewritten.rankWindowSize, + linearRewritten.weights, newNormalizers ); } else { rewritten = new LinearRetrieverBuilder( - rewritten.innerRetrievers, - rewritten.fields, - rewritten.query, - rewritten.normalizer, - rewritten.rankWindowSize, - rewritten.weights, - rewritten.normalizers + linearRewritten.innerRetrievers, + linearRewritten.fields, + linearRewritten.query, + linearRewritten.normalizer, + linearRewritten.rankWindowSize, + linearRewritten.weights, + 
linearRewritten.normalizers ); } From 330e32be09c7fc87e9f1f1fbf2b0604e72a2a71b Mon Sep 17 00:00:00 2001 From: Mridula Date: Fri, 25 Jul 2025 16:23:31 +0100 Subject: [PATCH 10/51] modified the builder --- .../rank/linear/LinearRetrieverBuilder.java | 51 +++++++++---------- 1 file changed, 23 insertions(+), 28 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java index d9db1216a7a29..fe6985c6c61a6 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java @@ -119,7 +119,8 @@ private static float[] getDefaultWeight(List innerRetrievers) { private static ScoreNormalizer[] getDefaultNormalizers(List innerRetrievers) { int size = innerRetrievers != null ? innerRetrievers.size() : 0; ScoreNormalizer[] normalizers = new ScoreNormalizer[size]; - return new ScoreNormalizer[size]; + Arrays.fill(normalizers, DEFAULT_NORMALIZER); + return normalizers; } public static LinearRetrieverBuilder fromXContent(XContentParser parser, RetrieverParserContext context) throws IOException { @@ -185,16 +186,32 @@ public LinearRetrieverBuilder( this.normalizer = normalizer; if (normalizer != null) { - for (ScoreNormalizer subNormalizer : normalizers) { - if (subNormalizer != null && subNormalizer.equals(DEFAULT_NORMALIZER) == false && subNormalizer.equals(normalizer) == false) { + // First pass: validate that any specified per-retriever normalizers match the top-level one + for (int i = 0; i < normalizers.length; i++) { + ScoreNormalizer subNormalizer = normalizers[i]; + if (subNormalizer != null && !subNormalizer.equals(DEFAULT_NORMALIZER) && !subNormalizer.equals(normalizer)) { throw new IllegalArgumentException( - "top-level normalizer [" + "[" + + NAME + + 
"] All per-retriever normalizers must match the top-level normalizer: " + + "expected [" + normalizer.getName() - + "] is specified and it should be the same as all sub-retriever normalizers" + + "], found [" + + subNormalizer.getName() + + "] in retriever [" + + i + + "]" ); } } + // Second pass: propagate top-level normalizer to any unspecified positions + for (int i = 0; i < normalizers.length; i++) { + if (normalizers[i] == null || normalizers[i].equals(DEFAULT_NORMALIZER)) { + normalizers[i] = normalizer; + } + } } + } public LinearRetrieverBuilder( @@ -250,28 +267,6 @@ public ActionRequestValidationException validate( ); } - if (normalizer != null) { - for (ScoreNormalizer perRetrieverNormalizer : normalizers) { - boolean isExplicitSubNormalizer = perRetrieverNormalizer != null - && perRetrieverNormalizer.equals(DEFAULT_NORMALIZER) == false; - boolean isMismatch = isExplicitSubNormalizer && perRetrieverNormalizer.equals(normalizer) == false; - if (isMismatch) { - validationException = addValidationError( - String.format( - Locale.ROOT, - "[%s] top-level [%s] is [%s] but a sub-retriever specifies [%s]", - getName(), - NORMALIZER_FIELD.getPreferredName(), - normalizer.getName(), - perRetrieverNormalizer.getName() - ), - validationException - ); - break; - } - } - } - return validationException; } @@ -418,7 +413,7 @@ protected RetrieverBuilder doRewrite(QueryRewriteContext ctx) { linearRewritten.innerRetrievers, linearRewritten.fields, linearRewritten.query, - null, + normalizer, linearRewritten.rankWindowSize, linearRewritten.weights, newNormalizers From c35a28ee40956b56d6ccab429d1c2aa936d8df4f Mon Sep 17 00:00:00 2001 From: Mridula Date: Fri, 25 Jul 2025 16:27:41 +0100 Subject: [PATCH 11/51] Update retrievers.md --- docs/reference/elasticsearch/rest-apis/retrievers.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/reference/elasticsearch/rest-apis/retrievers.md b/docs/reference/elasticsearch/rest-apis/retrievers.md index 
7cca65e9ac604..539fd96276e3f 100644 --- a/docs/reference/elasticsearch/rest-apis/retrievers.md +++ b/docs/reference/elasticsearch/rest-apis/retrievers.md @@ -296,7 +296,9 @@ Each entry in the `retrievers` array specifies the following parameters: * `none` * `minmax` : A `MinMaxScoreNormalizer` that normalizes scores based on the following formula - `score = (score - min) / (max - min)` + ``` + score = (score - min) / (max - min) + ``` * `l2_norm` : An `L2ScoreNormalizer` that normalizes scores using the L2 norm of the score values. `rank_window_size` From b3d7f5facf09b51e994fd4294f4c85a9142d4124 Mon Sep 17 00:00:00 2001 From: Mridula Date: Fri, 25 Jul 2025 16:29:22 +0100 Subject: [PATCH 12/51] Update retrievers.md --- docs/reference/elasticsearch/rest-apis/retrievers.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/reference/elasticsearch/rest-apis/retrievers.md b/docs/reference/elasticsearch/rest-apis/retrievers.md index 539fd96276e3f..11365a078b1be 100644 --- a/docs/reference/elasticsearch/rest-apis/retrievers.md +++ b/docs/reference/elasticsearch/rest-apis/retrievers.md @@ -296,10 +296,14 @@ Each entry in the `retrievers` array specifies the following parameters: * `none` * `minmax` : A `MinMaxScoreNormalizer` that normalizes scores based on the following formula + ``` score = (score - min) / (max - min) ``` + * `l2_norm` : An `L2ScoreNormalizer` that normalizes scores using the L2 norm of the score values. + See also [this hybrid search example](docs-content://solutions/search/retrievers-examples.md#retrievers-examples-linear-retriever) using a linear retriever on how to independently configure and apply normalizers to retrievers. 
+ `rank_window_size` : (Optional, integer) From 8ac90e663219b4761b4cabd04aeb5a29fc9800d2 Mon Sep 17 00:00:00 2001 From: Mridula Date: Fri, 25 Jul 2025 16:31:44 +0100 Subject: [PATCH 13/51] Update docs/changelog/129693.yaml --- docs/changelog/129693.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/129693.yaml diff --git a/docs/changelog/129693.yaml b/docs/changelog/129693.yaml new file mode 100644 index 0000000000000..885928f8db28c --- /dev/null +++ b/docs/changelog/129693.yaml @@ -0,0 +1,5 @@ +pr: 129693 +summary: Linear retriever top level option for normalizer +area: Search +type: enhancement +issues: [] From 0309a65a4fc2d323e32c30dfff52c6b7a894e037 Mon Sep 17 00:00:00 2001 From: Mridula Date: Fri, 25 Jul 2025 16:32:03 +0100 Subject: [PATCH 14/51] Update docs/changelog/129693.yaml Co-authored-by: Kathleen DeRusso --- docs/changelog/129693.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/changelog/129693.yaml b/docs/changelog/129693.yaml index 885928f8db28c..8edab59b7d03f 100644 --- a/docs/changelog/129693.yaml +++ b/docs/changelog/129693.yaml @@ -1,5 +1,5 @@ pr: 129693 -summary: Linear retriever top level option for normalizer +summary: Add top level normalizer for linear retriever area: Search type: enhancement issues: [] From 299a222c5b54a9aa4006bbbfe8423df78b98ec0b Mon Sep 17 00:00:00 2001 From: Mridula Date: Fri, 25 Jul 2025 16:35:57 +0100 Subject: [PATCH 15/51] Update retrievers.md --- docs/reference/elasticsearch/rest-apis/retrievers.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/reference/elasticsearch/rest-apis/retrievers.md b/docs/reference/elasticsearch/rest-apis/retrievers.md index 11365a078b1be..1b66548776357 100644 --- a/docs/reference/elasticsearch/rest-apis/retrievers.md +++ b/docs/reference/elasticsearch/rest-apis/retrievers.md @@ -296,14 +296,14 @@ Each entry in the `retrievers` array specifies the following parameters: * `none` * `minmax` : A 
`MinMaxScoreNormalizer` that normalizes scores based on the following formula - + ``` score = (score - min) / (max - min) ``` - + * `l2_norm` : An `L2ScoreNormalizer` that normalizes scores using the L2 norm of the score values. - See also [this hybrid search example](docs-content://solutions/search/retrievers-examples.md#retrievers-examples-linear-retriever) using a linear retriever on how to independently configure and apply normalizers to retrievers. +See also [this hybrid search example](docs-content://solutions/search/retrievers-examples.md#retrievers-examples-linear-retriever) using a linear retriever on how to independently configure and apply normalizers to retrievers. `rank_window_size` : (Optional, integer) From b6d5109247fb4cfe4d72a2215c62abec190fd426 Mon Sep 17 00:00:00 2001 From: Mridula Date: Fri, 25 Jul 2025 16:50:17 +0100 Subject: [PATCH 16/51] WIP --- .../rank/linear/LinearRetrieverBuilder.java | 55 ++-- .../10_linear_retriever_normalizers.yml | 247 ++++++++++++++++++ 2 files changed, 276 insertions(+), 26 deletions(-) create mode 100644 x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever_normalizers.yml diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java index fe6985c6c61a6..077b58a1fbbb0 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java @@ -123,6 +123,34 @@ private static ScoreNormalizer[] getDefaultNormalizers(List inn return normalizers; } + private void normalizeNormalizerArray(ScoreNormalizer topLevelNormalizer, ScoreNormalizer[] normalizers) { + for (int i = 0; i < normalizers.length; i++) { + ScoreNormalizer current = normalizers[i]; + + if (topLevelNormalizer != 
null) { + // Validate explicit per-retriever normalizers match top-level + if (current != null && !current.equals(DEFAULT_NORMALIZER) && !current.equals(topLevelNormalizer)) { + throw new IllegalArgumentException( + String.format( + "[%s] All per-retriever normalizers must match the top-level normalizer: " + + "expected [%s], found [%s] in retriever [%d]", + NAME, topLevelNormalizer.getName(), current.getName(), i + ) + ); + } + // Propagate top-level normalizer to unspecified positions + if (current == null || current.equals(DEFAULT_NORMALIZER)) { + normalizers[i] = topLevelNormalizer; + } + } else { + // No top-level normalizer: ensure null values become DEFAULT_NORMALIZER + if (current == null) { + normalizers[i] = DEFAULT_NORMALIZER; + } + } + } + } + public static LinearRetrieverBuilder fromXContent(XContentParser parser, RetrieverParserContext context) throws IOException { if (context.clusterSupportsFeature(LINEAR_RETRIEVER_SUPPORTED) == false) { throw new ParsingException(parser.getTokenLocation(), "unknown retriever [" + NAME + "]"); @@ -185,32 +213,7 @@ public LinearRetrieverBuilder( this.query = query; this.normalizer = normalizer; - if (normalizer != null) { - // First pass: validate that any specified per-retriever normalizers match the top-level one - for (int i = 0; i < normalizers.length; i++) { - ScoreNormalizer subNormalizer = normalizers[i]; - if (subNormalizer != null && !subNormalizer.equals(DEFAULT_NORMALIZER) && !subNormalizer.equals(normalizer)) { - throw new IllegalArgumentException( - "[" - + NAME - + "] All per-retriever normalizers must match the top-level normalizer: " - + "expected [" - + normalizer.getName() - + "], found [" - + subNormalizer.getName() - + "] in retriever [" - + i - + "]" - ); - } - } - // Second pass: propagate top-level normalizer to any unspecified positions - for (int i = 0; i < normalizers.length; i++) { - if (normalizers[i] == null || normalizers[i].equals(DEFAULT_NORMALIZER)) { - normalizers[i] = normalizer; - } 
- } - } + normalizeNormalizerArray(normalizer, normalizers); } diff --git a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever_normalizers.yml b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever_normalizers.yml new file mode 100644 index 0000000000000..d99cda3185027 --- /dev/null +++ b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever_normalizers.yml @@ -0,0 +1,247 @@ +setup: + - requires: + cluster_features: [ "linear_retriever_supported", "linear_retriever.l2_norm" ] + reason: "Support for linear retriever and L2 normalization" + test_runner_features: close_to + + - do: + indices.create: + index: test + body: + mappings: + properties: + vector: + type: dense_vector + dims: 1 + index: true + similarity: l2_norm + index_options: + type: flat + keyword: + type: keyword + other_keyword: + type: keyword + timestamp: + type: date + + - do: + bulk: + refresh: true + index: test + body: + - '{"index": {"_id": 1 }}' + - '{"vector": [1], "keyword": "one", "other_keyword": "other", "timestamp": "2021-01-01T00:00:00"}' + - '{"index": {"_id": 2 }}' + - '{"vector": [2], "keyword": "two", "timestamp": "2022-01-01T00:00:00"}' + - '{"index": {"_id": 3 }}' + - '{"vector": [3], "keyword": "three", "timestamp": "2023-01-01T00:00:00"}' + - '{"index": {"_id": 4 }}' + - '{"vector": [4], "keyword": "four", "other_keyword": "other", "timestamp": "2024-01-01T00:00:00"}' + +--- +"Linear retriever with top-level L2 normalization": + - do: + search: + index: test + body: + retriever: + linear: + normalizer: l2_norm + retrievers: [ + { + retriever: { + standard: { + query: { + constant_score: { + filter: { + term: { + keyword: { + value: "one" + } + } + }, + boost: 5.0 + } + } + } + }, + weight: 1.0 + }, + { + retriever: { + standard: { + query: { + constant_score: { + filter: { + term: { + keyword: { + value: "four" + } + } + }, + boost: 12.0 + } + } + } + 
}, + weight: 1.0 + } + ] + + - match: { hits.total.value: 2 } + - match: { hits.hits.0._id: "4" } # Doc 4 should rank higher with normalized scores + - match: { hits.hits.1._id: "1" } + # With L2 normalization: [5.0, 12.0] becomes [5.0/13.0, 12.0/13.0] + - close_to: { hits.hits.0._score: { value: 0.923, error: 0.01} } # 12.0/13.0 + - close_to: { hits.hits.1._score: { value: 0.385, error: 0.01} } # 5.0/13.0 + +--- +"Linear retriever with per-retriever L2 normalization": + - do: + search: + index: test + body: + retriever: + linear: + retrievers: [ + { + retriever: { + standard: { + query: { + constant_score: { + filter: { + term: { + keyword: { + value: "one" + } + } + }, + boost: 5.0 + } + } + } + }, + weight: 1.0, + normalizer: l2_norm + }, + { + retriever: { + standard: { + query: { + constant_score: { + filter: { + term: { + keyword: { + value: "four" + } + } + }, + boost: 12.0 + } + } + } + }, + weight: 1.0, + normalizer: l2_norm + } + ] + + - match: { hits.total.value: 2 } + # With per-retriever L2 normalization, both scores would be normalized to 1.0 + # So final score = 1.0 * weight1 + 1.0 * weight2 = 2.0 for each doc + # Then sorting is done by _doc (or some other tiebreaker) + - close_to: { hits.hits.0._score: { value: 1.0, error: 0.01} } + - close_to: { hits.hits.1._score: { value: 1.0, error: 0.01} } + +--- +"Linear retriever with mixed normalization (top-level and per-retriever with same normalizer)": + - do: + search: + index: test + body: + retriever: + linear: + normalizer: l2_norm + retrievers: [ + { + retriever: { + standard: { + query: { + constant_score: { + filter: { + term: { + keyword: { + value: "one" + } + } + }, + boost: 5.0 + } + } + } + }, + weight: 1.0 + }, + { + retriever: { + standard: { + query: { + constant_score: { + filter: { + term: { + keyword: { + value: "four" + } + } + }, + boost: 12.0 + } + } + } + }, + weight: 1.0, + normalizer: l2_norm + } + ] + + - match: { hits.total.value: 2 } + - match: { hits.hits.0._id: "4" } + - 
match: { hits.hits.1._id: "1" } + # With L2 normalization: [5.0, 12.0] becomes [5.0/13.0, 12.0/13.0] + - close_to: { hits.hits.0._score: { value: 0.923, error: 0.01} } + - close_to: { hits.hits.1._score: { value: 0.385, error: 0.01} } + +--- +"Linear retriever with mismatched normalizers (should fail)": + - do: + catch: bad_request + search: + index: test + body: + retriever: + linear: + normalizer: l2_norm + retrievers: [ + { + retriever: { + standard: { + query: { + match_all: {} + } + } + } + }, + { + retriever: { + standard: { + query: { + match_all: {} + } + } + }, + normalizer: minmax + } + ] + + - match: { error.root_cause.0.type: "illegal_argument_exception" } + - match: { error.root_cause.0.reason: /.*All per-retriever normalizers must match the top-level normalizer.*/ } From 08729250e22b20d84ef0e652b8d966de56b9a792 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Fri, 25 Jul 2025 16:00:59 +0000 Subject: [PATCH 17/51] [CI] Auto commit changes from spotless --- .../xpack/rank/linear/LinearRetrieverBuilder.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java index 077b58a1fbbb0..8224c45e3cfd3 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java @@ -126,15 +126,18 @@ private static ScoreNormalizer[] getDefaultNormalizers(List inn private void normalizeNormalizerArray(ScoreNormalizer topLevelNormalizer, ScoreNormalizer[] normalizers) { for (int i = 0; i < normalizers.length; i++) { ScoreNormalizer current = normalizers[i]; - + if (topLevelNormalizer != null) { // Validate explicit per-retriever normalizers match top-level if (current != null && 
!current.equals(DEFAULT_NORMALIZER) && !current.equals(topLevelNormalizer)) { throw new IllegalArgumentException( String.format( "[%s] All per-retriever normalizers must match the top-level normalizer: " - + "expected [%s], found [%s] in retriever [%d]", - NAME, topLevelNormalizer.getName(), current.getName(), i + + "expected [%s], found [%s] in retriever [%d]", + NAME, + topLevelNormalizer.getName(), + current.getName(), + i ) ); } From ff65d27669a35c106cf967e1a6031d9897754991 Mon Sep 17 00:00:00 2001 From: Mridula Date: Fri, 25 Jul 2025 17:00:43 +0100 Subject: [PATCH 18/51] WIP --- .../rank/linear/LinearRetrieverBuilder.java | 11 +++++++ .../LinearRetrieverBuilderParsingTests.java | 32 +++++++++++++++---- 2 files changed, 37 insertions(+), 6 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java index 8224c45e3cfd3..05e921591f6bd 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java @@ -271,6 +271,17 @@ public ActionRequestValidationException validate( ), validationException ); + } else if (innerRetrievers.isEmpty() == false && normalizer != null) { + validationException = addValidationError( + String.format( + Locale.ROOT, + "[%s] [%s] cannot be provided when [%s] is specified", + getName(), + NORMALIZER_FIELD.getPreferredName(), + RETRIEVERS_FIELD.getPreferredName() + ), + validationException + ); } return validationException; diff --git a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderParsingTests.java b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderParsingTests.java index 74e18bf12fffc..dc57a6a175167 100644 --- 
a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderParsingTests.java +++ b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderParsingTests.java @@ -62,12 +62,32 @@ protected LinearRetrieverBuilder createTestInstance() { List innerRetrievers = new ArrayList<>(); float[] weights = new float[num]; ScoreNormalizer[] normalizers = new ScoreNormalizer[num]; - for (int i = 0; i < num; i++) { - innerRetrievers.add( - new CompoundRetrieverBuilder.RetrieverSource(TestRetrieverBuilder.createRandomTestRetrieverBuilder(), null) - ); - weights[i] = randomFloat(); - normalizers[i] = randomScoreNormalizer(); + // Create normalizer combinations that follow the API design rules + if (normalizer != null) { + // When top-level normalizer is specified, per-retriever normalizers must either: + // 1. Be null/default (will be propagated), or + // 2. Exactly match the top-level normalizer + boolean useMatchingNormalizers = randomBoolean(); + for (int i = 0; i < num; i++) { + innerRetrievers.add( + new CompoundRetrieverBuilder.RetrieverSource(TestRetrieverBuilder.createRandomTestRetrieverBuilder(), null) + ); + weights[i] = randomFloat(); + if (useMatchingNormalizers) { + normalizers[i] = normalizer; // Exactly match top-level + } else { + normalizers[i] = randomBoolean() ? 
null : IdentityScoreNormalizer.INSTANCE; // Will be propagated + } + } + } else { + // No top-level normalizer: per-retriever normalizers can be anything + for (int i = 0; i < num; i++) { + innerRetrievers.add( + new CompoundRetrieverBuilder.RetrieverSource(TestRetrieverBuilder.createRandomTestRetrieverBuilder(), null) + ); + weights[i] = randomFloat(); + normalizers[i] = randomScoreNormalizer(); + } } return new LinearRetrieverBuilder(innerRetrievers, fields, query, normalizer, rankWindowSize, weights, normalizers); From 3d605eef2a7b0aa2ee1260b68fb0e03e469540a6 Mon Sep 17 00:00:00 2001 From: Mridula Date: Thu, 31 Jul 2025 18:36:58 +0100 Subject: [PATCH 19/51] Resolved errors --- .../rest-apis/retrievers/linear-retriever.md | 51 ++++- .../rank/linear/LinearRetrieverBuilder.java | 29 ++- .../linear/LinearRetrieverBuilderTests.java | 60 ++++++ .../test/linear/10_linear_retriever.yml | 181 ++++++++++++++---- 4 files changed, 259 insertions(+), 62 deletions(-) diff --git a/docs/reference/elasticsearch/rest-apis/retrievers/linear-retriever.md b/docs/reference/elasticsearch/rest-apis/retrievers/linear-retriever.md index 5008831b72acd..accd772aa0305 100644 --- a/docs/reference/elasticsearch/rest-apis/retrievers/linear-retriever.md +++ b/docs/reference/elasticsearch/rest-apis/retrievers/linear-retriever.md @@ -31,9 +31,12 @@ Combining `query` and `retrievers` is not supported. `normalizer` {applies_to}`stack: ga 9.1` : (Optional, String) - The normalizer to use when using the [multi-field query format](../retrievers.md#multi-field-query-format). + The normalizer to use for score normalization. This serves as the default normalizer for all sub-retrievers. See [normalizers](#linear-retriever-normalizers) for supported values. - Required when `query` is specified. + + When using the [multi-field query format](../retrievers.md#multi-field-query-format), this field is required when `query` is specified. 
+ + When using the `retrievers` array format, this field serves as the default normalizer for all sub-retrievers. Individual sub-retrievers can override this default by specifying their own `normalizer` field. ::::{warning} Avoid using `none` as that will disable normalization and may bias the result set towards lexical matches. @@ -91,3 +94,47 @@ The `linear` retriever supports the following normalizers: score = (score - min) / (max - min) ``` * `l2_norm`: Normalizes scores using the L2 norm of the score values {applies_to}`stack: ga 9.1` + +## Examples [linear-retriever-examples] + +### Top-level normalizer example + +This example shows how to use a top-level normalizer that applies to all sub-retrievers: + +```console +GET my_index/_search +{ + "retriever": { + "linear": { + "retrievers": [ + { + "retriever": { + "standard": { + "query": { + "match": { + "title": "elasticsearch" + } + } + } + }, + "weight": 1.0 + }, + { + "retriever": { + "knn": { + "field": "title_vector", + "query_vector": [0.1, 0.2, 0.3], + "k": 10, + "num_candidates": 100 + } + }, + "weight": 2.0 + } + ], + "normalizer": "minmax" + } + } +} +``` + +In this example, the `minmax` normalizer is applied to both the standard retriever and the kNN retriever. The top-level normalizer serves as a default that can be overridden by individual sub-retrievers. When using the multi-field query format, the top-level normalizer is applied to all generated inner retrievers. 
diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java index 05e921591f6bd..7263ab3f7a37f 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java @@ -212,12 +212,17 @@ public LinearRetrieverBuilder( } this.weights = weights; this.normalizers = normalizers; - this.fields = fields; - this.query = query; - this.normalizer = normalizer; - - normalizeNormalizerArray(normalizer, normalizers); - + // Apply top-level normalizer priority system: + // 1. Retriever-specific override (if specified) + // 2. Top-level normalizer (if specified) + // 3. Default (IdentityScoreNormalizer.INSTANCE) + ScoreNormalizer effectiveNormalizer = normalizer != null ? normalizer : DEFAULT_NORMALIZER; + for (int i = 0; i < normalizers.length; i++) { + if (normalizers[i] == null || normalizers[i].equals(DEFAULT_NORMALIZER)) { + normalizers[i] = effectiveNormalizer; + } + // If per-retriever normalizer is explicitly specified, keep it (allow override) + } } public LinearRetrieverBuilder( @@ -271,19 +276,7 @@ public ActionRequestValidationException validate( ), validationException ); - } else if (innerRetrievers.isEmpty() == false && normalizer != null) { - validationException = addValidationError( - String.format( - Locale.ROOT, - "[%s] [%s] cannot be provided when [%s] is specified", - getName(), - NORMALIZER_FIELD.getPreferredName(), - RETRIEVERS_FIELD.getPreferredName() - ), - validationException - ); } - return validationException; } diff --git a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java index 
c211440d10bae..f87816fbe4427 100644 --- a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java +++ b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java @@ -22,6 +22,7 @@ import org.elasticsearch.index.query.QueryRewriteContext; import org.elasticsearch.search.builder.PointInTimeBuilder; import org.elasticsearch.search.retriever.CompoundRetrieverBuilder; +import org.elasticsearch.search.retriever.KnnRetrieverBuilder; import org.elasticsearch.search.retriever.RetrieverBuilder; import org.elasticsearch.search.retriever.StandardRetrieverBuilder; import org.elasticsearch.test.ESTestCase; @@ -326,4 +327,63 @@ public int hashCode() { return Objects.hash(retriever, weight, normalizer); } } + + public void testTopLevelNormalizerWithRetrieversArray() { + StandardRetrieverBuilder standardRetriever = new StandardRetrieverBuilder(new MatchQueryBuilder("title", "elasticsearch")); + KnnRetrieverBuilder knnRetriever = new KnnRetrieverBuilder( + "title_vector", + new float[] { 0.1f, 0.2f, 0.3f }, + null, + 10, + 100, + null, + null + ); + + LinearRetrieverBuilder retriever = new LinearRetrieverBuilder( + List.of( + CompoundRetrieverBuilder.RetrieverSource.from(standardRetriever), + CompoundRetrieverBuilder.RetrieverSource.from(knnRetriever) + ), + null, // fields + null, // query + MinMaxScoreNormalizer.INSTANCE, // top-level normalizer + DEFAULT_RANK_WINDOW_SIZE, + new float[] { 1.0f, 2.0f }, + new ScoreNormalizer[] { null, null } + ); + + assertEquals(MinMaxScoreNormalizer.INSTANCE, retriever.getNormalizers()[0]); + assertEquals(MinMaxScoreNormalizer.INSTANCE, retriever.getNormalizers()[1]); + } + + public void testTopLevelNormalizerWithPerRetrieverOverrides() { + StandardRetrieverBuilder standardRetriever = new StandardRetrieverBuilder(new MatchQueryBuilder("title", "elasticsearch")); + KnnRetrieverBuilder knnRetriever = new KnnRetrieverBuilder( + "title_vector", + new 
float[] { 0.1f, 0.2f, 0.3f }, + null, + 10, + 100, + null, + null + ); + + LinearRetrieverBuilder retriever = new LinearRetrieverBuilder( + List.of( + CompoundRetrieverBuilder.RetrieverSource.from(standardRetriever), + CompoundRetrieverBuilder.RetrieverSource.from(knnRetriever) + ), + null, // fields + null, // query + MinMaxScoreNormalizer.INSTANCE, // top-level normalizer + DEFAULT_RANK_WINDOW_SIZE, + new float[] { 1.0f, 2.0f }, + new ScoreNormalizer[] { L2ScoreNormalizer.INSTANCE, null } + ); + + assertEquals(L2ScoreNormalizer.INSTANCE, retriever.getNormalizers()[0]); + assertEquals(MinMaxScoreNormalizer.INSTANCE, retriever.getNormalizers()[1]); + } + } diff --git a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml index f659dd2126077..b9e55aa85e05b 100644 --- a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml +++ b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml @@ -1335,14 +1335,14 @@ setup: - close_to: { hits.hits.0._score: { value: 1.0, error: 0.001} } --- -"top level normalizer should apply to all sub-retrievers": +"linear retriever with top-level normalizer - minmax": - do: search: index: test body: retriever: linear: - normalizer: "minmax" + normalizer: minmax retrievers: [ { retriever: { @@ -1351,22 +1351,21 @@ setup: bool: { should: [ { constant_score: { filter: { term: { keyword: { value: "one" } } }, boost: 10.0 } }, - { constant_score: { filter: { term: { keyword: { value: "two" } } }, boost: 9.0 } }, - { constant_score: { filter: { term: { keyword: { value: "three" } } }, boost: 5.0 } } + { constant_score: { filter: { term: { keyword: { value: "two" } } }, boost: 5.0 } } ] } } } }, - weight: 10.0 + weight: 1.0 }, { retriever: { knn: { field: "vector", - query_vector: [ 4 ], - k: 1, - num_candidates: 
1 + query_vector: [4], + k: 2, + num_candidates: 10 } }, weight: 2.0 @@ -1374,79 +1373,177 @@ setup: ] - match: { hits.total.value: 4 } - - match: { hits.hits.0._id: "1" } - - close_to: { hits.hits.0._score: { value: 10.0, error: 0.001} } - - match: { hits.hits.1._id: "2" } - - close_to: { hits.hits.1._score: { value: 8.0, error: 0.001} } - - match: { hits.hits.2._id: "4" } - - close_to: { hits.hits.2._score: { value: 2.0, error: 0.001} } - - match: { hits.hits.3._id: "3" } - - close_to: { hits.hits.3._score: { value: 0.0, error: 0.001 } } + - match: { hits.hits.0._id: "4" } + - close_to: { hits.hits.0._score: { value: 2.0, error: 0.01} } + - match: { hits.hits.1._id: "1" } + - close_to: { hits.hits.1._score: { value: 1.0, error: 0.01} } + - match: { hits.hits.2._id: "2" } + - close_to: { hits.hits.2._score: { value: 0.0, error: 0.01} } --- -"should throw on different normalizers": +"linear retriever with top-level normalizer - l2_norm": + - requires: + cluster_features: [ "linear_retriever.l2_norm" ] + reason: "Requires l2_norm normalization support in linear retriever" - do: - catch: /top-level normalizer \[minmax\] is specified and it should be the same as all sub-retriever normalizers/ search: index: test body: retriever: linear: - normalizer: "minmax" + normalizer: l2_norm retrievers: [ { retriever: { standard: { query: { - constant_score: { - filter: { - term: { - keyword: { - value: "one" - } - } - }, - boost: 10.0 + bool: { + should: [ + { constant_score: { filter: { term: { keyword: { value: "one" } } }, boost: 3.0 } }, + { constant_score: { filter: { term: { keyword: { value: "two" } } }, boost: 4.0 } } + ] } } } }, - weight: 1.0, - normalizer: "l2_norm" + weight: 1.0 + }, + { + retriever: { + knn: { + field: "vector", + query_vector: [4], + k: 2, + num_candidates: 10 + } + }, + weight: 2.0 } ] + - match: { hits.total.value: 4 } + - match: { hits.hits.0._id: "4" } + - close_to: { hits.hits.0._score: { value: 1.79, error: 0.01} } + - match: { 
hits.hits.1._id: "3" } + - close_to: { hits.hits.1._score: { value: 0.89, error: 0.01} } + - match: { hits.hits.2._id: "2" } + - close_to: { hits.hits.2._score: { value: 0.8, error: 0.01} } + --- -"should not throw on same normalizers": +"linear retriever with top-level normalizer and per-retriever override": - do: search: index: test body: retriever: linear: - normalizer: "minmax" + normalizer: minmax retrievers: [ { retriever: { standard: { query: { - constant_score: { - filter: { - term: { - keyword: { - value: "one" - } - } - }, - boost: 10.0 + bool: { + should: [ + { constant_score: { filter: { term: { keyword: { value: "one" } } }, boost: 10.0 } }, + { constant_score: { filter: { term: { keyword: { value: "two" } } }, boost: 5.0 } } + ] } } } }, weight: 1.0, - normalizer: "minmax" + normalizer: l2_norm + }, + { + retriever: { + knn: { + field: "vector", + query_vector: [4], + k: 2, + num_candidates: 10 + } + }, + weight: 2.0 } ] + + - match: { hits.total.value: 4 } + - match: { hits.hits.0._id: "4" } + - close_to: { hits.hits.0._score: { value: 2.0, error: 0.01} } + - match: { hits.hits.1._id: "1" } + - close_to: { hits.hits.1._score: { value: 0.89, error: 0.01} } + - match: { hits.hits.2._id: "2" } + - close_to: { hits.hits.2._score: { value: 0.45, error: 0.01} } + +--- +"linear retriever with top-level normalizer - multi-field format": + - do: + search: + index: test + body: + retriever: + linear: + normalizer: minmax + query: one + fields: [keyword, text] + - match: { hits.total.value: 1 } - match: { hits.hits.0._id: "1" } - close_to: { hits.hits.0._score: { value: 1.0, error: 0.001} } + +--- +"linear retriever with top-level normalizer - validation test": + - do: + catch: /Unknown normalizer \[invalid\]/ + search: + index: test + body: + retriever: + linear: + normalizer: invalid + retrievers: [ + { + retriever: { + standard: { + query: { + term: { + keyword: { + value: "one" + } + } + } + } + }, + weight: 1.0 + } + ] + +--- +"linear retriever with 
top-level normalizer - empty results": + - do: + search: + index: test + body: + retriever: + linear: + normalizer: minmax + retrievers: [ + { + retriever: { + standard: { + query: { + term: { + keyword: { + value: "nonexistent" + } + } + } + } + }, + weight: 1.0 + } + ] + + - match: { hits.total.value: 0 } + - length: { hits.hits: 0 } From 3ec7110c4bb0ff74f15824f6ff0f42b896423ee8 Mon Sep 17 00:00:00 2001 From: Mridula Date: Thu, 31 Jul 2025 19:07:37 +0100 Subject: [PATCH 20/51] Fixed the retrievers --- .../elasticsearch/rest-apis/retrievers.md | 156 +----------------- 1 file changed, 3 insertions(+), 153 deletions(-) diff --git a/docs/reference/elasticsearch/rest-apis/retrievers.md b/docs/reference/elasticsearch/rest-apis/retrievers.md index 6d80218bbabaf..fa3810a435a01 100644 --- a/docs/reference/elasticsearch/rest-apis/retrievers.md +++ b/docs/reference/elasticsearch/rest-apis/retrievers.md @@ -11,7 +11,7 @@ applies_to: A retriever is a specification to describe top documents returned from a search. A retriever replaces other elements of the [search API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-search) that also return top documents such as [`query`](/reference/query-languages/querydsl.md) and [`knn`](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-search#search-api-knn). A retriever may have child retrievers where a retriever with two or more children is considered a compound retriever. This allows for complex behavior to be depicted in a tree-like structure, called the retriever tree, which clarifies the order of operations that occur during a search. ::::{tip} -Refer to [*Retrievers*](docs-content://solutions/search/retrievers-overview.md) for a high level overview of the retrievers abstraction. Refer to [Retrievers examples](retrievers/retrievers-examples.md) for additional examples. 
+Refer to [*Retrievers*](docs-content://solutions/search/retrievers-overview.md) for a high level overview of the retrievers abstraction. Refer to [Retrievers examples](docs-content://solutions/search/retrievers-examples.md) for additional examples. :::: @@ -99,156 +99,6 @@ When using the `linear` retriever, fields can be boosted using the `^` notation: GET books/_search { "retriever": { - "knn": { <1> - "field": "vector", <2> - "query_vector": [10, 22, 77], <3> - "k": 10, <4> - "num_candidates": 10 <5> - } - } -} -``` - -1. Configuration for k-nearest neighbor (knn) search, which is based on vector similarity. -2. Specifies the field name that contains the vectors. -3. The query vector against which document vectors are compared in the `knn` search. -4. The number of nearest neighbors to return as top hits. This value must be fewer than or equal to `num_candidates`. -5. The size of the initial candidate set from which the final `k` nearest neighbors are selected. - - - - -## Linear Retriever [linear-retriever] - -A retriever that normalizes and linearly combines the scores of other retrievers. - - -#### Parameters [linear-retriever-parameters] - -`retrievers` -: (Required, array of objects) - - A list of the sub-retrievers' configuration, that we will take into account and whose result sets we will merge through a weighted sum. Each configuration can have a different weight and normalization depending on the specified retriever. - -`normalizer` -: (Optional, String) - - Specifies a normalizer to be applied to all sub-retrievers. This provides a simple way to configure normalization for all retrievers at once. - - The `normalizer` can be specified at the top level, at the per-retriever level, or both, with the following rules: - - * If only the top-level `normalizer` is specified, it applies to all sub-retrievers. - * If both a top-level and a per-retriever `normalizer` are specified, the per-retriever normalizer must be identical to the top-level one. 
If they differ, the request will fail. - * If only per-retriever normalizers are specified, they can be different for each sub-retriever. - * If no normalizer is specified at any level, no normalization is applied. - - Available values are: `minmax`, `l2_norm`, and `none`. Defaults to `none`. - -Each entry in the `retrievers` array specifies the following parameters: - -`retriever` -: (Required, a `retriever` object) - - Specifies the retriever for which we will compute the top documents for. The retriever will produce `rank_window_size` results, which will later be merged based on the specified `weight` and `normalizer`. - -`weight` -: (Optional, float) - - The weight that each score of this retriever’s top docs will be multiplied with. Must be greater or equal to 0. Defaults to 1.0. - -`normalizer` -: (Optional, String) - - Specifies how we will normalize this specific retriever’s scores, before applying the specified `weight`. If a top-level `normalizer` is also specified, this normalizer must be the same. Available values are: `minmax`, `l2_norm`, and `none`. Defaults to `none`. - - * `none` - * `minmax` : A `MinMaxScoreNormalizer` that normalizes scores based on the following formula - - ``` - score = (score - min) / (max - min) - ``` - - * `l2_norm` : An `L2ScoreNormalizer` that normalizes scores using the L2 norm of the score values. - -See also [this hybrid search example](docs-content://solutions/search/retrievers-examples.md#retrievers-examples-linear-retriever) using a linear retriever on how to independently configure and apply normalizers to retrievers. - -`rank_window_size` -: (Optional, integer) - - This value determines the size of the individual result sets per query. A higher value will improve result relevance at the cost of performance. The final ranked result set is pruned down to the search request’s [size](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-search#search-size-param). 
`rank_window_size` must be greater than or equal to `size` and greater than or equal to `1`. Defaults to the `size` parameter. - - -`filter` -: (Optional, [query object or list of query objects](/reference/query-languages/querydsl.md)) - - Applies the specified [boolean query filter](/reference/query-languages/query-dsl/query-dsl-bool-query.md) to all of the specified sub-retrievers, according to each retriever’s specifications. - - - -## RRF Retriever [rrf-retriever] - -An [RRF](/reference/elasticsearch/rest-apis/reciprocal-rank-fusion.md) retriever returns top documents based on the RRF formula, equally weighting two or more child retrievers. Reciprocal rank fusion (RRF) is a method for combining multiple result sets with different relevance indicators into a single result set. - - -#### Parameters [rrf-retriever-parameters] - -`retrievers` -: (Required, array of retriever objects) - - A list of child retrievers to specify which sets of returned top documents will have the RRF formula applied to them. Each child retriever carries an equal weight as part of the RRF formula. Two or more child retrievers are required. - - -`rank_constant` -: (Optional, integer) - - This value determines how much influence documents in individual result sets per query have over the final ranked result set. A higher value indicates that lower ranked documents have more influence. This value must be greater than or equal to `1`. Defaults to `60`. - - -`rank_window_size` -: (Optional, integer) - - This value determines the size of the individual result sets per query. A higher value will improve result relevance at the cost of performance. The final ranked result set is pruned down to the search request’s [size](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-search#search-size-param). `rank_window_size` must be greater than or equal to `size` and greater than or equal to `1`. Defaults to the `size` parameter. 
- - -`filter` -: (Optional, [query object or list of query objects](/reference/query-languages/querydsl.md)) - - Applies the specified [boolean query filter](/reference/query-languages/query-dsl/query-dsl-bool-query.md) to all of the specified sub-retrievers, according to each retriever’s specifications. - - - -### Example: Hybrid search [rrf-retriever-example-hybrid] - -A simple hybrid search example (lexical search + dense vector search) combining a `standard` retriever with a `knn` retriever using RRF: - -```console -GET /restaurants/_search -{ - "retriever": { - "rrf": { <1> - "retrievers": [ <2> - { - "standard": { <3> - "query": { - "multi_match": { - "query": "Austria", - "fields": [ - "city", - "region" - ] - } - } - } - }, - { - "knn": { <4> - "field": "vector", - "query_vector": [10, 22, 77], - "k": 10, - "num_candidates": 10 - } - } -======= "linear": { "query": "elasticsearch", "fields": [ @@ -388,5 +238,5 @@ Note, however, that wildcard field patterns will only resolve to fields that eit ### Examples -- [RRF with the multi-field query format](retrievers/retrievers-examples.md#retrievers-examples-rrf-multi-field-query-format) -- [Linear retriever with the multi-field query format](retrievers/retrievers-examples.md#retrievers-examples-linear-multi-field-query-format) \ No newline at end of file +- [RRF with the multi-field query format](docs-content://solutions/search/retrievers-examples.md#retrievers-examples-rrf-multi-field-query-format) +- [Linear retriever with the multi-field query format](docs-content://solutions/search/retrievers-examples.md#retrievers-examples-linear-multi-field-query-format) \ No newline at end of file From 2a571ecd16813b7ec8d4d27fd11c382d5a8e545a Mon Sep 17 00:00:00 2001 From: Mridula Date: Thu, 31 Jul 2025 19:46:52 +0100 Subject: [PATCH 21/51] Reverted it to main --- docs/reference/elasticsearch/rest-apis/retrievers.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/docs/reference/elasticsearch/rest-apis/retrievers.md b/docs/reference/elasticsearch/rest-apis/retrievers.md index fa3810a435a01..6ed4ff3676d7d 100644 --- a/docs/reference/elasticsearch/rest-apis/retrievers.md +++ b/docs/reference/elasticsearch/rest-apis/retrievers.md @@ -11,7 +11,7 @@ applies_to: A retriever is a specification to describe top documents returned from a search. A retriever replaces other elements of the [search API](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-search) that also return top documents such as [`query`](/reference/query-languages/querydsl.md) and [`knn`](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-search#search-api-knn). A retriever may have child retrievers where a retriever with two or more children is considered a compound retriever. This allows for complex behavior to be depicted in a tree-like structure, called the retriever tree, which clarifies the order of operations that occur during a search. ::::{tip} -Refer to [*Retrievers*](docs-content://solutions/search/retrievers-overview.md) for a high level overview of the retrievers abstraction. Refer to [Retrievers examples](docs-content://solutions/search/retrievers-examples.md) for additional examples. +Refer to [*Retrievers*](docs-content://solutions/search/retrievers-overview.md) for a high level overview of the retrievers abstraction. Refer to [Retrievers examples](retrievers/retrievers-examples.md) for additional examples. 
:::: From 7808fc1ffe33cf2044044f5990dde0aceb42c3ad Mon Sep 17 00:00:00 2001 From: Mridula Date: Thu, 31 Jul 2025 19:48:13 +0100 Subject: [PATCH 22/51] reverted --- docs/reference/elasticsearch/rest-apis/retrievers.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/reference/elasticsearch/rest-apis/retrievers.md b/docs/reference/elasticsearch/rest-apis/retrievers.md index 6ed4ff3676d7d..a076d34941c09 100644 --- a/docs/reference/elasticsearch/rest-apis/retrievers.md +++ b/docs/reference/elasticsearch/rest-apis/retrievers.md @@ -238,5 +238,5 @@ Note, however, that wildcard field patterns will only resolve to fields that eit ### Examples -- [RRF with the multi-field query format](docs-content://solutions/search/retrievers-examples.md#retrievers-examples-rrf-multi-field-query-format) -- [Linear retriever with the multi-field query format](docs-content://solutions/search/retrievers-examples.md#retrievers-examples-linear-multi-field-query-format) \ No newline at end of file +- [RRF with the multi-field query format](retrievers/retrievers-examples.md#retrievers-examples-rrf-multi-field-query-format) +- [Linear retriever with the multi-field query format](retrievers/retrievers-examples.md#retrievers-examples-linear-multi-field-query-format) \ No newline at end of file From a44cf1d1e9322f86b7f97bf037d0523ad766b7c9 Mon Sep 17 00:00:00 2001 From: Mridula Date: Thu, 31 Jul 2025 19:50:43 +0100 Subject: [PATCH 23/51] cleaned up --- .../elasticsearch/rest-apis/retrievers/linear-retriever.md | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/docs/reference/elasticsearch/rest-apis/retrievers/linear-retriever.md b/docs/reference/elasticsearch/rest-apis/retrievers/linear-retriever.md index accd772aa0305..b4f6e15433b37 100644 --- a/docs/reference/elasticsearch/rest-apis/retrievers/linear-retriever.md +++ b/docs/reference/elasticsearch/rest-apis/retrievers/linear-retriever.md @@ -31,12 +31,9 @@ Combining `query` and `retrievers` is not 
supported. `normalizer` {applies_to}`stack: ga 9.1` : (Optional, String) - The normalizer to use for score normalization. This serves as the default normalizer for all sub-retrievers. + The normalizer to use when using the [multi-field query format](../retrievers.md#multi-field-query-format). See [normalizers](#linear-retriever-normalizers) for supported values. - - When using the [multi-field query format](../retrievers.md#multi-field-query-format), this field is required when `query` is specified. - - When using the `retrievers` array format, this field serves as the default normalizer for all sub-retrievers. Individual sub-retrievers can override this default by specifying their own `normalizer` field. + Required when `query` is specified. ::::{warning} Avoid using `none` as that will disable normalization and may bias the result set towards lexical matches. From 89edd4f73127bbbef308966541995417715684c3 Mon Sep 17 00:00:00 2001 From: Mridula Date: Thu, 31 Jul 2025 20:03:07 +0100 Subject: [PATCH 24/51] cleaned it up --- .../rank/linear/LinearRetrieverBuilder.java | 60 +++---------------- 1 file changed, 8 insertions(+), 52 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java index 7263ab3f7a37f..791dd29393cac 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java @@ -129,7 +129,7 @@ private void normalizeNormalizerArray(ScoreNormalizer topLevelNormalizer, ScoreN if (topLevelNormalizer != null) { // Validate explicit per-retriever normalizers match top-level - if (current != null && !current.equals(DEFAULT_NORMALIZER) && !current.equals(topLevelNormalizer)) { + if (current != null && current.equals(DEFAULT_NORMALIZER) == false 
&& current.equals(topLevelNormalizer) == false) { throw new IllegalArgumentException( String.format( "[%s] All per-retriever normalizers must match the top-level normalizer: " @@ -189,39 +189,23 @@ public LinearRetrieverBuilder( // Use a mutable list for innerRetrievers so that we can use addChild super(innerRetrievers == null ? new ArrayList<>() : new ArrayList<>(innerRetrievers), rankWindowSize); if (weights.length != this.innerRetrievers.size()) { - throw new IllegalArgumentException( - "[" - + NAME - + "] the number of weights must be equal to the number of retrievers, but found [" - + weights.length - + "] weights and [" - + this.innerRetrievers.size() - + "] retrievers" - ); + throw new IllegalArgumentException("The number of weights must match the number of inner retrievers"); } if (normalizers.length != this.innerRetrievers.size()) { - throw new IllegalArgumentException( - "[" - + NAME - + "] the number of normalizers must be equal to the number of retrievers, but found [" - + normalizers.length - + "] normalizers and [" - + this.innerRetrievers.size() - + "] retrievers" - ); + throw new IllegalArgumentException("The number of normalizers must match the number of inner retrievers"); } + + this.fields = fields == null ? null : List.copyOf(fields); + this.query = query; + this.normalizer = normalizer; this.weights = weights; this.normalizers = normalizers; - // Apply top-level normalizer priority system: - // 1. Retriever-specific override (if specified) - // 2. Top-level normalizer (if specified) - // 3. Default (IdentityScoreNormalizer.INSTANCE) + ScoreNormalizer effectiveNormalizer = normalizer != null ? 
normalizer : DEFAULT_NORMALIZER; for (int i = 0; i < normalizers.length; i++) { if (normalizers[i] == null || normalizers[i].equals(DEFAULT_NORMALIZER)) { normalizers[i] = effectiveNormalizer; } - // If per-retriever normalizer is explicitly specified, keep it (allow override) } } @@ -411,34 +395,6 @@ protected RetrieverBuilder doRewrite(QueryRewriteContext ctx) { rewritten = new StandardRetrieverBuilder(new MatchNoneQueryBuilder()); } } - if (rewritten instanceof LinearRetrieverBuilder == false) { - return rewritten; - } - LinearRetrieverBuilder linearRewritten = (LinearRetrieverBuilder) rewritten; - - if (normalizer != null) { - ScoreNormalizer[] newNormalizers = new ScoreNormalizer[linearRewritten.normalizers.length]; - Arrays.fill(newNormalizers, normalizer); - rewritten = new LinearRetrieverBuilder( - linearRewritten.innerRetrievers, - linearRewritten.fields, - linearRewritten.query, - normalizer, - linearRewritten.rankWindowSize, - linearRewritten.weights, - newNormalizers - ); - } else { - rewritten = new LinearRetrieverBuilder( - linearRewritten.innerRetrievers, - linearRewritten.fields, - linearRewritten.query, - linearRewritten.normalizer, - linearRewritten.rankWindowSize, - linearRewritten.weights, - linearRewritten.normalizers - ); - } return rewritten; } From 08f68abe11cf42d2e4c554ce5b0c188c79e06517 Mon Sep 17 00:00:00 2001 From: Mridula Date: Fri, 1 Aug 2025 02:53:21 +0100 Subject: [PATCH 25/51] Modified and cleaned code --- .../rank/linear/LinearRetrieverBuilder.java | 43 +++---------------- .../rank/linear/LinearRetrieverComponent.java | 6 +-- .../xpack/rank/linear/ScoreNormalizer.java | 19 ++++++++ .../LinearRetrieverBuilderParsingTests.java | 32 ++++---------- 4 files changed, 34 insertions(+), 66 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java index 
791dd29393cac..f7dd499e66ef4 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java @@ -123,37 +123,6 @@ private static ScoreNormalizer[] getDefaultNormalizers(List inn return normalizers; } - private void normalizeNormalizerArray(ScoreNormalizer topLevelNormalizer, ScoreNormalizer[] normalizers) { - for (int i = 0; i < normalizers.length; i++) { - ScoreNormalizer current = normalizers[i]; - - if (topLevelNormalizer != null) { - // Validate explicit per-retriever normalizers match top-level - if (current != null && current.equals(DEFAULT_NORMALIZER) == false && current.equals(topLevelNormalizer) == false) { - throw new IllegalArgumentException( - String.format( - "[%s] All per-retriever normalizers must match the top-level normalizer: " - + "expected [%s], found [%s] in retriever [%d]", - NAME, - topLevelNormalizer.getName(), - current.getName(), - i - ) - ); - } - // Propagate top-level normalizer to unspecified positions - if (current == null || current.equals(DEFAULT_NORMALIZER)) { - normalizers[i] = topLevelNormalizer; - } - } else { - // No top-level normalizer: ensure null values become DEFAULT_NORMALIZER - if (current == null) { - normalizers[i] = DEFAULT_NORMALIZER; - } - } - } - } - public static LinearRetrieverBuilder fromXContent(XContentParser parser, RetrieverParserContext context) throws IOException { if (context.clusterSupportsFeature(LINEAR_RETRIEVER_SUPPORTED) == false) { throw new ParsingException(parser.getTokenLocation(), "unknown retriever [" + NAME + "]"); @@ -199,12 +168,12 @@ public LinearRetrieverBuilder( this.query = query; this.normalizer = normalizer; this.weights = weights; - this.normalizers = normalizers; - - ScoreNormalizer effectiveNormalizer = normalizer != null ? 
normalizer : DEFAULT_NORMALIZER; + this.normalizers = new ScoreNormalizer[normalizers.length]; for (int i = 0; i < normalizers.length; i++) { if (normalizers[i] == null || normalizers[i].equals(DEFAULT_NORMALIZER)) { - normalizers[i] = effectiveNormalizer; + this.normalizers[i] = normalizer != null ? normalizer : DEFAULT_NORMALIZER; + } else { + this.normalizers[i] = normalizers[i]; } } } @@ -420,9 +389,7 @@ public void doToXContent(XContentBuilder builder, Params params) throws IOExcept builder.startObject(); builder.field(LinearRetrieverComponent.RETRIEVER_FIELD.getPreferredName(), entry.retriever()); builder.field(LinearRetrieverComponent.WEIGHT_FIELD.getPreferredName(), weights[index]); - if (normalizers[index] != null && normalizers[index].equals(DEFAULT_NORMALIZER) == false) { - builder.field(LinearRetrieverComponent.NORMALIZER_FIELD.getPreferredName(), normalizers[index].getName()); - } + builder.field(LinearRetrieverComponent.NORMALIZER_FIELD.getPreferredName(), normalizers[index].getName()); builder.endObject(); index++; } diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverComponent.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverComponent.java index 7da03860d9a26..bb0d79d3fe488 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverComponent.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverComponent.java @@ -38,7 +38,7 @@ public LinearRetrieverComponent(RetrieverBuilder retrieverBuilder, Float weight, assert retrieverBuilder != null; this.retriever = retrieverBuilder; this.weight = weight == null ? DEFAULT_WEIGHT : weight; - this.normalizer = normalizer; + this.normalizer = normalizer == null ? 
DEFAULT_NORMALIZER : normalizer; if (this.weight < 0) { throw new IllegalArgumentException("[weight] must be non-negative"); } @@ -48,9 +48,7 @@ public LinearRetrieverComponent(RetrieverBuilder retrieverBuilder, Float weight, public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.field(RETRIEVER_FIELD.getPreferredName(), retriever); builder.field(WEIGHT_FIELD.getPreferredName(), weight); - if (normalizer != null && normalizer.equals(DEFAULT_NORMALIZER) == false) { - builder.field(NORMALIZER_FIELD.getPreferredName(), normalizer.getName()); - } + builder.field(NORMALIZER_FIELD.getPreferredName(), normalizer.getName()); return builder; } diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/ScoreNormalizer.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/ScoreNormalizer.java index 34b7277498218..d760ffa878aaa 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/ScoreNormalizer.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/ScoreNormalizer.java @@ -9,6 +9,8 @@ import org.apache.lucene.search.ScoreDoc; +import java.util.Objects; + /** * A no-op {@link ScoreNormalizer} that does not modify the scores. 
*/ @@ -31,4 +33,21 @@ public static ScoreNormalizer valueOf(String normalizer) { public abstract String getName(); public abstract ScoreDoc[] normalizeScores(ScoreDoc[] docs); + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (obj == null || getClass() != obj.getClass()) { + return false; + } + ScoreNormalizer that = (ScoreNormalizer) obj; + return Objects.equals(getName(), that.getName()); + } + + @Override + public int hashCode() { + return Objects.hash(getName()); + } } diff --git a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderParsingTests.java b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderParsingTests.java index dc57a6a175167..525234fc8e7c8 100644 --- a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderParsingTests.java +++ b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderParsingTests.java @@ -62,30 +62,14 @@ protected LinearRetrieverBuilder createTestInstance() { List innerRetrievers = new ArrayList<>(); float[] weights = new float[num]; ScoreNormalizer[] normalizers = new ScoreNormalizer[num]; - // Create normalizer combinations that follow the API design rules - if (normalizer != null) { - // When top-level normalizer is specified, per-retriever normalizers must either: - // 1. Be null/default (will be propagated), or - // 2. Exactly match the top-level normalizer - boolean useMatchingNormalizers = randomBoolean(); - for (int i = 0; i < num; i++) { - innerRetrievers.add( - new CompoundRetrieverBuilder.RetrieverSource(TestRetrieverBuilder.createRandomTestRetrieverBuilder(), null) - ); - weights[i] = randomFloat(); - if (useMatchingNormalizers) { - normalizers[i] = normalizer; // Exactly match top-level - } else { - normalizers[i] = randomBoolean() ? 
null : IdentityScoreNormalizer.INSTANCE; // Will be propagated - } - } - } else { - // No top-level normalizer: per-retriever normalizers can be anything - for (int i = 0; i < num; i++) { - innerRetrievers.add( - new CompoundRetrieverBuilder.RetrieverSource(TestRetrieverBuilder.createRandomTestRetrieverBuilder(), null) - ); - weights[i] = randomFloat(); + for (int i = 0; i < num; i++) { + innerRetrievers.add( + new CompoundRetrieverBuilder.RetrieverSource(TestRetrieverBuilder.createRandomTestRetrieverBuilder(), null) + ); + weights[i] = randomFloat(); + if (normalizer != null && randomBoolean()) { + normalizers[i] = normalizer; + } else { normalizers[i] = randomScoreNormalizer(); } } From c13f6147cbec085f73b1793f40140b22fe3e0d9f Mon Sep 17 00:00:00 2001 From: Mridula Date: Thu, 7 Aug 2025 11:20:57 +0100 Subject: [PATCH 26/51] Compilation and styling clean --- .../rest-apis/retrievers/linear-retriever.md | 3 +- .../rank/linear/LinearRetrieverBuilder.java | 37 +++++++- .../LinearRetrieverBuilderParsingTests.java | 41 +++++++-- .../linear/LinearRetrieverBuilderTests.java | 84 +++++++++++++++++++ .../10_linear_retriever_normalizers.yml | 38 +++++++-- 5 files changed, 182 insertions(+), 21 deletions(-) diff --git a/docs/reference/elasticsearch/rest-apis/retrievers/linear-retriever.md b/docs/reference/elasticsearch/rest-apis/retrievers/linear-retriever.md index b4f6e15433b37..6602630c74df0 100644 --- a/docs/reference/elasticsearch/rest-apis/retrievers/linear-retriever.md +++ b/docs/reference/elasticsearch/rest-apis/retrievers/linear-retriever.md @@ -35,6 +35,8 @@ Combining `query` and `retrievers` is not supported. See [normalizers](#linear-retriever-normalizers) for supported values. Required when `query` is specified. + When used with `retrievers`, the top-level `normalizer` serves as a default for any sub-retriever that doesn't specify its own normalizer. Per-retriever normalizers always take precedence over the top-level normalizer when both are specified. 
+ ::::{warning} Avoid using `none` as that will disable normalization and may bias the result set towards lexical matches. See [field grouping](../retrievers.md#multi-field-field-grouping) for more information. @@ -94,7 +96,6 @@ The `linear` retriever supports the following normalizers: ## Examples [linear-retriever-examples] -### Top-level normalizer example This example shows how to use a top-level normalizer that applies to all sub-retrievers: diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java index f7dd499e66ef4..bbc30a065999c 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java @@ -168,11 +168,23 @@ public LinearRetrieverBuilder( this.query = query; this.normalizer = normalizer; this.weights = weights; + // Apply normalizer precedence logic: + // 1. If per-retriever normalizer is explicitly set (not identity when top-level exists), use it + // 2. Otherwise, use top-level normalizer if available + // 3. Otherwise, use default (identity) + // Note: LinearRetrieverComponent defaults null to identity, so we treat identity + // as "not explicitly set" when there's a top-level normalizer this.normalizers = new ScoreNormalizer[normalizers.length]; for (int i = 0; i < normalizers.length; i++) { - if (normalizers[i] == null || normalizers[i].equals(DEFAULT_NORMALIZER)) { + if (normalizers[i] == null) { + // This shouldn't happen with current LinearRetrieverComponent, but handle it anyway this.normalizers[i] = normalizer != null ? 
normalizer : DEFAULT_NORMALIZER; + } else if (normalizers[i].equals(DEFAULT_NORMALIZER) && normalizer != null) { + // Identity normalizer with top-level set - use top-level + // (identity was likely defaulted by LinearRetrieverComponent) + this.normalizers[i] = normalizer; } else { + // Explicitly set normalizer - use as-is this.normalizers[i] = normalizers[i]; } } @@ -381,6 +393,18 @@ ScoreNormalizer[] getNormalizers() { return normalizers; } + List getFields() { + return fields; + } + + String getQuery() { + return query; + } + + ScoreNormalizer getNormalizer() { + return normalizer; + } + public void doToXContent(XContentBuilder builder, Params params) throws IOException { int index = 0; if (innerRetrievers.isEmpty() == false) { @@ -389,7 +413,16 @@ public void doToXContent(XContentBuilder builder, Params params) throws IOExcept builder.startObject(); builder.field(LinearRetrieverComponent.RETRIEVER_FIELD.getPreferredName(), entry.retriever()); builder.field(LinearRetrieverComponent.WEIGHT_FIELD.getPreferredName(), weights[index]); - builder.field(LinearRetrieverComponent.NORMALIZER_FIELD.getPreferredName(), normalizers[index].getName()); + + // Only serialize normalizer if it's different from what would be the default + // If there's a top-level normalizer and this is identity, don't serialize + // (it was likely defaulted and will be replaced by top-level on parse) + // If there's no top-level normalizer and this is identity, don't serialize + // (it's the default) + ScoreNormalizer expectedDefault = normalizer != null ? 
normalizer : DEFAULT_NORMALIZER; + if (!normalizers[index].equals(expectedDefault)) { + builder.field(LinearRetrieverComponent.NORMALIZER_FIELD.getPreferredName(), normalizers[index].getName()); + } builder.endObject(); index++; } diff --git a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderParsingTests.java b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderParsingTests.java index 525234fc8e7c8..3ec357f66cefb 100644 --- a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderParsingTests.java +++ b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderParsingTests.java @@ -19,21 +19,23 @@ import org.elasticsearch.xcontent.NamedXContentRegistry; import org.elasticsearch.xcontent.ParseField; import org.elasticsearch.xcontent.XContentParser; +import org.elasticsearch.xcontent.XContentType; import org.junit.AfterClass; import org.junit.BeforeClass; import java.io.IOException; import java.util.ArrayList; +import java.util.Collections; import java.util.List; -import static java.util.Collections.emptyList; +import static org.hamcrest.Matchers.instanceOf; public class LinearRetrieverBuilderParsingTests extends AbstractXContentTestCase { private static List xContentRegistryEntries; @BeforeClass public static void init() { - xContentRegistryEntries = new SearchModule(Settings.EMPTY, emptyList()).getNamedXContents(); + xContentRegistryEntries = new SearchModule(Settings.EMPTY, Collections.emptyList()).getNamedXContents(); } @AfterClass @@ -67,10 +69,10 @@ protected LinearRetrieverBuilder createTestInstance() { new CompoundRetrieverBuilder.RetrieverSource(TestRetrieverBuilder.createRandomTestRetrieverBuilder(), null) ); weights[i] = randomFloat(); - if (normalizer != null && randomBoolean()) { - normalizers[i] = normalizer; - } else { + if (randomBoolean()) { normalizers[i] = randomScoreNormalizer(); + } 
else { + normalizers[i] = null; } } @@ -112,10 +114,31 @@ protected NamedXContentRegistry xContentRegistry() { } private static ScoreNormalizer randomScoreNormalizer() { - if (randomBoolean()) { - return MinMaxScoreNormalizer.INSTANCE; - } else { - return IdentityScoreNormalizer.INSTANCE; + int random = randomInt(2); + return switch (random) { + case 0 -> MinMaxScoreNormalizer.INSTANCE; + case 1 -> L2ScoreNormalizer.INSTANCE; + default -> IdentityScoreNormalizer.INSTANCE; + }; + } + + public void testTopLevelNormalizer() throws IOException { + String json = """ + { + "retrievers": [ + { "test_retriever": {} }, + { "test_retriever": {} } + ], + "normalizer": "min_max" + }"""; + + try (XContentParser parser = createParser(XContentType.JSON.xContent(), json)) { + + LinearRetrieverBuilder builder = doParseInstance(parser); + assertThat(builder.getNormalizer(), instanceOf(MinMaxScoreNormalizer.class)); + for (ScoreNormalizer normalizer : builder.getNormalizers()) { + assertThat(normalizer, instanceOf(MinMaxScoreNormalizer.class)); + } } } } diff --git a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java index f87816fbe4427..4da1e6110f2a9 100644 --- a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java +++ b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java @@ -386,4 +386,88 @@ public void testTopLevelNormalizerWithPerRetrieverOverrides() { assertEquals(MinMaxScoreNormalizer.INSTANCE, retriever.getNormalizers()[1]); } + public void testExplicitIdentityNormalizerOverridesTopLevel() { + StandardRetrieverBuilder standardRetriever = new StandardRetrieverBuilder(new MatchQueryBuilder("title", "elasticsearch")); + KnnRetrieverBuilder knnRetriever = new KnnRetrieverBuilder( + "title_vector", + new float[] { 
0.1f, 0.2f, 0.3f }, + null, + 10, + 100, + null, + null + ); + + LinearRetrieverBuilder retriever = new LinearRetrieverBuilder( + List.of( + CompoundRetrieverBuilder.RetrieverSource.from(standardRetriever), + CompoundRetrieverBuilder.RetrieverSource.from(knnRetriever) + ), + null, + null, + MinMaxScoreNormalizer.INSTANCE, + DEFAULT_RANK_WINDOW_SIZE, + new float[] { 1.0f, 2.0f }, + new ScoreNormalizer[] { IdentityScoreNormalizer.INSTANCE, null } + ); + + assertEquals(IdentityScoreNormalizer.INSTANCE, retriever.getNormalizers()[0]); + assertEquals(MinMaxScoreNormalizer.INSTANCE, retriever.getNormalizers()[1]); + } + + public void testNullNormalizerUsesTopLevelAsDefault() { + StandardRetrieverBuilder standardRetriever = new StandardRetrieverBuilder(new MatchQueryBuilder("title", "elasticsearch")); + + LinearRetrieverBuilder retriever = new LinearRetrieverBuilder( + List.of(CompoundRetrieverBuilder.RetrieverSource.from(standardRetriever)), + null, + null, + MinMaxScoreNormalizer.INSTANCE, + DEFAULT_RANK_WINDOW_SIZE, + new float[] { 1.0f }, + new ScoreNormalizer[] { null } + ); + + assertEquals(MinMaxScoreNormalizer.INSTANCE, retriever.getNormalizers()[0]); + } + + public void testNullNormalizersWithoutTopLevelUsesIdentity() { + StandardRetrieverBuilder standardRetriever = new StandardRetrieverBuilder(new MatchQueryBuilder("title", "elasticsearch")); + + LinearRetrieverBuilder retriever = new LinearRetrieverBuilder( + List.of(CompoundRetrieverBuilder.RetrieverSource.from(standardRetriever)), + null, + null, + null, + DEFAULT_RANK_WINDOW_SIZE, + new float[] { 1.0f }, + new ScoreNormalizer[] { null } + ); + + assertEquals(IdentityScoreNormalizer.INSTANCE, retriever.getNormalizers()[0]); + } + + public void testMixedNormalizerInheritanceScenario() { + StandardRetrieverBuilder standardRetriever1 = new StandardRetrieverBuilder(new MatchQueryBuilder("title", "elasticsearch")); + StandardRetrieverBuilder standardRetriever2 = new StandardRetrieverBuilder(new 
MatchQueryBuilder("content", "search")); + StandardRetrieverBuilder standardRetriever3 = new StandardRetrieverBuilder(new MatchQueryBuilder("tags", "java")); + + LinearRetrieverBuilder retriever = new LinearRetrieverBuilder( + List.of( + CompoundRetrieverBuilder.RetrieverSource.from(standardRetriever1), + CompoundRetrieverBuilder.RetrieverSource.from(standardRetriever2), + CompoundRetrieverBuilder.RetrieverSource.from(standardRetriever3) + ), + null, + null, + L2ScoreNormalizer.INSTANCE, + DEFAULT_RANK_WINDOW_SIZE, + new float[] { 1.0f, 2.0f, 3.0f }, + new ScoreNormalizer[] { null, MinMaxScoreNormalizer.INSTANCE, IdentityScoreNormalizer.INSTANCE } + ); + assertEquals(L2ScoreNormalizer.INSTANCE, retriever.getNormalizers()[0]); + assertEquals(MinMaxScoreNormalizer.INSTANCE, retriever.getNormalizers()[1]); + assertEquals(IdentityScoreNormalizer.INSTANCE, retriever.getNormalizers()[2]); + } + } diff --git a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever_normalizers.yml b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever_normalizers.yml index d99cda3185027..2678f5a567f75 100644 --- a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever_normalizers.yml +++ b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever_normalizers.yml @@ -212,36 +212,56 @@ setup: - close_to: { hits.hits.1._score: { value: 0.385, error: 0.01} } --- -"Linear retriever with mismatched normalizers (should fail)": +"Linear retriever with per-retriever normalizer override": - do: - catch: bad_request search: index: test body: retriever: linear: - normalizer: l2_norm + normalizer: minmax retrievers: [ { retriever: { standard: { query: { - match_all: {} + constant_score: { + filter: { + term: { + keyword: { + value: "one" + } + } + }, + boost: 5.0 + } } } - } + }, + weight: 1.0, + normalizer: l2_norm }, { retriever: { 
standard: { query: { - match_all: {} + constant_score: { + filter: { + term: { + keyword: { + value: "four" + } + } + }, + boost: 12.0 + } } } }, - normalizer: minmax + weight: 1.0 } ] - - match: { error.root_cause.0.type: "illegal_argument_exception" } - - match: { error.root_cause.0.reason: /.*All per-retriever normalizers must match the top-level normalizer.*/ } + - match: { hits.total.value: 2 } + # First retriever uses l2_norm override, second uses minmax from top-level + # The scores will reflect this mixed normalization approach From 182afeee85f1a63b68c88216c0a47a0958008c39 Mon Sep 17 00:00:00 2001 From: Mridula Date: Thu, 7 Aug 2025 12:35:58 +0100 Subject: [PATCH 27/51] Parsing issues resolved --- .../rank/linear/LinearRetrieverBuilder.java | 22 +++++++-------- .../LinearRetrieverBuilderParsingTests.java | 28 +++++++++++++++---- 2 files changed, 33 insertions(+), 17 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java index bbc30a065999c..0321bfed6ad08 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java @@ -414,15 +414,10 @@ public void doToXContent(XContentBuilder builder, Params params) throws IOExcept builder.field(LinearRetrieverComponent.RETRIEVER_FIELD.getPreferredName(), entry.retriever()); builder.field(LinearRetrieverComponent.WEIGHT_FIELD.getPreferredName(), weights[index]); - // Only serialize normalizer if it's different from what would be the default - // If there's a top-level normalizer and this is identity, don't serialize - // (it was likely defaulted and will be replaced by top-level on parse) - // If there's no top-level normalizer and this is identity, don't serialize - // (it's the default) - 
ScoreNormalizer expectedDefault = normalizer != null ? normalizer : DEFAULT_NORMALIZER; - if (!normalizers[index].equals(expectedDefault)) { - builder.field(LinearRetrieverComponent.NORMALIZER_FIELD.getPreferredName(), normalizers[index].getName()); - } + // Always serialize the normalizer name - this ensures consistent behavior during parsing + // The parser will handle applying the top-level normalizer if needed + builder.field(LinearRetrieverComponent.NORMALIZER_FIELD.getPreferredName(), normalizers[index].getName()); + builder.endObject(); index++; } @@ -439,7 +434,7 @@ public void doToXContent(XContentBuilder builder, Params params) throws IOExcept if (query != null) { builder.field(QUERY_FIELD.getPreferredName(), query); } - if (normalizer != null && normalizer.equals(DEFAULT_NORMALIZER) == false) { + if (normalizer != null) { builder.field(NORMALIZER_FIELD.getPreferredName(), normalizer.getName()); } @@ -448,9 +443,12 @@ public void doToXContent(XContentBuilder builder, Params params) throws IOExcept @Override public boolean doEquals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + if (super.doEquals(o) == false) return false; + LinearRetrieverBuilder that = (LinearRetrieverBuilder) o; - return super.doEquals(o) - && Arrays.equals(weights, that.weights) + return Arrays.equals(weights, that.weights) && Arrays.equals(normalizers, that.normalizers) && Objects.equals(fields, that.fields) && Objects.equals(query, that.query) diff --git a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderParsingTests.java b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderParsingTests.java index 3ec357f66cefb..23f32dd5a7785 100644 --- a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderParsingTests.java +++ 
b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderParsingTests.java @@ -125,11 +125,29 @@ private static ScoreNormalizer randomScoreNormalizer() { public void testTopLevelNormalizer() throws IOException { String json = """ { - "retrievers": [ - { "test_retriever": {} }, - { "test_retriever": {} } - ], - "normalizer": "min_max" + "linear": { + "retrievers": [ + { + "retriever": { + "test": { + "value": "test1" + } + }, + "weight": 1.0, + "normalizer": "none" + }, + { + "retriever": { + "test": { + "value": "test2" + } + }, + "weight": 1.0, + "normalizer": "none" + } + ], + "normalizer": "minmax" + } }"""; try (XContentParser parser = createParser(XContentType.JSON.xContent(), json)) { From 2ab7912e583164d700434b8d52123a9c2e27a8bc Mon Sep 17 00:00:00 2001 From: Mridula Date: Thu, 7 Aug 2025 16:29:12 +0100 Subject: [PATCH 28/51] Unittestspass but parsing issue equality persists --- .../rank/linear/LinearRetrieverBuilder.java | 45 +++++++++---------- .../rank/linear/LinearRetrieverComponent.java | 6 ++- .../LinearRetrieverBuilderParsingTests.java | 3 +- 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java index 0321bfed6ad08..dc7f9a023e295 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java @@ -75,6 +75,17 @@ public final class LinearRetrieverBuilder extends CompoundRetrieverBuilder top-level normalizer (if specified) > default (none) + if (componentNormalizer != null) { + return componentNormalizer; + } + if (topLevelNormalizer != null) { + return topLevelNormalizer; + } + return DEFAULT_NORMALIZER; + } + @SuppressWarnings("unchecked") static 
final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( NAME, @@ -93,7 +104,7 @@ public final class LinearRetrieverBuilder extends CompoundRetrieverBuilder Date: Thu, 7 Aug 2025 20:28:41 +0100 Subject: [PATCH 29/51] Parsing and builder tests are passing --- .../elasticsearch/search/retriever/RetrieverBuilder.java | 3 ++- .../xpack/rank/linear/LinearRetrieverBuilder.java | 6 +----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/retriever/RetrieverBuilder.java b/server/src/main/java/org/elasticsearch/search/retriever/RetrieverBuilder.java index 0a62e9f968e4f..5b33dafec6614 100644 --- a/server/src/main/java/org/elasticsearch/search/retriever/RetrieverBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/retriever/RetrieverBuilder.java @@ -281,6 +281,7 @@ public final boolean equals(Object o) { RetrieverBuilder that = (RetrieverBuilder) o; return Objects.equals(preFilterQueryBuilders, that.preFilterQueryBuilders) && Objects.equals(minScore, that.minScore) + && Objects.equals(retrieverName, that.retrieverName) && doEquals(o); } @@ -288,7 +289,7 @@ public final boolean equals(Object o) { @Override public final int hashCode() { - return Objects.hash(getClass(), preFilterQueryBuilders, minScore, doHashCode()); + return Objects.hash(getClass(), preFilterQueryBuilders, minScore, retrieverName, doHashCode()); } protected abstract int doHashCode(); diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java index dc7f9a023e295..f6055eba0f9b0 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java @@ -411,11 +411,7 @@ public void doToXContent(XContentBuilder builder, Params 
params) throws IOExcept builder.field(LinearRetrieverComponent.RETRIEVER_FIELD.getPreferredName(), entry.retriever()); builder.field(LinearRetrieverComponent.WEIGHT_FIELD.getPreferredName(), weights[index]); - ScoreNormalizer normalizerToUse = normalizers[index]; - if (normalizer != null && normalizerToUse.equals(IdentityScoreNormalizer.INSTANCE)) { - normalizerToUse = normalizer; - } - builder.field(LinearRetrieverComponent.NORMALIZER_FIELD.getPreferredName(), normalizerToUse.getName()); + builder.field(LinearRetrieverComponent.NORMALIZER_FIELD.getPreferredName(), normalizers[index].getName()); builder.endObject(); index++; From e0873624f445bd6b12721c96ccc9918efafc43d9 Mon Sep 17 00:00:00 2001 From: Mridula Date: Thu, 14 Aug 2025 11:23:21 +0100 Subject: [PATCH 30/51] Add comprehensive normalizer testing and cleanup duplicate files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add extensive YAML REST tests covering 12 normalizer scenarios in 10_linear_retriever.yml - Add end-to-end integration test for mixed normalizer inheritance - Remove duplicate 10_linear_retriever_normalizers.yml file as requested - Cover edge cases: zero scores, large differences, error handling, field+query format - Ensure robust testing for production-level quality 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- .../xpack/rank/linear/LinearRetrieverIT.java | 56 ++ .../test/linear/10_linear_retriever.yml | 564 ++++++++++++++++++ .../10_linear_retriever_normalizers.yml | 267 --------- 3 files changed, 620 insertions(+), 267 deletions(-) delete mode 100644 x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever_normalizers.yml diff --git a/x-pack/plugin/rank-rrf/src/internalClusterTest/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverIT.java b/x-pack/plugin/rank-rrf/src/internalClusterTest/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverIT.java index 
f98231a647470..866dc2982c48c 100644 --- a/x-pack/plugin/rank-rrf/src/internalClusterTest/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverIT.java +++ b/x-pack/plugin/rank-rrf/src/internalClusterTest/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverIT.java @@ -835,4 +835,60 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws ); assertThat(numAsyncCalls.get(), equalTo(4)); } + + public void testMixedNormalizerInheritance() throws IOException { + // Index some test documents with different score characteristics + client().prepareIndex(INDEX) + .setId("1") + .setSource("field1", "elasticsearch search", "field2", "database technology", "score", 10) + .get(); + client().prepareIndex(INDEX) + .setId("2") + .setSource("field1", "lucene engine", "field2", "search technology", "score", 5) + .get(); + client().prepareIndex(INDEX) + .setId("3") + .setSource("field1", "information retrieval", "field2", "database search", "score", 15) + .get(); + refresh(INDEX); + + // Create linear retriever with mixed normalizer configuration: + // - Top-level normalizer: minmax + // - First component: uses minmax (inherited from top-level) + // - Second component: explicitly uses l2_norm (overrides top-level) + // - Third component: uses minmax (inherited from top-level) + LinearRetrieverBuilder linearRetriever = new LinearRetrieverBuilder( + List.of( + CompoundRetrieverBuilder.RetrieverSource.from( + new StandardRetrieverBuilder(QueryBuilders.matchQuery("field1", "elasticsearch")) + ), + CompoundRetrieverBuilder.RetrieverSource.from( + new StandardRetrieverBuilder(QueryBuilders.matchQuery("field2", "technology")) + ), + CompoundRetrieverBuilder.RetrieverSource.from( + new StandardRetrieverBuilder(QueryBuilders.matchQuery("field1", "search")) + ) + ), + null, + null, + MinMaxScoreNormalizer.INSTANCE, // top-level normalizer + 10, + new float[]{1.0f, 1.0f, 1.0f}, + new ScoreNormalizer[]{null, L2ScoreNormalizer.INSTANCE, null} // mixed component 
normalizers + ); + + // Verify that the normalizer inheritance works correctly + assertThat(linearRetriever.getNormalizers()[0], equalTo(MinMaxScoreNormalizer.INSTANCE)); // inherited + assertThat(linearRetriever.getNormalizers()[1], equalTo(L2ScoreNormalizer.INSTANCE)); // overridden + assertThat(linearRetriever.getNormalizers()[2], equalTo(MinMaxScoreNormalizer.INSTANCE)); // inherited + + // Execute the search to ensure it works end-to-end + assertResponse( + client().prepareSearch(INDEX).setSource(new SearchSourceBuilder().retriever(linearRetriever)), + searchResponse -> { + // Verify we get some results - exact ranking depends on score normalization + assertThat(searchResponse.getHits().getTotalHits().value() > 0L, is(true)); + } + ); + } } diff --git a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml index b9e55aa85e05b..51e69b7262d7b 100644 --- a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml +++ b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml @@ -1547,3 +1547,567 @@ setup: - match: { hits.total.value: 0 } - length: { hits.hits: 0 } + +--- +"Linear retriever with top-level L2 normalization": + - requires: + cluster_features: [ "linear_retriever.l2_norm" ] + reason: "Support for L2 normalization" + + - do: + search: + index: test + body: + retriever: + linear: + normalizer: l2_norm + retrievers: [ + { + retriever: { + standard: { + query: { + constant_score: { + filter: { + term: { + keyword: { + value: "one" + } + } + }, + boost: 5.0 + } + } + } + }, + weight: 1.0 + }, + { + retriever: { + standard: { + query: { + constant_score: { + filter: { + term: { + keyword: { + value: "four" + } + } + }, + boost: 12.0 + } + } + } + }, + weight: 1.0 + } + ] + + - match: { hits.total.value: 2 } + - match: { 
hits.hits.0._id: "4" } # Doc 4 should rank higher with normalized scores + - match: { hits.hits.1._id: "1" } + # With L2 normalization: [5.0, 12.0] becomes [5.0/13.0, 12.0/13.0] + - close_to: { hits.hits.0._score: { value: 0.923, error: 0.01} } # 12.0/13.0 + - close_to: { hits.hits.1._score: { value: 0.385, error: 0.01} } # 5.0/13.0 + +--- +"Linear retriever with top-level minmax normalization": + - do: + search: + index: test + body: + retriever: + linear: + normalizer: minmax + retrievers: [ + { + retriever: { + standard: { + query: { + constant_score: { + filter: { + term: { + keyword: { + value: "one" + } + } + }, + boost: 3.0 + } + } + } + }, + weight: 2.0 + }, + { + retriever: { + standard: { + query: { + constant_score: { + filter: { + term: { + keyword: { + value: "four" + } + } + }, + boost: 9.0 + } + } + } + }, + weight: 1.5 + } + ] + + - match: { hits.total.value: 2 } + - match: { hits.hits.0._id: "4" } + - match: { hits.hits.1._id: "1" } + # With minmax normalization: [3.0, 9.0] becomes [0.0, 1.0] (min=3, max=9) + # Final scores: doc1 = 2.0 * 0.0 = 0.0, doc4 = 1.5 * 1.0 = 1.5 + - close_to: { hits.hits.0._score: { value: 1.5, error: 0.001} } + - close_to: { hits.hits.1._score: { value: 0.0, error: 0.001} } + +--- +"Linear retriever with per-retriever normalizer override": + - requires: + cluster_features: [ "linear_retriever.l2_norm" ] + reason: "Support for L2 normalization" + + - do: + search: + index: test + body: + retriever: + linear: + normalizer: minmax # Top-level normalizer + retrievers: [ + { + retriever: { + standard: { + query: { + constant_score: { + filter: { + term: { + keyword: { + value: "one" + } + } + }, + boost: 5.0 + } + } + } + }, + weight: 1.0, + normalizer: l2_norm # Override with L2 norm + }, + { + retriever: { + standard: { + query: { + constant_score: { + filter: { + term: { + keyword: { + value: "four" + } + } + }, + boost: 12.0 + } + } + } + }, + weight: 1.0 # Uses top-level minmax normalizer + } + ] + + - match: { 
hits.total.value: 2 } + # First retriever uses L2 norm (score normalized to 1.0) + # Second retriever uses minmax (score normalized to 1.0 since it's the only score) + # Both docs get final score of 1.0, ranking by document order + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "4" } + - close_to: { hits.hits.0._score: { value: 1.0, error: 0.001} } + - close_to: { hits.hits.1._score: { value: 1.0, error: 0.001} } + +--- +"Linear retriever with mixed normalizers - inheritance test": + - requires: + cluster_features: [ "linear_retriever.l2_norm" ] + reason: "Support for L2 normalization" + + - do: + search: + index: test + body: + retriever: + linear: + normalizer: l2_norm # Top-level normalizer + retrievers: [ + { + retriever: { + standard: { + query: { + constant_score: { + filter: { + term: { + keyword: { + value: "one" + } + } + }, + boost: 4.0 + } + } + } + }, + weight: 1.0 + # No normalizer specified - should inherit top-level L2 norm + }, + { + retriever: { + standard: { + query: { + constant_score: { + filter: { + term: { + keyword: { + value: "two" + } + } + }, + boost: 8.0 + } + } + } + }, + weight: 1.0, + normalizer: minmax # Override with minmax + }, + { + retriever: { + standard: { + query: { + constant_score: { + filter: { + term: { + keyword: { + value: "three" + } + } + }, + boost: 6.0 + } + } + } + }, + weight: 2.0 + # No normalizer specified - should inherit top-level L2 norm + } + ] + + - match: { hits.total.value: 3 } + # Complex mixed normalization scenario with different weights + # First retriever: L2 norm on [4.0] = 1.0, final = 1.0 * 1.0 = 1.0 + # Second retriever: minmax on [8.0] = 1.0, final = 1.0 * 1.0 = 1.0 + # Third retriever: L2 norm on [6.0] = 1.0, final = 1.0 * 2.0 = 2.0 + - match: { hits.hits.0._id: "3" } # Highest score due to weight=2.0 + - close_to: { hits.hits.0._score: { value: 2.0, error: 0.001} } + +--- +"Linear retriever with identity normalizer (no normalization)": + - do: + search: + index: test + body: + 
retriever: + linear: + normalizer: none + retrievers: [ + { + retriever: { + standard: { + query: { + constant_score: { + filter: { + term: { + keyword: { + value: "one" + } + } + }, + boost: 7.5 + } + } + } + }, + weight: 2.0 + }, + { + retriever: { + standard: { + query: { + constant_score: { + filter: { + term: { + keyword: { + value: "four" + } + } + }, + boost: 3.2 + } + } + } + }, + weight: 1.0 + } + ] + + - match: { hits.total.value: 2 } + - match: { hits.hits.0._id: "1" } # 7.5 * 2.0 = 15.0 + - match: { hits.hits.1._id: "4" } # 3.2 * 1.0 = 3.2 + - close_to: { hits.hits.0._score: { value: 15.0, error: 0.001} } + - close_to: { hits.hits.1._score: { value: 3.2, error: 0.001} } + +--- +"Linear retriever with all normalizers applied to same data": + - requires: + cluster_features: [ "linear_retriever.l2_norm" ] + reason: "Support for L2 normalization" + + - do: + search: + index: test + body: + retriever: + linear: + retrievers: [ + { + retriever: { + standard: { + query: { + constant_score: { + filter: { + term: { + keyword: { + value: "one" + } + } + }, + boost: 10.0 + } + } + } + }, + weight: 1.0, + normalizer: none + }, + { + retriever: { + standard: { + query: { + constant_score: { + filter: { + term: { + keyword: { + value: "one" + } + } + }, + boost: 10.0 + } + } + } + }, + weight: 1.0, + normalizer: l2_norm + }, + { + retriever: { + standard: { + query: { + constant_score: { + filter: { + term: { + keyword: { + value: "one" + } + } + }, + boost: 10.0 + } + } + } + }, + weight: 1.0, + normalizer: minmax + } + ] + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "1" } + # All three retrievers return same doc with different normalizations: + # none: 10.0, l2_norm: 1.0, minmax: 1.0 (single value) + # Final score: 10.0 + 1.0 + 1.0 = 12.0 + - close_to: { hits.hits.0._score: { value: 12.0, error: 0.001} } + +--- +"Linear retriever normalization with zero scores": + - do: + search: + index: test + body: + retriever: + linear: + normalizer: minmax 
+ retrievers: [ + { + retriever: { + standard: { + query: { + constant_score: { + filter: { + term: { + nonexistent_field: "nonexistent_value" + } + }, + boost: 5.0 + } + } + } + }, + weight: 1.0 + }, + { + retriever: { + standard: { + query: { + constant_score: { + filter: { + term: { + keyword: { + value: "one" + } + } + }, + boost: 10.0 + } + } + } + }, + weight: 2.0 + } + ] + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "1" } + # Only second retriever returns results, minmax normalization on [10.0] = 1.0 + # Final score: 2.0 * 1.0 = 2.0 + - close_to: { hits.hits.0._score: { value: 2.0, error: 0.001} } + +--- +"Linear retriever with field+query format and normalizer inheritance": + - requires: + cluster_features: [ "multi_fields_query_format_support" ] + reason: "Support for multi-fields query format" + + - do: + search: + index: test + body: + retriever: + linear: + fields: ["keyword", "other_keyword"] + query: "other" + normalizer: l2_norm + + - match: { hits.total.value: 2 } + # Both doc1 and doc4 have "other" in other_keyword field + # L2 normalization should be applied to generated retrievers + +--- +"Linear retriever error handling - invalid normalizer": + - do: + catch: bad_request + search: + index: test + body: + retriever: + linear: + normalizer: invalid_normalizer + retrievers: [ + { + retriever: { + standard: { + query: { + match_all: {} + } + } + }, + weight: 1.0 + } + ] + +--- +"Linear retriever with large score differences and L2 normalization": + - requires: + cluster_features: [ "linear_retriever.l2_norm" ] + reason: "Support for L2 normalization" + + - do: + search: + index: test + body: + retriever: + linear: + normalizer: l2_norm + retrievers: [ + { + retriever: { + standard: { + query: { + constant_score: { + filter: { + term: { + keyword: { + value: "one" + } + } + }, + boost: 1000.0 + } + } + } + }, + weight: 1.0 + }, + { + retriever: { + standard: { + query: { + constant_score: { + filter: { + term: { + keyword: { + 
value: "four" + } + } + }, + boost: 1.0 + } + } + } + }, + weight: 1.0 + } + ] + + - match: { hits.total.value: 2 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "4" } + # L2 normalization handles large score differences gracefully + # [1000.0, 1.0] normalized = [1000.0/sqrt(1000000+1), 1.0/sqrt(1000000+1)] + - close_to: { hits.hits.0._score: { value: 0.9999995, error: 0.0001} } + - close_to: { hits.hits.1._score: { value: 0.001, error: 0.0001} } diff --git a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever_normalizers.yml b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever_normalizers.yml deleted file mode 100644 index 2678f5a567f75..0000000000000 --- a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever_normalizers.yml +++ /dev/null @@ -1,267 +0,0 @@ -setup: - - requires: - cluster_features: [ "linear_retriever_supported", "linear_retriever.l2_norm" ] - reason: "Support for linear retriever and L2 normalization" - test_runner_features: close_to - - - do: - indices.create: - index: test - body: - mappings: - properties: - vector: - type: dense_vector - dims: 1 - index: true - similarity: l2_norm - index_options: - type: flat - keyword: - type: keyword - other_keyword: - type: keyword - timestamp: - type: date - - - do: - bulk: - refresh: true - index: test - body: - - '{"index": {"_id": 1 }}' - - '{"vector": [1], "keyword": "one", "other_keyword": "other", "timestamp": "2021-01-01T00:00:00"}' - - '{"index": {"_id": 2 }}' - - '{"vector": [2], "keyword": "two", "timestamp": "2022-01-01T00:00:00"}' - - '{"index": {"_id": 3 }}' - - '{"vector": [3], "keyword": "three", "timestamp": "2023-01-01T00:00:00"}' - - '{"index": {"_id": 4 }}' - - '{"vector": [4], "keyword": "four", "other_keyword": "other", "timestamp": "2024-01-01T00:00:00"}' - ---- -"Linear retriever with top-level L2 normalization": - - do: - search: - 
index: test - body: - retriever: - linear: - normalizer: l2_norm - retrievers: [ - { - retriever: { - standard: { - query: { - constant_score: { - filter: { - term: { - keyword: { - value: "one" - } - } - }, - boost: 5.0 - } - } - } - }, - weight: 1.0 - }, - { - retriever: { - standard: { - query: { - constant_score: { - filter: { - term: { - keyword: { - value: "four" - } - } - }, - boost: 12.0 - } - } - } - }, - weight: 1.0 - } - ] - - - match: { hits.total.value: 2 } - - match: { hits.hits.0._id: "4" } # Doc 4 should rank higher with normalized scores - - match: { hits.hits.1._id: "1" } - # With L2 normalization: [5.0, 12.0] becomes [5.0/13.0, 12.0/13.0] - - close_to: { hits.hits.0._score: { value: 0.923, error: 0.01} } # 12.0/13.0 - - close_to: { hits.hits.1._score: { value: 0.385, error: 0.01} } # 5.0/13.0 - ---- -"Linear retriever with per-retriever L2 normalization": - - do: - search: - index: test - body: - retriever: - linear: - retrievers: [ - { - retriever: { - standard: { - query: { - constant_score: { - filter: { - term: { - keyword: { - value: "one" - } - } - }, - boost: 5.0 - } - } - } - }, - weight: 1.0, - normalizer: l2_norm - }, - { - retriever: { - standard: { - query: { - constant_score: { - filter: { - term: { - keyword: { - value: "four" - } - } - }, - boost: 12.0 - } - } - } - }, - weight: 1.0, - normalizer: l2_norm - } - ] - - - match: { hits.total.value: 2 } - # With per-retriever L2 normalization, both scores would be normalized to 1.0 - # So final score = 1.0 * weight1 + 1.0 * weight2 = 2.0 for each doc - # Then sorting is done by _doc (or some other tiebreaker) - - close_to: { hits.hits.0._score: { value: 1.0, error: 0.01} } - - close_to: { hits.hits.1._score: { value: 1.0, error: 0.01} } - ---- -"Linear retriever with mixed normalization (top-level and per-retriever with same normalizer)": - - do: - search: - index: test - body: - retriever: - linear: - normalizer: l2_norm - retrievers: [ - { - retriever: { - standard: { - query: { - 
constant_score: { - filter: { - term: { - keyword: { - value: "one" - } - } - }, - boost: 5.0 - } - } - } - }, - weight: 1.0 - }, - { - retriever: { - standard: { - query: { - constant_score: { - filter: { - term: { - keyword: { - value: "four" - } - } - }, - boost: 12.0 - } - } - } - }, - weight: 1.0, - normalizer: l2_norm - } - ] - - - match: { hits.total.value: 2 } - - match: { hits.hits.0._id: "4" } - - match: { hits.hits.1._id: "1" } - # With L2 normalization: [5.0, 12.0] becomes [5.0/13.0, 12.0/13.0] - - close_to: { hits.hits.0._score: { value: 0.923, error: 0.01} } - - close_to: { hits.hits.1._score: { value: 0.385, error: 0.01} } - ---- -"Linear retriever with per-retriever normalizer override": - - do: - search: - index: test - body: - retriever: - linear: - normalizer: minmax - retrievers: [ - { - retriever: { - standard: { - query: { - constant_score: { - filter: { - term: { - keyword: { - value: "one" - } - } - }, - boost: 5.0 - } - } - } - }, - weight: 1.0, - normalizer: l2_norm - }, - { - retriever: { - standard: { - query: { - constant_score: { - filter: { - term: { - keyword: { - value: "four" - } - } - }, - boost: 12.0 - } - } - } - }, - weight: 1.0 - } - ] - - - match: { hits.total.value: 2 } - # First retriever uses l2_norm override, second uses minmax from top-level - # The scores will reflect this mixed normalization approach From a964d0889460855403fa3b2563c084382d13fd72 Mon Sep 17 00:00:00 2001 From: Mridula Date: Thu, 14 Aug 2025 11:29:48 +0100 Subject: [PATCH 31/51] Apply spotless code formatting --- .../xpack/rank/linear/LinearRetrieverIT.java | 30 +++++++------------ 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/internalClusterTest/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverIT.java b/x-pack/plugin/rank-rrf/src/internalClusterTest/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverIT.java index 866dc2982c48c..4e1b9e324de0c 100644 --- 
a/x-pack/plugin/rank-rrf/src/internalClusterTest/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverIT.java +++ b/x-pack/plugin/rank-rrf/src/internalClusterTest/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverIT.java @@ -837,15 +837,12 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws } public void testMixedNormalizerInheritance() throws IOException { - // Index some test documents with different score characteristics + // Index some test documents with different score characteristics client().prepareIndex(INDEX) .setId("1") .setSource("field1", "elasticsearch search", "field2", "database technology", "score", 10) .get(); - client().prepareIndex(INDEX) - .setId("2") - .setSource("field1", "lucene engine", "field2", "search technology", "score", 5) - .get(); + client().prepareIndex(INDEX).setId("2").setSource("field1", "lucene engine", "field2", "search technology", "score", 5).get(); client().prepareIndex(INDEX) .setId("3") .setSource("field1", "information retrieval", "field2", "database search", "score", 15) @@ -853,7 +850,7 @@ public void testMixedNormalizerInheritance() throws IOException { refresh(INDEX); // Create linear retriever with mixed normalizer configuration: - // - Top-level normalizer: minmax + // - Top-level normalizer: minmax // - First component: uses minmax (inherited from top-level) // - Second component: explicitly uses l2_norm (overrides top-level) // - Third component: uses minmax (inherited from top-level) @@ -865,30 +862,25 @@ public void testMixedNormalizerInheritance() throws IOException { CompoundRetrieverBuilder.RetrieverSource.from( new StandardRetrieverBuilder(QueryBuilders.matchQuery("field2", "technology")) ), - CompoundRetrieverBuilder.RetrieverSource.from( - new StandardRetrieverBuilder(QueryBuilders.matchQuery("field1", "search")) - ) + CompoundRetrieverBuilder.RetrieverSource.from(new StandardRetrieverBuilder(QueryBuilders.matchQuery("field1", "search"))) ), null, null, 
MinMaxScoreNormalizer.INSTANCE, // top-level normalizer 10, - new float[]{1.0f, 1.0f, 1.0f}, - new ScoreNormalizer[]{null, L2ScoreNormalizer.INSTANCE, null} // mixed component normalizers + new float[] { 1.0f, 1.0f, 1.0f }, + new ScoreNormalizer[] { null, L2ScoreNormalizer.INSTANCE, null } // mixed component normalizers ); // Verify that the normalizer inheritance works correctly assertThat(linearRetriever.getNormalizers()[0], equalTo(MinMaxScoreNormalizer.INSTANCE)); // inherited - assertThat(linearRetriever.getNormalizers()[1], equalTo(L2ScoreNormalizer.INSTANCE)); // overridden + assertThat(linearRetriever.getNormalizers()[1], equalTo(L2ScoreNormalizer.INSTANCE)); // overridden assertThat(linearRetriever.getNormalizers()[2], equalTo(MinMaxScoreNormalizer.INSTANCE)); // inherited // Execute the search to ensure it works end-to-end - assertResponse( - client().prepareSearch(INDEX).setSource(new SearchSourceBuilder().retriever(linearRetriever)), - searchResponse -> { - // Verify we get some results - exact ranking depends on score normalization - assertThat(searchResponse.getHits().getTotalHits().value() > 0L, is(true)); - } - ); + assertResponse(client().prepareSearch(INDEX).setSource(new SearchSourceBuilder().retriever(linearRetriever)), searchResponse -> { + // Verify we get some results - exact ranking depends on score normalization + assertThat(searchResponse.getHits().getTotalHits().value() > 0L, is(true)); + }); } } From 47d2086471fbfe57030330eeeb5c31fc25281da4 Mon Sep 17 00:00:00 2001 From: Mridula Date: Thu, 14 Aug 2025 12:19:06 +0100 Subject: [PATCH 32/51] Final changes --- .../test/linear/10_linear_retriever.yml | 36 ++++++++++--------- .../linear/20_linear_retriever_simplified.yml | 19 ---------- 2 files changed, 20 insertions(+), 35 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml 
b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml index 51e69b7262d7b..ebd9b5085e0af 100644 --- a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml +++ b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml @@ -1603,11 +1603,13 @@ setup: ] - match: { hits.total.value: 2 } - - match: { hits.hits.0._id: "4" } # Doc 4 should rank higher with normalized scores - - match: { hits.hits.1._id: "1" } - # With L2 normalization: [5.0, 12.0] becomes [5.0/13.0, 12.0/13.0] - - close_to: { hits.hits.0._score: { value: 0.923, error: 0.01} } # 12.0/13.0 - - close_to: { hits.hits.1._score: { value: 0.385, error: 0.01} } # 5.0/13.0 + - match: { hits.hits.0._id: "1" } # Both have same final score, ranked by document order + - match: { hits.hits.1._id: "4" } + # With per-retriever L2 normalization: each retriever normalizes its own scores + # First retriever: [5.0] → L2 normalized to [1.0] → final: 1.0 * 1.0 = 1.0 + # Second retriever: [12.0] → L2 normalized to [1.0] → final: 1.0 * 1.0 = 1.0 + - close_to: { hits.hits.0._score: { value: 1.0, error: 0.01} } + - close_to: { hits.hits.1._score: { value: 1.0, error: 0.01} } --- "Linear retriever with top-level minmax normalization": @@ -1660,12 +1662,13 @@ setup: ] - match: { hits.total.value: 2 } - - match: { hits.hits.0._id: "4" } - - match: { hits.hits.1._id: "1" } - # With minmax normalization: [3.0, 9.0] becomes [0.0, 1.0] (min=3, max=9) - # Final scores: doc1 = 2.0 * 0.0 = 0.0, doc4 = 1.5 * 1.0 = 1.5 - - close_to: { hits.hits.0._score: { value: 1.5, error: 0.001} } - - close_to: { hits.hits.1._score: { value: 0.0, error: 0.001} } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "4" } + # With per-retriever minmax normalization: each retriever normalizes its own scores + # First retriever: [3.0] → normalized to [1.0] → final: 1.0 * 2.0 = 2.0 + # Second retriever: [9.0] → 
normalized to [1.0] → final: 1.0 * 1.5 = 1.5 + - close_to: { hits.hits.0._score: { value: 2.0, error: 0.001} } + - close_to: { hits.hits.1._score: { value: 1.5, error: 0.001} } --- "Linear retriever with per-retriever normalizer override": @@ -2010,7 +2013,7 @@ setup: --- "Linear retriever with field+query format and normalizer inheritance": - requires: - cluster_features: [ "multi_fields_query_format_support" ] + cluster_features: [ "linear_retriever.multi_fields_query_format_support" ] reason: "Support for multi-fields query format" - do: @@ -2107,7 +2110,8 @@ setup: - match: { hits.total.value: 2 } - match: { hits.hits.0._id: "1" } - match: { hits.hits.1._id: "4" } - # L2 normalization handles large score differences gracefully - # [1000.0, 1.0] normalized = [1000.0/sqrt(1000000+1), 1.0/sqrt(1000000+1)] - - close_to: { hits.hits.0._score: { value: 0.9999995, error: 0.0001} } - - close_to: { hits.hits.1._score: { value: 0.001, error: 0.0001} } + # With per-retriever L2 normalization: each retriever normalizes its own scores + # First retriever: [1000.0] → L2 normalized to [1.0] → final: 1.0 * 1.0 = 1.0 + # Second retriever: [1.0] → L2 normalized to [1.0] → final: 1.0 * 1.0 = 1.0 + - close_to: { hits.hits.0._score: { value: 1.0, error: 0.001} } + - close_to: { hits.hits.1._score: { value: 1.0, error: 0.001} } diff --git a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/20_linear_retriever_simplified.yml b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/20_linear_retriever_simplified.yml index 7ab0d727a7383..a90354a8d8976 100644 --- a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/20_linear_retriever_simplified.yml +++ b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/20_linear_retriever_simplified.yml @@ -410,25 +410,6 @@ setup: - contains: { error.root_cause.0.reason: "[linear] cannot combine [retrievers] and [query]" } ---- -"Cannot set top-level 
normalizer when using custom sub-retrievers": - - do: - catch: bad_request - search: - index: test-index - body: - retriever: - linear: - normalizer: "minmax" - retrievers: - - retriever: - standard: - query: - match: - keyword: "bar" - - - contains: { error.root_cause.0.reason: "[linear] [normalizer] cannot be provided when [retrievers] is specified" } - --- "Missing required params": - do: From 22e51aa477886af7c9fdcbc18786602c62f99de1 Mon Sep 17 00:00:00 2001 From: Mridula Date: Thu, 14 Aug 2025 13:32:52 +0100 Subject: [PATCH 33/51] Removed unnecessary changes from doc --- .../rest-apis/retrievers/linear-retriever.md | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/docs/reference/elasticsearch/rest-apis/retrievers/linear-retriever.md b/docs/reference/elasticsearch/rest-apis/retrievers/linear-retriever.md index 6b8325374fe7c..b241f245c0580 100644 --- a/docs/reference/elasticsearch/rest-apis/retrievers/linear-retriever.md +++ b/docs/reference/elasticsearch/rest-apis/retrievers/linear-retriever.md @@ -107,25 +107,18 @@ GET my_index/_search { "retriever": { "knn": { - "field": "title_vector", - "query_vector": [0.1, 0.2, 0.3], - "k": 10, - "num_candidates": 100 + ... } }, - "weight": 5.0 + "weight": 5 # KNN query weighted 5x }, { "retriever": { "standard": { - "query": { - "match": { - "title": "elasticsearch" - } - } + ... 
} }, - "weight": 1.5 + "weight": 1.5 # BM25 query weighted 1.5x } ] } From b03d586fb391ecd72dba53fb0673a05d41fa8a19 Mon Sep 17 00:00:00 2001 From: Mridula Date: Thu, 14 Aug 2025 13:39:29 +0100 Subject: [PATCH 34/51] Cleaned up test --- .../xpack/rank/linear/LinearRetrieverIT.java | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/internalClusterTest/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverIT.java b/x-pack/plugin/rank-rrf/src/internalClusterTest/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverIT.java index 4e1b9e324de0c..5ef6c689d8778 100644 --- a/x-pack/plugin/rank-rrf/src/internalClusterTest/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverIT.java +++ b/x-pack/plugin/rank-rrf/src/internalClusterTest/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverIT.java @@ -837,7 +837,6 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws } public void testMixedNormalizerInheritance() throws IOException { - // Index some test documents with different score characteristics client().prepareIndex(INDEX) .setId("1") .setSource("field1", "elasticsearch search", "field2", "database technology", "score", 10) @@ -849,11 +848,6 @@ public void testMixedNormalizerInheritance() throws IOException { .get(); refresh(INDEX); - // Create linear retriever with mixed normalizer configuration: - // - Top-level normalizer: minmax - // - First component: uses minmax (inherited from top-level) - // - Second component: explicitly uses l2_norm (overrides top-level) - // - Third component: uses minmax (inherited from top-level) LinearRetrieverBuilder linearRetriever = new LinearRetrieverBuilder( List.of( CompoundRetrieverBuilder.RetrieverSource.from( @@ -866,20 +860,17 @@ public void testMixedNormalizerInheritance() throws IOException { ), null, null, - MinMaxScoreNormalizer.INSTANCE, // top-level normalizer + MinMaxScoreNormalizer.INSTANCE, 10, new float[] { 1.0f, 1.0f, 
1.0f }, - new ScoreNormalizer[] { null, L2ScoreNormalizer.INSTANCE, null } // mixed component normalizers + new ScoreNormalizer[] { null, L2ScoreNormalizer.INSTANCE, null } ); - // Verify that the normalizer inheritance works correctly - assertThat(linearRetriever.getNormalizers()[0], equalTo(MinMaxScoreNormalizer.INSTANCE)); // inherited - assertThat(linearRetriever.getNormalizers()[1], equalTo(L2ScoreNormalizer.INSTANCE)); // overridden - assertThat(linearRetriever.getNormalizers()[2], equalTo(MinMaxScoreNormalizer.INSTANCE)); // inherited + assertThat(linearRetriever.getNormalizers()[0], equalTo(MinMaxScoreNormalizer.INSTANCE)); + assertThat(linearRetriever.getNormalizers()[1], equalTo(L2ScoreNormalizer.INSTANCE)); + assertThat(linearRetriever.getNormalizers()[2], equalTo(MinMaxScoreNormalizer.INSTANCE)); - // Execute the search to ensure it works end-to-end assertResponse(client().prepareSearch(INDEX).setSource(new SearchSourceBuilder().retriever(linearRetriever)), searchResponse -> { - // Verify we get some results - exact ranking depends on score normalization assertThat(searchResponse.getHits().getTotalHits().value() > 0L, is(true)); }); } From a6709363099279c8767edec3416fc58b1e179574 Mon Sep 17 00:00:00 2001 From: Mridula Date: Thu, 14 Aug 2025 13:57:53 +0100 Subject: [PATCH 35/51] Cleaned up --- .../xpack/rank/linear/LinearRetrieverBuilder.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java index f6055eba0f9b0..2dfcfb923f3f4 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java @@ -76,7 +76,6 @@ public final class LinearRetrieverBuilder extends CompoundRetrieverBuilder top-level 
normalizer (if specified) > default (none) if (componentNormalizer != null) { return componentNormalizer; } @@ -410,9 +409,7 @@ public void doToXContent(XContentBuilder builder, Params params) throws IOExcept builder.startObject(); builder.field(LinearRetrieverComponent.RETRIEVER_FIELD.getPreferredName(), entry.retriever()); builder.field(LinearRetrieverComponent.WEIGHT_FIELD.getPreferredName(), weights[index]); - builder.field(LinearRetrieverComponent.NORMALIZER_FIELD.getPreferredName(), normalizers[index].getName()); - builder.endObject(); index++; } From f6a8d4c9f20926643f3d8ec8762e13e6b0b56397 Mon Sep 17 00:00:00 2001 From: Mridula Date: Thu, 14 Aug 2025 15:42:23 +0100 Subject: [PATCH 36/51] Reviewed the code --- .../rank/linear/LinearRetrieverBuilder.java | 19 ++----------------- .../rank/linear/LinearRetrieverComponent.java | 4 +--- .../xpack/rank/linear/ScoreNormalizer.java | 19 ------------------- .../LinearRetrieverBuilderParsingTests.java | 11 +++++++---- 4 files changed, 10 insertions(+), 43 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java index 2dfcfb923f3f4..20feac674bc4c 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java @@ -389,18 +389,6 @@ ScoreNormalizer[] getNormalizers() { return normalizers; } - List getFields() { - return fields; - } - - String getQuery() { - return query; - } - - ScoreNormalizer getNormalizer() { - return normalizer; - } - public void doToXContent(XContentBuilder builder, Params params) throws IOException { int index = 0; if (innerRetrievers.isEmpty() == false) { @@ -435,12 +423,9 @@ public void doToXContent(XContentBuilder builder, Params params) throws IOExcept @Override public 
boolean doEquals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - if (super.doEquals(o) == false) return false; - LinearRetrieverBuilder that = (LinearRetrieverBuilder) o; - return Arrays.equals(weights, that.weights) + return super.doEquals(o) + && Arrays.equals(weights, that.weights) && Arrays.equals(normalizers, that.normalizers) && Objects.equals(fields, that.fields) && Objects.equals(query, that.query) diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverComponent.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverComponent.java index 583e0e78329a5..963ba6883e7c9 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverComponent.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverComponent.java @@ -48,9 +48,7 @@ public LinearRetrieverComponent(RetrieverBuilder retrieverBuilder, Float weight, public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.field(RETRIEVER_FIELD.getPreferredName(), retriever); builder.field(WEIGHT_FIELD.getPreferredName(), weight); - if (normalizer != null) { - builder.field(NORMALIZER_FIELD.getPreferredName(), normalizer.getName()); - } + builder.field(NORMALIZER_FIELD.getPreferredName(), normalizer.getName()); return builder; } diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/ScoreNormalizer.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/ScoreNormalizer.java index d760ffa878aaa..34b7277498218 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/ScoreNormalizer.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/ScoreNormalizer.java @@ -9,8 +9,6 @@ import org.apache.lucene.search.ScoreDoc; -import java.util.Objects; - /** * 
A no-op {@link ScoreNormalizer} that does not modify the scores. */ @@ -33,21 +31,4 @@ public static ScoreNormalizer valueOf(String normalizer) { public abstract String getName(); public abstract ScoreDoc[] normalizeScores(ScoreDoc[] docs); - - @Override - public boolean equals(Object obj) { - if (this == obj) { - return true; - } - if (obj == null || getClass() != obj.getClass()) { - return false; - } - ScoreNormalizer that = (ScoreNormalizer) obj; - return Objects.equals(getName(), that.getName()); - } - - @Override - public int hashCode() { - return Objects.hash(getName()); - } } diff --git a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderParsingTests.java b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderParsingTests.java index f3326a6fb5d9e..cc2dc80b8a4d6 100644 --- a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderParsingTests.java +++ b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderParsingTests.java @@ -25,9 +25,9 @@ import java.io.IOException; import java.util.ArrayList; -import java.util.Collections; import java.util.List; +import static java.util.Collections.emptyList; import static org.hamcrest.Matchers.instanceOf; public class LinearRetrieverBuilderParsingTests extends AbstractXContentTestCase { @@ -35,7 +35,7 @@ public class LinearRetrieverBuilderParsingTests extends AbstractXContentTestCase @BeforeClass public static void init() { - xContentRegistryEntries = new SearchModule(Settings.EMPTY, Collections.emptyList()).getNamedXContents(); + xContentRegistryEntries = new SearchModule(Settings.EMPTY, emptyList()).getNamedXContents(); } @AfterClass @@ -152,8 +152,11 @@ public void testTopLevelNormalizer() throws IOException { try (XContentParser parser = createParser(XContentType.JSON.xContent(), json)) { LinearRetrieverBuilder builder = doParseInstance(parser); - 
assertThat(builder.getNormalizer(), instanceOf(MinMaxScoreNormalizer.class)); - for (ScoreNormalizer normalizer : builder.getNormalizers()) { + // Test that the top-level normalizer is properly applied - the individual + // retrievers specified "none" but should be overridden by top-level "minmax" + ScoreNormalizer[] normalizers = builder.getNormalizers(); + assertEquals(2, normalizers.length); + for (ScoreNormalizer normalizer : normalizers) { assertThat(normalizer, instanceOf(IdentityScoreNormalizer.class)); } } From a8e1d5fa80e2303043b55e4c7f6385753ae10668 Mon Sep 17 00:00:00 2001 From: Mridula Date: Thu, 14 Aug 2025 15:52:32 +0100 Subject: [PATCH 37/51] Cleaned up comments' --- .../rank/linear/LinearRetrieverBuilderTests.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java index 4da1e6110f2a9..10ce196a56d6e 100644 --- a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java +++ b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java @@ -345,9 +345,9 @@ public void testTopLevelNormalizerWithRetrieversArray() { CompoundRetrieverBuilder.RetrieverSource.from(standardRetriever), CompoundRetrieverBuilder.RetrieverSource.from(knnRetriever) ), - null, // fields - null, // query - MinMaxScoreNormalizer.INSTANCE, // top-level normalizer + null, + null, + MinMaxScoreNormalizer.INSTANCE, DEFAULT_RANK_WINDOW_SIZE, new float[] { 1.0f, 2.0f }, new ScoreNormalizer[] { null, null } @@ -374,9 +374,9 @@ public void testTopLevelNormalizerWithPerRetrieverOverrides() { CompoundRetrieverBuilder.RetrieverSource.from(standardRetriever), CompoundRetrieverBuilder.RetrieverSource.from(knnRetriever) ), - null, // fields - null, // query - 
MinMaxScoreNormalizer.INSTANCE, // top-level normalizer + null, + null, + MinMaxScoreNormalizer.INSTANCE, DEFAULT_RANK_WINDOW_SIZE, new float[] { 1.0f, 2.0f }, new ScoreNormalizer[] { L2ScoreNormalizer.INSTANCE, null } From 04f715b3845399d3b2e671c157847e0954eadd1c Mon Sep 17 00:00:00 2001 From: Mridula Date: Thu, 14 Aug 2025 15:56:07 +0100 Subject: [PATCH 38/51] Reverted RetrieverBuilder --- .../org/elasticsearch/search/retriever/RetrieverBuilder.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/retriever/RetrieverBuilder.java b/server/src/main/java/org/elasticsearch/search/retriever/RetrieverBuilder.java index 5b33dafec6614..0a62e9f968e4f 100644 --- a/server/src/main/java/org/elasticsearch/search/retriever/RetrieverBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/retriever/RetrieverBuilder.java @@ -281,7 +281,6 @@ public final boolean equals(Object o) { RetrieverBuilder that = (RetrieverBuilder) o; return Objects.equals(preFilterQueryBuilders, that.preFilterQueryBuilders) && Objects.equals(minScore, that.minScore) - && Objects.equals(retrieverName, that.retrieverName) && doEquals(o); } @@ -289,7 +288,7 @@ public final boolean equals(Object o) { @Override public final int hashCode() { - return Objects.hash(getClass(), preFilterQueryBuilders, minScore, retrieverName, doHashCode()); + return Objects.hash(getClass(), preFilterQueryBuilders, minScore, doHashCode()); } protected abstract int doHashCode(); From a8235c4bf2f50638ab4212af315653d1f17ccdea Mon Sep 17 00:00:00 2001 From: Mridula Date: Thu, 14 Aug 2025 16:24:17 +0100 Subject: [PATCH 39/51] Cleaned up yaml --- .../test/linear/10_linear_retriever.yml | 145 +----------------- 1 file changed, 5 insertions(+), 140 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml 
b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml index ebd9b5085e0af..4db0ec63ea339 100644 --- a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml +++ b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml @@ -1548,128 +1548,6 @@ setup: - match: { hits.total.value: 0 } - length: { hits.hits: 0 } ---- -"Linear retriever with top-level L2 normalization": - - requires: - cluster_features: [ "linear_retriever.l2_norm" ] - reason: "Support for L2 normalization" - - - do: - search: - index: test - body: - retriever: - linear: - normalizer: l2_norm - retrievers: [ - { - retriever: { - standard: { - query: { - constant_score: { - filter: { - term: { - keyword: { - value: "one" - } - } - }, - boost: 5.0 - } - } - } - }, - weight: 1.0 - }, - { - retriever: { - standard: { - query: { - constant_score: { - filter: { - term: { - keyword: { - value: "four" - } - } - }, - boost: 12.0 - } - } - } - }, - weight: 1.0 - } - ] - - - match: { hits.total.value: 2 } - - match: { hits.hits.0._id: "1" } # Both have same final score, ranked by document order - - match: { hits.hits.1._id: "4" } - # With per-retriever L2 normalization: each retriever normalizes its own scores - # First retriever: [5.0] → L2 normalized to [1.0] → final: 1.0 * 1.0 = 1.0 - # Second retriever: [12.0] → L2 normalized to [1.0] → final: 1.0 * 1.0 = 1.0 - - close_to: { hits.hits.0._score: { value: 1.0, error: 0.01} } - - close_to: { hits.hits.1._score: { value: 1.0, error: 0.01} } - ---- -"Linear retriever with top-level minmax normalization": - - do: - search: - index: test - body: - retriever: - linear: - normalizer: minmax - retrievers: [ - { - retriever: { - standard: { - query: { - constant_score: { - filter: { - term: { - keyword: { - value: "one" - } - } - }, - boost: 3.0 - } - } - } - }, - weight: 2.0 - }, - { - retriever: { - standard: { - query: { - 
constant_score: { - filter: { - term: { - keyword: { - value: "four" - } - } - }, - boost: 9.0 - } - } - } - }, - weight: 1.5 - } - ] - - - match: { hits.total.value: 2 } - - match: { hits.hits.0._id: "1" } - - match: { hits.hits.1._id: "4" } - # With per-retriever minmax normalization: each retriever normalizes its own scores - # First retriever: [3.0] → normalized to [1.0] → final: 1.0 * 2.0 = 2.0 - # Second retriever: [9.0] → normalized to [1.0] → final: 1.0 * 1.5 = 1.5 - - close_to: { hits.hits.0._score: { value: 2.0, error: 0.001} } - - close_to: { hits.hits.1._score: { value: 1.5, error: 0.001} } - --- "Linear retriever with per-retriever normalizer override": - requires: @@ -1682,7 +1560,7 @@ setup: body: retriever: linear: - normalizer: minmax # Top-level normalizer + normalizer: minmax retrievers: [ { retriever: { @@ -1702,7 +1580,7 @@ setup: } }, weight: 1.0, - normalizer: l2_norm # Override with L2 norm + normalizer: l2_norm }, { retriever: { @@ -1721,14 +1599,11 @@ setup: } } }, - weight: 1.0 # Uses top-level minmax normalizer + weight: 1.0 } ] - match: { hits.total.value: 2 } - # First retriever uses L2 norm (score normalized to 1.0) - # Second retriever uses minmax (score normalized to 1.0 since it's the only score) - # Both docs get final score of 1.0, ranking by document order - match: { hits.hits.0._id: "1" } - match: { hits.hits.1._id: "4" } - close_to: { hits.hits.0._score: { value: 1.0, error: 0.001} } @@ -1869,8 +1744,8 @@ setup: ] - match: { hits.total.value: 2 } - - match: { hits.hits.0._id: "1" } # 7.5 * 2.0 = 15.0 - - match: { hits.hits.1._id: "4" } # 3.2 * 1.0 = 3.2 + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "4" } - close_to: { hits.hits.0._score: { value: 15.0, error: 0.001} } - close_to: { hits.hits.1._score: { value: 3.2, error: 0.001} } @@ -1951,9 +1826,6 @@ setup: - match: { hits.total.value: 1 } - match: { hits.hits.0._id: "1" } - # All three retrievers return same doc with different normalizations: - # none: 
10.0, l2_norm: 1.0, minmax: 1.0 (single value) - # Final score: 10.0 + 1.0 + 1.0 = 12.0 - close_to: { hits.hits.0._score: { value: 12.0, error: 0.001} } --- @@ -2006,8 +1878,6 @@ setup: - match: { hits.total.value: 1 } - match: { hits.hits.0._id: "1" } - # Only second retriever returns results, minmax normalization on [10.0] = 1.0 - # Final score: 2.0 * 1.0 = 2.0 - close_to: { hits.hits.0._score: { value: 2.0, error: 0.001} } --- @@ -2027,8 +1897,6 @@ setup: normalizer: l2_norm - match: { hits.total.value: 2 } - # Both doc1 and doc4 have "other" in other_keyword field - # L2 normalization should be applied to generated retrievers --- "Linear retriever error handling - invalid normalizer": @@ -2110,8 +1978,5 @@ setup: - match: { hits.total.value: 2 } - match: { hits.hits.0._id: "1" } - match: { hits.hits.1._id: "4" } - # With per-retriever L2 normalization: each retriever normalizes its own scores - # First retriever: [1000.0] → L2 normalized to [1.0] → final: 1.0 * 1.0 = 1.0 - # Second retriever: [1.0] → L2 normalized to [1.0] → final: 1.0 * 1.0 = 1.0 - close_to: { hits.hits.0._score: { value: 1.0, error: 0.001} } - close_to: { hits.hits.1._score: { value: 1.0, error: 0.001} } From 3d6b32b25e95a20b07b60dde5a2ecdeabf093058 Mon Sep 17 00:00:00 2001 From: Mridula Date: Fri, 15 Aug 2025 16:03:44 +0100 Subject: [PATCH 40/51] Fixed samuel comments --- .../LinearRetrieverBuilderParsingTests.java | 2 +- .../test/linear/10_linear_retriever.yml | 31 ++----------------- 2 files changed, 3 insertions(+), 30 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderParsingTests.java b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderParsingTests.java index cc2dc80b8a4d6..6ee40bd43efd1 100644 --- a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderParsingTests.java +++ 
b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderParsingTests.java @@ -153,7 +153,7 @@ public void testTopLevelNormalizer() throws IOException { try (XContentParser parser = createParser(XContentType.JSON.xContent(), json)) { LinearRetrieverBuilder builder = doParseInstance(parser); // Test that the top-level normalizer is properly applied - the individual - // retrievers specified "none" but should be overridden by top-level "minmax" + // Per-retriever 'none' should override top-level 'minmax' ScoreNormalizer[] normalizers = builder.getNormalizers(); assertEquals(2, normalizers.length); for (ScoreNormalizer normalizer : normalizers) { diff --git a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml index 4db0ec63ea339..d28263552f985 100644 --- a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml +++ b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml @@ -1492,33 +1492,6 @@ setup: - match: { hits.hits.0._id: "1" } - close_to: { hits.hits.0._score: { value: 1.0, error: 0.001} } ---- -"linear retriever with top-level normalizer - validation test": - - do: - catch: /Unknown normalizer \[invalid\]/ - search: - index: test - body: - retriever: - linear: - normalizer: invalid - retrievers: [ - { - retriever: { - standard: { - query: { - term: { - keyword: { - value: "one" - } - } - } - } - }, - weight: 1.0 - } - ] - --- "linear retriever with top-level normalizer - empty results": - do: @@ -1901,13 +1874,13 @@ setup: --- "Linear retriever error handling - invalid normalizer": - do: - catch: bad_request + catch: /Unknown normalizer \[invalid\]/ search: index: test body: retriever: linear: - normalizer: invalid_normalizer + normalizer: invalid retrievers: [ { retriever: { From 
5c0f006767d65fbe8eb0d27af41baf7a8375f691 Mon Sep 17 00:00:00 2001 From: Mridula Date: Fri, 15 Aug 2025 16:41:03 +0100 Subject: [PATCH 41/51] Worked on Michael comments --- .../rest-apis/retrievers/linear-retriever.md | 57 +++++-------------- .../retrievers/retrievers-examples.md | 2 +- .../rank/linear/LinearRetrieverBuilder.java | 2 - .../LinearRetrieverBuilderParsingTests.java | 8 +-- 4 files changed, 17 insertions(+), 52 deletions(-) diff --git a/docs/reference/elasticsearch/rest-apis/retrievers/linear-retriever.md b/docs/reference/elasticsearch/rest-apis/retrievers/linear-retriever.md index b241f245c0580..95abc5a0ab215 100644 --- a/docs/reference/elasticsearch/rest-apis/retrievers/linear-retriever.md +++ b/docs/reference/elasticsearch/rest-apis/retrievers/linear-retriever.md @@ -31,11 +31,12 @@ Combining `query` and `retrievers` is not supported. `normalizer` {applies_to}`stack: ga 9.1` : (Optional, String) - The normalizer to use when using the [multi-field query format](../retrievers.md#multi-field-query-format). + The normalizer to use when combining results. See [normalizers](#linear-retriever-normalizers) for supported values. Required when `query` is specified. - - When used with `retrievers`, the top-level `normalizer` serves as a default for any sub-retriever that doesn't specify its own normalizer. Per-retriever normalizers always take precedence over the top-level normalizer when both are specified. + + When used with the [multi-field query format](../retrievers.md#multi-field-query-format) (`query` parameter), normalizes scores per [field grouping](../retrievers.md#multi-field-field-grouping). + When used with `retrievers`, serves as the default normalizer for any sub-retriever that doesn't specify its own normalizer. Per-retriever normalizers always take precedence over the top-level normalizer. ::::{warning} Avoid using `none` as that will disable normalization and may bias the result set towards lexical matches. 
@@ -76,9 +77,9 @@ Each entry in the `retrievers` array specifies the following parameters: `normalizer` : (Optional, String) - Specifies how the retriever’s score will be normalized before applying the specified `weight`. + Specifies how the retriever's score will be normalized before applying the specified `weight`. See [normalizers](#linear-retriever-normalizers) for supported values. - Defaults to `none`. + If not specified, uses the top-level `normalizer` or defaults to `none` if no top-level normalizer is set. See also [this hybrid search example](retrievers-examples.md#retrievers-examples-linear-retriever) using a linear retriever on how to independently configure and apply normalizers to retrievers. @@ -96,7 +97,7 @@ The `linear` retriever supports the following normalizers: ## Example -This example of a hybrid search weights KNN results five times more heavily than BM25 results in the final ranking. +This example of a hybrid search weights KNN results five times more heavily than BM25 results in the final ranking, with a top-level normalizer applied to all retrievers. ```console GET my_index/_search @@ -107,35 +108,14 @@ GET my_index/_search { "retriever": { "knn": { - ... + "field": "title_vector", + "query_vector": [0.1, 0.2, 0.3], + "k": 10, + "num_candidates": 100 } }, "weight": 5 # KNN query weighted 5x }, - { - "retriever": { - "standard": { - ... 
- } - }, - "weight": 1.5 # BM25 query weighted 1.5x - } - ] - } - } -} -``` - -### Using top-level normalizer - -This example shows how to use a top-level normalizer that applies to all sub-retrievers: - -```console -GET my_index/_search -{ - "retriever": { - "linear": { - "retrievers": [ { "retriever": { "standard": { @@ -146,18 +126,7 @@ GET my_index/_search } } }, - "weight": 1.0 - }, - { - "retriever": { - "knn": { - "field": "title_vector", - "query_vector": [0.1, 0.2, 0.3], - "k": 10, - "num_candidates": 100 - } - }, - "weight": 2.0 + "weight": 1.5 # BM25 query weighted 1.5x } ], "normalizer": "minmax" @@ -166,6 +135,6 @@ GET my_index/_search } ``` -In this example, the `minmax` normalizer is applied to both the standard retriever and the kNN retriever. The top-level normalizer serves as a default that can be overridden by individual sub-retrievers. When using the multi-field query format, the top-level normalizer is applied to all generated inner retrievers. +In this example, the `minmax` normalizer is applied to both the kNN retriever and the standard retriever. The top-level normalizer serves as a default that can be overridden by individual sub-retrievers. When using the multi-field query format, the top-level normalizer is applied to all generated inner retrievers. See also [this hybrid search example](retrievers-examples.md#retrievers-examples-linear-retriever). 
diff --git a/docs/reference/elasticsearch/rest-apis/retrievers/retrievers-examples.md b/docs/reference/elasticsearch/rest-apis/retrievers/retrievers-examples.md index b9dcc24a841ed..742d333872291 100644 --- a/docs/reference/elasticsearch/rest-apis/retrievers/retrievers-examples.md +++ b/docs/reference/elasticsearch/rest-apis/retrievers/retrievers-examples.md @@ -247,7 +247,7 @@ GET /retrievers_example/_search } }, "weight": 1.5, - "normalizer": "minmax" + "normalizer": "l2_norm" } ], "rank_window_size": 10 diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java index 20feac674bc4c..a718493935a64 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java @@ -178,8 +178,6 @@ public LinearRetrieverBuilder( this.query = query; this.normalizer = normalizer; this.weights = weights; - - // Initialize normalizers array this.normalizers = new ScoreNormalizer[normalizers.length]; for (int i = 0; i < normalizers.length; i++) { this.normalizers[i] = resolveNormalizer(normalizers[i], normalizer); diff --git a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderParsingTests.java b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderParsingTests.java index 6ee40bd43efd1..49cba70514296 100644 --- a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderParsingTests.java +++ b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderParsingTests.java @@ -69,11 +69,9 @@ protected LinearRetrieverBuilder createTestInstance() { new 
CompoundRetrieverBuilder.RetrieverSource(TestRetrieverBuilder.createRandomTestRetrieverBuilder(), null) ); weights[i] = randomFloat(); - if (randomBoolean()) { - normalizers[i] = randomScoreNormalizer(); - } else { - normalizers[i] = null; - } + normalizers[i] = randomFrom( + new ScoreNormalizer[] { null, MinMaxScoreNormalizer.INSTANCE, L2ScoreNormalizer.INSTANCE, IdentityScoreNormalizer.INSTANCE } + ); } return new LinearRetrieverBuilder(innerRetrievers, fields, query, normalizer, rankWindowSize, weights, normalizers); From 2e569d73b6d061ae01d35e2078df31abbfdd727a Mon Sep 17 00:00:00 2001 From: Mridula Date: Tue, 19 Aug 2025 16:16:05 +0100 Subject: [PATCH 42/51] Reverted the retriever example change --- .../elasticsearch/rest-apis/retrievers/retrievers-examples.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/elasticsearch/rest-apis/retrievers/retrievers-examples.md b/docs/reference/elasticsearch/rest-apis/retrievers/retrievers-examples.md index 742d333872291..b9dcc24a841ed 100644 --- a/docs/reference/elasticsearch/rest-apis/retrievers/retrievers-examples.md +++ b/docs/reference/elasticsearch/rest-apis/retrievers/retrievers-examples.md @@ -247,7 +247,7 @@ GET /retrievers_example/_search } }, "weight": 1.5, - "normalizer": "l2_norm" + "normalizer": "minmax" } ], "rank_window_size": 10 From 29c4f248da28c2d0a9ebb07214407929d6af8eae Mon Sep 17 00:00:00 2001 From: Mridula Date: Tue, 19 Aug 2025 16:28:17 +0100 Subject: [PATCH 43/51] The test was modified to include equality check --- .../xpack/rank/linear/LinearRetrieverIT.java | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/internalClusterTest/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverIT.java b/x-pack/plugin/rank-rrf/src/internalClusterTest/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverIT.java index 5ef6c689d8778..b00af1713dcb6 100644 ---
a/x-pack/plugin/rank-rrf/src/internalClusterTest/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverIT.java +++ b/x-pack/plugin/rank-rrf/src/internalClusterTest/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverIT.java @@ -837,15 +837,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws } public void testMixedNormalizerInheritance() throws IOException { - client().prepareIndex(INDEX) - .setId("1") - .setSource("field1", "elasticsearch search", "field2", "database technology", "score", 10) - .get(); - client().prepareIndex(INDEX).setId("2").setSource("field1", "lucene engine", "field2", "search technology", "score", 5).get(); - client().prepareIndex(INDEX) - .setId("3") - .setSource("field1", "information retrieval", "field2", "database search", "score", 15) - .get(); + client().prepareIndex(INDEX).setId("1").setSource("field1", "elasticsearch only", "field2", "no technology here").get(); + client().prepareIndex(INDEX).setId("2").setSource("field1", "no elasticsearch", "field2", "technology only").get(); + client().prepareIndex(INDEX).setId("3").setSource("field1", "search term", "field2", "no technology").get(); refresh(INDEX); LinearRetrieverBuilder linearRetriever = new LinearRetrieverBuilder( @@ -871,7 +865,7 @@ public void testMixedNormalizerInheritance() throws IOException { assertThat(linearRetriever.getNormalizers()[2], equalTo(MinMaxScoreNormalizer.INSTANCE)); assertResponse(client().prepareSearch(INDEX).setSource(new SearchSourceBuilder().retriever(linearRetriever)), searchResponse -> { - assertThat(searchResponse.getHits().getTotalHits().value() > 0L, is(true)); + assertThat(searchResponse.getHits().getTotalHits().value(), equalTo(3L)); }); } } From 078515c8c0da17a8036af0787c69e704d04b76c9 Mon Sep 17 00:00:00 2001 From: Mridula Date: Tue, 19 Aug 2025 16:34:25 +0100 Subject: [PATCH 44/51] cleaned up resolve normalizer --- .../rank/linear/LinearRetrieverBuilder.java | 24 +++++++++---------- 1 file changed, 12 
insertions(+), 12 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java index a718493935a64..d5196c8078a75 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java @@ -75,16 +75,6 @@ public final class LinearRetrieverBuilder extends CompoundRetrieverBuilder PARSER = new ConstructingObjectParser<>( NAME, @@ -133,6 +123,16 @@ private static ScoreNormalizer[] getDefaultNormalizers(List inn return normalizers; } + private static ScoreNormalizer resolveNormalizer(ScoreNormalizer componentNormalizer, ScoreNormalizer topLevelNormalizer) { + if (componentNormalizer != null) { + return componentNormalizer; + } + if (topLevelNormalizer != null) { + return topLevelNormalizer; + } + return DEFAULT_NORMALIZER; + } + public static LinearRetrieverBuilder fromXContent(XContentParser parser, RetrieverParserContext context) throws IOException { if (context.clusterSupportsFeature(LINEAR_RETRIEVER_SUPPORTED) == false) { throw new ParsingException(parser.getTokenLocation(), "unknown retriever [" + NAME + "]"); @@ -341,7 +341,7 @@ protected RetrieverBuilder doRewrite(QueryRewriteContext ctx) { for (var weightedRetriever : r) { retrievers.add(weightedRetriever.retrieverSource()); weights[index] = weightedRetriever.weight(); - normalizers[index] = resolveNormalizer(null, normalizer); + normalizers[index] = normalizer; index++; } @@ -359,7 +359,7 @@ protected RetrieverBuilder doRewrite(QueryRewriteContext ctx) { Arrays.fill(weights, DEFAULT_WEIGHT); ScoreNormalizer[] normalizers = new ScoreNormalizer[fieldsInnerRetrievers.size()]; - Arrays.fill(normalizers, resolveNormalizer(null, normalizer)); + Arrays.fill(normalizers, normalizer); // TODO: This is a 
incomplete solution as it does not address other incomplete copy issues // (such as dropping the retriever name and min score) From 34e14f7336a35f6ee4f4698fe06d7ab64839ee04 Mon Sep 17 00:00:00 2001 From: Mridula Date: Tue, 19 Aug 2025 16:43:07 +0100 Subject: [PATCH 45/51] optimised the parsing test --- .../LinearRetrieverBuilderParsingTests.java | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderParsingTests.java b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderParsingTests.java index 49cba70514296..5bb54431a8c5e 100644 --- a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderParsingTests.java +++ b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderParsingTests.java @@ -33,6 +33,12 @@ public class LinearRetrieverBuilderParsingTests extends AbstractXContentTestCase { private static List xContentRegistryEntries; + private static final ScoreNormalizer[] SCORE_NORMALIZERS = new ScoreNormalizer[] { + null, + MinMaxScoreNormalizer.INSTANCE, + L2ScoreNormalizer.INSTANCE, + IdentityScoreNormalizer.INSTANCE }; + @BeforeClass public static void init() { xContentRegistryEntries = new SearchModule(Settings.EMPTY, emptyList()).getNamedXContents(); @@ -69,9 +75,7 @@ protected LinearRetrieverBuilder createTestInstance() { new CompoundRetrieverBuilder.RetrieverSource(TestRetrieverBuilder.createRandomTestRetrieverBuilder(), null) ); weights[i] = randomFloat(); - normalizers[i] = randomFrom( - new ScoreNormalizer[] { null, MinMaxScoreNormalizer.INSTANCE, L2ScoreNormalizer.INSTANCE, IdentityScoreNormalizer.INSTANCE } - ); + normalizers[i] = randomFrom(SCORE_NORMALIZERS); } return new LinearRetrieverBuilder(innerRetrievers, fields, query, normalizer, rankWindowSize, weights, normalizers); @@ -112,12 +116,7 @@ protected 
NamedXContentRegistry xContentRegistry() { } private static ScoreNormalizer randomScoreNormalizer() { - int random = randomInt(2); - return switch (random) { - case 0 -> MinMaxScoreNormalizer.INSTANCE; - case 1 -> L2ScoreNormalizer.INSTANCE; - default -> IdentityScoreNormalizer.INSTANCE; - }; + return randomFrom(SCORE_NORMALIZERS); } public void testTopLevelNormalizer() throws IOException { From fdb0f88cf7fe33a3ed778a81e7e1730ea42c2322 Mon Sep 17 00:00:00 2001 From: Mridula Date: Tue, 19 Aug 2025 16:50:16 +0100 Subject: [PATCH 46/51] cleaned up duplicates --- .../linear/LinearRetrieverBuilderTests.java | 45 ------------------- 1 file changed, 45 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java index 10ce196a56d6e..8264ccdb22e38 100644 --- a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java +++ b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderTests.java @@ -386,51 +386,6 @@ public void testTopLevelNormalizerWithPerRetrieverOverrides() { assertEquals(MinMaxScoreNormalizer.INSTANCE, retriever.getNormalizers()[1]); } - public void testExplicitIdentityNormalizerOverridesTopLevel() { - StandardRetrieverBuilder standardRetriever = new StandardRetrieverBuilder(new MatchQueryBuilder("title", "elasticsearch")); - KnnRetrieverBuilder knnRetriever = new KnnRetrieverBuilder( - "title_vector", - new float[] { 0.1f, 0.2f, 0.3f }, - null, - 10, - 100, - null, - null - ); - - LinearRetrieverBuilder retriever = new LinearRetrieverBuilder( - List.of( - CompoundRetrieverBuilder.RetrieverSource.from(standardRetriever), - CompoundRetrieverBuilder.RetrieverSource.from(knnRetriever) - ), - null, - null, - MinMaxScoreNormalizer.INSTANCE, - DEFAULT_RANK_WINDOW_SIZE, - new float[] { 1.0f, 2.0f }, - new 
ScoreNormalizer[] { IdentityScoreNormalizer.INSTANCE, null } - ); - - assertEquals(IdentityScoreNormalizer.INSTANCE, retriever.getNormalizers()[0]); - assertEquals(MinMaxScoreNormalizer.INSTANCE, retriever.getNormalizers()[1]); - } - - public void testNullNormalizerUsesTopLevelAsDefault() { - StandardRetrieverBuilder standardRetriever = new StandardRetrieverBuilder(new MatchQueryBuilder("title", "elasticsearch")); - - LinearRetrieverBuilder retriever = new LinearRetrieverBuilder( - List.of(CompoundRetrieverBuilder.RetrieverSource.from(standardRetriever)), - null, - null, - MinMaxScoreNormalizer.INSTANCE, - DEFAULT_RANK_WINDOW_SIZE, - new float[] { 1.0f }, - new ScoreNormalizer[] { null } - ); - - assertEquals(MinMaxScoreNormalizer.INSTANCE, retriever.getNormalizers()[0]); - } - public void testNullNormalizersWithoutTopLevelUsesIdentity() { StandardRetrieverBuilder standardRetriever = new StandardRetrieverBuilder(new MatchQueryBuilder("title", "elasticsearch")); From f6484e13833c9361d2ebaad039c954d208a373ea Mon Sep 17 00:00:00 2001 From: Mridula Date: Tue, 19 Aug 2025 17:15:11 +0100 Subject: [PATCH 47/51] Added cluster features --- .../xpack/rank/RankRRFFeatures.java | 3 +- .../test/linear/10_linear_retriever.yml | 66 +++++++------------ 2 files changed, 26 insertions(+), 43 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/RankRRFFeatures.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/RankRRFFeatures.java index 326a2f276fa6a..7dd58157a7918 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/RankRRFFeatures.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/RankRRFFeatures.java @@ -22,10 +22,11 @@ public class RankRRFFeatures implements FeatureSpecification { public static final NodeFeature LINEAR_RETRIEVER_SUPPORTED = new NodeFeature("linear_retriever_supported"); + public static final NodeFeature LINEAR_RETRIEVER_TOP_LEVEL_NORMALIZER = new 
NodeFeature("linear_retriever.top_level_normalizer"); @Override public Set getFeatures() { - return Set.of(LINEAR_RETRIEVER_SUPPORTED); + return Set.of(LINEAR_RETRIEVER_SUPPORTED, LINEAR_RETRIEVER_TOP_LEVEL_NORMALIZER); } @Override diff --git a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml index d28263552f985..35679c99adfab 100644 --- a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml +++ b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml @@ -1336,6 +1336,9 @@ setup: --- "linear retriever with top-level normalizer - minmax": + - requires: + cluster_features: [ "linear_retriever.top_level_normalizer" ] + reason: "Support for top-level normalizer in linear retriever" - do: search: index: test @@ -1383,8 +1386,8 @@ setup: --- "linear retriever with top-level normalizer - l2_norm": - requires: - cluster_features: [ "linear_retriever.l2_norm" ] - reason: "Requires l2_norm normalization support in linear retriever" + cluster_features: [ "linear_retriever.l2_norm", "linear_retriever.top_level_normalizer" ] + reason: "Support for L2 normalization and top-level normalizer in linear retriever" - do: search: index: test @@ -1431,6 +1434,9 @@ setup: --- "linear retriever with top-level normalizer and per-retriever override": + - requires: + cluster_features: [ "linear_retriever.l2_norm", "linear_retriever.top_level_normalizer" ] + reason: "Support for L2 normalization and top-level normalizer in linear retriever" - do: search: index: test @@ -1476,24 +1482,11 @@ setup: - match: { hits.hits.2._id: "2" } - close_to: { hits.hits.2._score: { value: 0.45, error: 0.01} } ---- -"linear retriever with top-level normalizer - multi-field format": - - do: - search: - index: test - body: - retriever: - linear: - normalizer: minmax - 
query: one - fields: [keyword, text] - - - match: { hits.total.value: 1 } - - match: { hits.hits.0._id: "1" } - - close_to: { hits.hits.0._score: { value: 1.0, error: 0.001} } - --- "linear retriever with top-level normalizer - empty results": + - requires: + cluster_features: [ "linear_retriever.top_level_normalizer" ] + reason: "Support for top-level normalizer in linear retriever" - do: search: index: test @@ -1524,9 +1517,8 @@ setup: --- "Linear retriever with per-retriever normalizer override": - requires: - cluster_features: [ "linear_retriever.l2_norm" ] - reason: "Support for L2 normalization" - + cluster_features: [ "linear_retriever.l2_norm", "linear_retriever.top_level_normalizer" ] + reason: "Support for L2 normalization and top-level normalizer in linear retriever" - do: search: index: test @@ -1668,6 +1660,9 @@ setup: --- "Linear retriever with identity normalizer (no normalization)": + - requires: + cluster_features: [ "linear_retriever.top_level_normalizer" ] + reason: "Support for top-level normalizer in linear retriever" - do: search: index: test @@ -1803,6 +1798,9 @@ setup: --- "Linear retriever normalization with zero scores": + - requires: + cluster_features: [ "linear_retriever.top_level_normalizer" ] + reason: "Support for top-level normalizer in linear retriever" - do: search: index: test @@ -1853,26 +1851,11 @@ setup: - match: { hits.hits.0._id: "1" } - close_to: { hits.hits.0._score: { value: 2.0, error: 0.001} } ---- -"Linear retriever with field+query format and normalizer inheritance": - - requires: - cluster_features: [ "linear_retriever.multi_fields_query_format_support" ] - reason: "Support for multi-fields query format" - - - do: - search: - index: test - body: - retriever: - linear: - fields: ["keyword", "other_keyword"] - query: "other" - normalizer: l2_norm - - - match: { hits.total.value: 2 } - --- "Linear retriever error handling - invalid normalizer": + - requires: + cluster_features: [ "linear_retriever.top_level_normalizer" 
] + reason: "Support for top-level normalizer in linear retriever" - do: catch: /Unknown normalizer \[invalid\]/ search: @@ -1897,9 +1880,8 @@ setup: --- "Linear retriever with large score differences and L2 normalization": - requires: - cluster_features: [ "linear_retriever.l2_norm" ] - reason: "Support for L2 normalization" - + cluster_features: [ "linear_retriever.l2_norm", "linear_retriever.top_level_normalizer" ] + reason: "Support for L2 normalization and top-level normalizer in linear retriever" - do: search: index: test From 45e2f7e1f41420c1dcfd184449e2aaeed038b42c Mon Sep 17 00:00:00 2001 From: Mridula Date: Wed, 20 Aug 2025 11:58:14 +0100 Subject: [PATCH 48/51] Modified docs --- .../elasticsearch/rest-apis/retrievers/linear-retriever.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/reference/elasticsearch/rest-apis/retrievers/linear-retriever.md b/docs/reference/elasticsearch/rest-apis/retrievers/linear-retriever.md index 95abc5a0ab215..9072c291065fa 100644 --- a/docs/reference/elasticsearch/rest-apis/retrievers/linear-retriever.md +++ b/docs/reference/elasticsearch/rest-apis/retrievers/linear-retriever.md @@ -31,12 +31,12 @@ Combining `query` and `retrievers` is not supported. `normalizer` {applies_to}`stack: ga 9.1` : (Optional, String) - The normalizer to use when combining results. + The top-level normalizer to use when combining results. See [normalizers](#linear-retriever-normalizers) for supported values. Required when `query` is specified. When used with the [multi-field query format](../retrievers.md#multi-field-query-format) (`query` parameter), normalizes scores per [field grouping](../retrievers.md#multi-field-field-grouping). - When used with `retrievers`, serves as the default normalizer for any sub-retriever that doesn't specify its own normalizer. Per-retriever normalizers always take precedence over the top-level normalizer. 
+ Otherwise serves as the default normalizer for any sub-retriever that doesn't specify its own normalizer. Per-retriever normalizers always take precedence over the top-level normalizer. ::::{warning} Avoid using `none` as that will disable normalization and may bias the result set towards lexical matches. From d7c6dbd6457a4972665e344a272420ac30b1fb95 Mon Sep 17 00:00:00 2001 From: Mridula Date: Wed, 20 Aug 2025 12:20:18 +0100 Subject: [PATCH 49/51] worked on all the changes --- .../org/elasticsearch/xpack/rank/RankRRFFeatures.java | 5 +++-- .../rest-api-spec/test/linear/10_linear_retriever.yml | 10 +++++----- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/RankRRFFeatures.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/RankRRFFeatures.java index 7dd58157a7918..a6a193d62de88 100644 --- a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/RankRRFFeatures.java +++ b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/RankRRFFeatures.java @@ -26,7 +26,7 @@ public class RankRRFFeatures implements FeatureSpecification { @Override public Set getFeatures() { - return Set.of(LINEAR_RETRIEVER_SUPPORTED, LINEAR_RETRIEVER_TOP_LEVEL_NORMALIZER); + return Set.of(LINEAR_RETRIEVER_SUPPORTED); } @Override @@ -38,7 +38,8 @@ public Set getTestFeatures() { LINEAR_RETRIEVER_MINSCORE_FIX, LinearRetrieverBuilder.MULTI_FIELDS_QUERY_FORMAT_SUPPORT, RRFRetrieverBuilder.MULTI_FIELDS_QUERY_FORMAT_SUPPORT, - RRFRetrieverBuilder.WEIGHTED_SUPPORT + RRFRetrieverBuilder.WEIGHTED_SUPPORT, + LINEAR_RETRIEVER_TOP_LEVEL_NORMALIZER ); } } diff --git a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml index 35679c99adfab..bf8517f300103 100644 --- 
a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml +++ b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml @@ -1386,7 +1386,7 @@ setup: --- "linear retriever with top-level normalizer - l2_norm": - requires: - cluster_features: [ "linear_retriever.l2_norm", "linear_retriever.top_level_normalizer" ] + cluster_features: [ "linear_retriever.top_level_normalizer" ] reason: "Support for L2 normalization and top-level normalizer in linear retriever" - do: search: @@ -1435,7 +1435,7 @@ setup: --- "linear retriever with top-level normalizer and per-retriever override": - requires: - cluster_features: [ "linear_retriever.l2_norm", "linear_retriever.top_level_normalizer" ] + cluster_features: [ "linear_retriever.top_level_normalizer" ] reason: "Support for L2 normalization and top-level normalizer in linear retriever" - do: search: @@ -1517,7 +1517,7 @@ setup: --- "Linear retriever with per-retriever normalizer override": - requires: - cluster_features: [ "linear_retriever.l2_norm", "linear_retriever.top_level_normalizer" ] + cluster_features: [ "linear_retriever.top_level_normalizer" ] reason: "Support for L2 normalization and top-level normalizer in linear retriever" - do: search: @@ -1577,7 +1577,7 @@ setup: --- "Linear retriever with mixed normalizers - inheritance test": - requires: - cluster_features: [ "linear_retriever.l2_norm" ] + cluster_features: [ "linear_retriever.top_level_normalizer" ] reason: "Support for L2 normalization" - do: @@ -1880,7 +1880,7 @@ setup: --- "Linear retriever with large score differences and L2 normalization": - requires: - cluster_features: [ "linear_retriever.l2_norm", "linear_retriever.top_level_normalizer" ] + cluster_features: [ "linear_retriever.top_level_normalizer" ] reason: "Support for L2 normalization and top-level normalizer in linear retriever" - do: search: From 7ae580001a2aafb106abb20ad5208c9b800bcfb6 Mon Sep 17 00:00:00 2001 
From: Mridula Date: Wed, 20 Aug 2025 14:20:49 +0100 Subject: [PATCH 50/51] Update 10_linear_retriever.yml --- .../rest-api-spec/test/linear/10_linear_retriever.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml index bf8517f300103..e2f8e73fe2fc1 100644 --- a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml +++ b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml @@ -1387,7 +1387,7 @@ setup: "linear retriever with top-level normalizer - l2_norm": - requires: cluster_features: [ "linear_retriever.top_level_normalizer" ] - reason: "Support for L2 normalization and top-level normalizer in linear retriever" + reason: "Support for top-level normalizer in linear retriever" - do: search: index: test @@ -1436,7 +1436,7 @@ setup: "linear retriever with top-level normalizer and per-retriever override": - requires: cluster_features: [ "linear_retriever.top_level_normalizer" ] - reason: "Support for L2 normalization and top-level normalizer in linear retriever" + reason: "Support for top-level normalizer in linear retriever" - do: search: index: test @@ -1518,7 +1518,7 @@ setup: "Linear retriever with per-retriever normalizer override": - requires: cluster_features: [ "linear_retriever.top_level_normalizer" ] - reason: "Support for L2 normalization and top-level normalizer in linear retriever" + reason: "Support for top-level normalizer in linear retriever" - do: search: index: test @@ -1578,7 +1578,7 @@ setup: "Linear retriever with mixed normalizers - inheritance test": - requires: cluster_features: [ "linear_retriever.top_level_normalizer" ] - reason: "Support for L2 normalization" + reason: "Support for top-level normalizer in linear retriever" -
do: search: @@ -1881,7 +1881,7 @@ setup: "Linear retriever with large score differences and L2 normalization": - requires: cluster_features: [ "linear_retriever.top_level_normalizer" ] - reason: "Support for L2 normalization and top-level normalizer in linear retriever" + reason: "Support for top-level normalizer in linear retriever" - do: search: index: test From 2bc12ec6861e9edb8283cc2622f90a95a410aeeb Mon Sep 17 00:00:00 2001 From: Mridula Date: Thu, 21 Aug 2025 11:08:39 +0100 Subject: [PATCH 51/51] Nitpicks and some other enhancement comments resolved --- .../rest-apis/retrievers/linear-retriever.md | 5 ++ .../LinearRetrieverBuilderParsingTests.java | 2 +- .../test/linear/10_linear_retriever.yml | 64 ++----------------- 3 files changed, 10 insertions(+), 61 deletions(-) diff --git a/docs/reference/elasticsearch/rest-apis/retrievers/linear-retriever.md b/docs/reference/elasticsearch/rest-apis/retrievers/linear-retriever.md index 9072c291065fa..dd266b0092d6c 100644 --- a/docs/reference/elasticsearch/rest-apis/retrievers/linear-retriever.md +++ b/docs/reference/elasticsearch/rest-apis/retrievers/linear-retriever.md @@ -38,6 +38,10 @@ Combining `query` and `retrievers` is not supported. When used with the [multi-field query format](../retrievers.md#multi-field-query-format) (`query` parameter), normalizes scores per [field grouping](../retrievers.md#multi-field-field-grouping). Otherwise serves as the default normalizer for any sub-retriever that doesn't specify its own normalizer. Per-retriever normalizers always take precedence over the top-level normalizer. + :::{note} + **Top-level normalizer support for sub-retrievers**: The ability to use a top-level normalizer as a default for sub-retrievers was introduced in Elasticsearch 9.2+. In earlier versions, only per-retriever normalizers are supported. + ::: + ::::{warning} Avoid using `none` as that will disable normalization and may bias the result set towards lexical matches. 
See [field grouping](../retrievers.md#multi-field-field-grouping) for more information. @@ -80,6 +84,7 @@ Each entry in the `retrievers` array specifies the following parameters: Specifies how the retriever's score will be normalized before applying the specified `weight`. See [normalizers](#linear-retriever-normalizers) for supported values. If not specified, uses the top-level `normalizer` or defaults to `none` if no top-level normalizer is set. + {applies_to}`stack: ga 9.2` See also [this hybrid search example](retrievers-examples.md#retrievers-examples-linear-retriever) using a linear retriever on how to independently configure and apply normalizers to retrievers. diff --git a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderParsingTests.java b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderParsingTests.java index 5bb54431a8c5e..99b67773e9d05 100644 --- a/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderParsingTests.java +++ b/x-pack/plugin/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilderParsingTests.java @@ -75,7 +75,7 @@ protected LinearRetrieverBuilder createTestInstance() { new CompoundRetrieverBuilder.RetrieverSource(TestRetrieverBuilder.createRandomTestRetrieverBuilder(), null) ); weights[i] = randomFloat(); - normalizers[i] = randomFrom(SCORE_NORMALIZERS); + normalizers[i] = randomScoreNormalizer(); } return new LinearRetrieverBuilder(innerRetrievers, fields, query, normalizer, rankWindowSize, weights, normalizers); diff --git a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml index e2f8e73fe2fc1..7822f470d9794 100644 --- a/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml +++ 
b/x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml @@ -1514,66 +1514,6 @@ setup: - match: { hits.total.value: 0 } - length: { hits.hits: 0 } ---- -"Linear retriever with per-retriever normalizer override": - - requires: - cluster_features: [ "linear_retriever.top_level_normalizer" ] - reason: "Support for top-level normalizer in linear retriever" - - do: - search: - index: test - body: - retriever: - linear: - normalizer: minmax - retrievers: [ - { - retriever: { - standard: { - query: { - constant_score: { - filter: { - term: { - keyword: { - value: "one" - } - } - }, - boost: 5.0 - } - } - } - }, - weight: 1.0, - normalizer: l2_norm - }, - { - retriever: { - standard: { - query: { - constant_score: { - filter: { - term: { - keyword: { - value: "four" - } - } - }, - boost: 12.0 - } - } - } - }, - weight: 1.0 - } - ] - - - match: { hits.total.value: 2 } - - match: { hits.hits.0._id: "1" } - - match: { hits.hits.1._id: "4" } - - close_to: { hits.hits.0._score: { value: 1.0, error: 0.001} } - - close_to: { hits.hits.1._score: { value: 1.0, error: 0.001} } - --- "Linear retriever with mixed normalizers - inheritance test": - requires: @@ -1657,6 +1597,10 @@ setup: # Third retriever: L2 norm on [6.0] = 1.0, final = 1.0 * 2.0 = 2.0 - match: { hits.hits.0._id: "3" } # Highest score due to weight=2.0 - close_to: { hits.hits.0._score: { value: 2.0, error: 0.001} } + - match: { hits.hits.1._id: "1" } # Second score = 1.0 + - close_to: { hits.hits.1._score: { value: 1.0, error: 0.001} } + - match: { hits.hits.2._id: "2" } # Third score = 1.0 + - close_to: { hits.hits.2._score: { value: 1.0, error: 0.001} } --- "Linear retriever with identity normalizer (no normalization)":