From 44c0dab73c520b2fb7db8b2f77fad1feb95692eb Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 20 Nov 2025 12:02:31 +0100 Subject: [PATCH 1/7] Fix knn min_candidates to not include k --- .../elasticsearch/xpack/esql/querydsl/query/KnnQuery.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java index 12eb64354a9d0..4f4853ddff50c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java @@ -53,14 +53,13 @@ protected QueryBuilder asBuilder() { } Float vectorSimilarity = (Float) options.get(VECTOR_SIMILARITY_FIELD.getPreferredName()); Integer minCandidates = (Integer) options.get(Knn.MIN_CANDIDATES_OPTION); - int adjustedK = Math.max(k, minCandidates == null ? 0 : minCandidates); - minCandidates = minCandidates == null ? null : Math.max(minCandidates, adjustedK); + minCandidates = minCandidates == null ? null : Math.max(minCandidates, k); // TODO: expose visit_percentage in ESQL KnnVectorQueryBuilder queryBuilder = new KnnVectorQueryBuilder( field, query, - adjustedK, + k, minCandidates, null, rescoreVectorBuilder, From d661af3c9b188de22e02df2c36d5714b91c5e636 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 20 Nov 2025 15:23:10 +0100 Subject: [PATCH 2/7] Add new KNN params --- .../functions/functionNamedParams/knn.md | 8 +++++- .../esql/kibana/definition/functions/knn.json | 4 +-- .../esql/expression/function/vector/Knn.java | 25 ++++++++++++++++++- .../xpack/esql/querydsl/query/KnnQuery.java | 6 ++++- 4 files changed, 38 insertions(+), 5 deletions(-) diff --git a/docs/reference/query-languages/esql/_snippets/functions/functionNamedParams/knn.md b/docs/reference/query-languages/esql/_snippets/functions/functionNamedParams/knn.md index f38a8e8d84584..3e1def6931772 100644 --- a/docs/reference/query-languages/esql/_snippets/functions/functionNamedParams/knn.md +++ b/docs/reference/query-languages/esql/_snippets/functions/functionNamedParams/knn.md @@ -5,8 +5,14 @@ `boost` : (float) Floating point number used to decrease or increase the relevance scores of the query.Defaults to 1.0. +`k` +: (integer) The number of nearest neighbors to return from each shard. Elasticsearch collects k results from each shard, then merges them to find the global top results. This value must be less than or equal to num_candidates. This value is automatically set with any LIMIT applied to the function. + +`visit_percentage` +: (float) The percentage of vectors to explore per shard while doing knn search with bbq_disk. Must be between 0 and 100. 0 will default to using num_candidates for calculating the percent visited. Increasing visit_percentage tends to improve the accuracy of the final results. If visit_percentage is set for bbq_disk, num_candidates is ignored. Defaults to ~1% per shard for every 1 million vectors + `min_candidates` -: (integer) The minimum number of nearest neighbor candidates to consider per shard while doing knn search. KNN may use a higher number of candidates in case the query can't use a approximate results. Cannot exceed 10,000. Increasing min_candidates tends to improve the accuracy of the final results. Defaults to 1.5 * LIMIT used for the query. +: (integer) The minimum number of nearest neighbor candidates to consider per shard while doing knn search. KNN may use a higher number of candidates in case the query can't use a approximate results. Cannot exceed 10,000. Increasing min_candidates tends to improve the accuracy of the final results. Defaults to 1.5 * k (or LIMIT) used for the query. `rescore_oversample` : (double) Applies the specified oversampling for rescoring quantized vectors. See [oversampling and rescoring quantized vectors](docs-content://solutions/search/vector/knn.md#dense-vector-knn-search-rescoring) for details. diff --git a/docs/reference/query-languages/esql/kibana/definition/functions/knn.json b/docs/reference/query-languages/esql/kibana/definition/functions/knn.json index aeb1c492bcba1..45060f6440df1 100644 --- a/docs/reference/query-languages/esql/kibana/definition/functions/knn.json +++ b/docs/reference/query-languages/esql/kibana/definition/functions/knn.json @@ -21,7 +21,7 @@ { "name" : "options", "type" : "function_named_parameters", - "mapParams" : "{name='boost', values=[2.5], description='Floating point number used to decrease or increase the relevance scores of the query.Defaults to 1.0.'}, {name='min_candidates', values=[10], description='The minimum number of nearest neighbor candidates to consider per shard while doing knn search. KNN may use a higher number of candidates in case the query can't use a approximate results. Cannot exceed 10,000. Increasing min_candidates tends to improve the accuracy of the final results. Defaults to 1.5 * LIMIT used for the query.'}, {name='rescore_oversample', values=[3.5], description='Applies the specified oversampling for rescoring quantized vectors. See [oversampling and rescoring quantized vectors](docs-content://solutions/search/vector/knn.md#dense-vector-knn-search-rescoring) for details.'}, {name='similarity', values=[0.01], description='The minimum similarity required for a document to be considered a match. The similarity value calculated relates to the raw similarity used, not the document score.'}", + "mapParams" : "{name='boost', values=[2.5], description='Floating point number used to decrease or increase the relevance scores of the query.Defaults to 1.0.'}, {name='k', values=[10], description='The number of nearest neighbors to return from each shard. Elasticsearch collects k results from each shard, then merges them to find the global top results. This value must be less than or equal to num_candidates. This value is automatically set with any LIMIT applied to the function.'}, {name='visit_percentage', values=[10], description='The percentage of vectors to explore per shard while doing knn search with bbq_disk. Must be between 0 and 100. 0 will default to using num_candidates for calculating the percent visited. Increasing visit_percentage tends to improve the accuracy of the final results. If visit_percentage is set for bbq_disk, num_candidates is ignored. Defaults to ~1% per shard for every 1 million vectors'}, {name='min_candidates', values=[10], description='The minimum number of nearest neighbor candidates to consider per shard while doing knn search. KNN may use a higher number of candidates in case the query can't use a approximate results. Cannot exceed 10,000. Increasing min_candidates tends to improve the accuracy of the final results. Defaults to 1.5 * k (or LIMIT) used for the query.'}, {name='rescore_oversample', values=[3.5], description='Applies the specified oversampling for rescoring quantized vectors. See [oversampling and rescoring quantized vectors](docs-content://solutions/search/vector/knn.md#dense-vector-knn-search-rescoring) for details.'}, {name='similarity', values=[0.01], description='The minimum similarity required for a document to be considered a match. The similarity value calculated relates to the raw similarity used, not the document score.'}", "optional" : true, "description" : "(Optional) kNN additional options as <>. See [knn query](https://www.elastic.co/docs/reference/query-languages/query-dsl/query-dsl-knn-query) for more information." } @@ -46,7 +46,7 @@ { "name" : "options", "type" : "function_named_parameters", - "mapParams" : "{name='boost', values=[2.5], description='Floating point number used to decrease or increase the relevance scores of the query.Defaults to 1.0.'}, {name='min_candidates', values=[10], description='The minimum number of nearest neighbor candidates to consider per shard while doing knn search. KNN may use a higher number of candidates in case the query can't use a approximate results. Cannot exceed 10,000. Increasing min_candidates tends to improve the accuracy of the final results. Defaults to 1.5 * LIMIT used for the query.'}, {name='rescore_oversample', values=[3.5], description='Applies the specified oversampling for rescoring quantized vectors. See [oversampling and rescoring quantized vectors](docs-content://solutions/search/vector/knn.md#dense-vector-knn-search-rescoring) for details.'}, {name='similarity', values=[0.01], description='The minimum similarity required for a document to be considered a match. The similarity value calculated relates to the raw similarity used, not the document score.'}", + "mapParams" : "{name='boost', values=[2.5], description='Floating point number used to decrease or increase the relevance scores of the query.Defaults to 1.0.'}, {name='k', values=[10], description='The number of nearest neighbors to return from each shard. Elasticsearch collects k results from each shard, then merges them to find the global top results. This value must be less than or equal to num_candidates. This value is automatically set with any LIMIT applied to the function.'}, {name='visit_percentage', values=[10], description='The percentage of vectors to explore per shard while doing knn search with bbq_disk. Must be between 0 and 100. 0 will default to using num_candidates for calculating the percent visited. Increasing visit_percentage tends to improve the accuracy of the final results. If visit_percentage is set for bbq_disk, num_candidates is ignored. Defaults to ~1% per shard for every 1 million vectors'}, {name='min_candidates', values=[10], description='The minimum number of nearest neighbor candidates to consider per shard while doing knn search. KNN may use a higher number of candidates in case the query can't use a approximate results. Cannot exceed 10,000. Increasing min_candidates tends to improve the accuracy of the final results. Defaults to 1.5 * k (or LIMIT) used for the query.'}, {name='rescore_oversample', values=[3.5], description='Applies the specified oversampling for rescoring quantized vectors. See [oversampling and rescoring quantized vectors](docs-content://solutions/search/vector/knn.md#dense-vector-knn-search-rescoring) for details.'}, {name='similarity', values=[0.01], description='The minimum similarity required for a document to be considered a match. The similarity value calculated relates to the raw similarity used, not the document score.'}", "optional" : true, "description" : "(Optional) kNN additional options as <>. See [knn query](https://www.elastic.co/docs/reference/query-languages/query-dsl/query-dsl-knn-query) for more information." } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java index 81a3432f139eb..7e19ae48fa660 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java @@ -52,7 +52,9 @@ import static java.util.Map.entry; import static org.elasticsearch.common.logging.LoggerMessageFormat.format; import static org.elasticsearch.index.query.AbstractQueryBuilder.BOOST_FIELD; +import static org.elasticsearch.search.vectors.KnnVectorQueryBuilder.K_FIELD; import static org.elasticsearch.search.vectors.KnnVectorQueryBuilder.VECTOR_SIMILARITY_FIELD; +import static org.elasticsearch.search.vectors.KnnVectorQueryBuilder.VISIT_PERCENTAGE_FIELD; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FOURTH; import static org.elasticsearch.xpack.esql.core.type.DataType.DENSE_VECTOR; import static org.elasticsearch.xpack.esql.core.type.DataType.FLOAT; @@ -72,8 +74,10 @@ public class Knn extends SingleFieldFullTextFunction implements OptionalArgument public static final String MIN_CANDIDATES_OPTION = "min_candidates"; public static final Map ALLOWED_OPTIONS = Map.ofEntries( + entry(K_FIELD.getPreferredName(), INTEGER), entry(MIN_CANDIDATES_OPTION, INTEGER), entry(VECTOR_SIMILARITY_FIELD.getPreferredName(), FLOAT), + entry(VISIT_PERCENTAGE_FIELD.getPreferredName(), FLOAT), entry(BOOST_FIELD.getPreferredName(), FLOAT), entry(KnnQuery.RESCORE_OVERSAMPLE_FIELD, FLOAT) ); @@ -102,6 +106,15 @@ public Knn( @MapParam( name = "options", params = { + @MapParam.MapParamEntry( + name = "k", + type = "integer", + valueHint = { "10" }, + description = "The number of nearest neighbors to return from each shard. " + + "Elasticsearch collects k results from each shard, then merges them to find the global top results. " + + "This value must be less than or equal to num_candidates. " + + "This value is automatically set with any LIMIT applied to the function." + ), @MapParam.MapParamEntry( name = "boost", type = "float", @@ -116,7 +129,17 @@ public Knn( description = "The minimum number of nearest neighbor candidates to consider per shard while doing knn search. " + " KNN may use a higher number of candidates in case the query can't use a approximate results. " + "Cannot exceed 10,000. Increasing min_candidates tends to improve the accuracy of the final results. " - + "Defaults to 1.5 * LIMIT used for the query." + + "Defaults to 1.5 * k (or LIMIT) used for the query." + ), + @MapParam.MapParamEntry( + name = "visit_percentage", + type = "float", + valueHint = { "10" }, + description = "The percentage of vectors to explore per shard while doing knn search with bbq_disk. " + + "Must be between 0 and 100. 0 will default to using num_candidates for calculating the percent visited. " + + "Increasing visit_percentage tends to improve the accuracy of the final results. " + + "If visit_percentage is set for bbq_disk, num_candidates is ignored. " + + "Defaults to ~1% per shard for every 1 million vectors" ), @MapParam.MapParamEntry( name = "similarity", diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java index 4f4853ddff50c..a13442e4af8a0 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java @@ -21,7 +21,9 @@ import java.util.Objects; import static org.elasticsearch.index.query.AbstractQueryBuilder.BOOST_FIELD; +import static org.elasticsearch.search.vectors.KnnVectorQueryBuilder.K_FIELD; import static org.elasticsearch.search.vectors.KnnVectorQueryBuilder.VECTOR_SIMILARITY_FIELD; +import static org.elasticsearch.search.vectors.KnnVectorQueryBuilder.VISIT_PERCENTAGE_FIELD; public class KnnQuery extends Query { @@ -51,8 +53,10 @@ protected QueryBuilder asBuilder() { if (oversample != null) { rescoreVectorBuilder = new RescoreVectorBuilder(oversample); } + Integer k = (Integer) options.get(K_FIELD); Float vectorSimilarity = (Float) options.get(VECTOR_SIMILARITY_FIELD.getPreferredName()); Integer minCandidates = (Integer) options.get(Knn.MIN_CANDIDATES_OPTION); + Float visitPercentage = (Float) options.get(VISIT_PERCENTAGE_FIELD); minCandidates = minCandidates == null ? null : Math.max(minCandidates, k); // TODO: expose visit_percentage in ESQL @@ -61,7 +65,7 @@ protected QueryBuilder asBuilder() { query, k, minCandidates, - null, + visitPercentage, rescoreVectorBuilder, vectorSimilarity ); From 8e7a03c80b6f5c02a3a68e2fb4b3ef82a4ac371f Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 20 Nov 2025 15:54:12 +0100 Subject: [PATCH 3/7] Fix tests --- .../esql/expression/function/vector/Knn.java | 37 +++++++++++-------- .../rules/logical/PushLimitToKnn.java | 2 +- .../xpack/esql/querydsl/query/KnnQuery.java | 4 +- .../LocalPhysicalPlanOptimizerTests.java | 37 ++++++------------- .../optimizer/LogicalPlanOptimizerTests.java | 12 +++--- 5 files changed, 40 insertions(+), 52 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java index 7e19ae48fa660..c71c60a1a79a6 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java @@ -66,8 +66,8 @@ public class Knn extends SingleFieldFullTextFunction implements OptionalArgument public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "Knn", Knn::readFrom); - // k is not serialized as it's already included in the query builder on the rewrite step before being sent to data nodes - private final transient Integer k; + // Implicit k is not serialized as it's already included in the query builder on the rewrite step before being sent to data nodes + private final transient Integer implicitK; // Expressions to be used as prefilters in knn query private final List filterExpressions; @@ -169,12 +169,12 @@ public Knn( Expression field, Expression query, Expression options, - Integer k, + Integer implicitK, QueryBuilder queryBuilder, List filterExpressions ) { super(source, field, query, options, expressionList(field, query, options), queryBuilder); - this.k = k; + this.implicitK = implicitK; this.filterExpressions = filterExpressions; } @@ -188,15 +188,15 @@ private static List expressionList(Expression field, Expression quer return result; } - public Integer k() { - return k; + public Integer implicitK() { + return implicitK; } public List filterExpressions() { return filterExpressions; } - public Knn replaceK(Integer k) { + public Knn withImplicitK(Integer k) { Check.notNull(k, "k must not be null"); return new Knn(source(), field(), query(), options(), k, queryBuilder(), filterExpressions()); } @@ -214,7 +214,7 @@ public List queryAsObject() { @Override public Expression replaceQueryBuilder(QueryBuilder queryBuilder) { - return new Knn(source(), field(), query(), options(), k(), queryBuilder, filterExpressions()); + return new Knn(source(), field(), query(), options(), implicitK(), queryBuilder, filterExpressions()); } @Override @@ -230,7 +230,7 @@ public Translatable translatable(LucenePushdownPredicates pushdownPredicates) { @Override protected Query translate(LucenePushdownPredicates pushdownPredicates, TranslatorHandler handler) { - assert k() != null : "Knn function must have a k value set before translation"; + assert implicitK() != null : "Knn function must have a k value set before translation"; var fieldAttribute = fieldAsFieldAttribute(field()); Check.notNull(fieldAttribute, "Knn must have a field attribute as the first argument"); @@ -249,7 +249,10 @@ protected Query translate(LucenePushdownPredicates pushdownPredicates, Translato } } - return new KnnQuery(source(), fieldName, queryAsFloats, k(), queryOptions(), filterQueries); + Map options = queryOptions(); + Integer explicitK = (Integer) options.get(K_FIELD.getPreferredName()); + + return new KnnQuery(source(), fieldName, queryAsFloats, explicitK != null ? explicitK : implicitK(), options, filterQueries); } private float[] queryAsFloats() { @@ -262,7 +265,7 @@ private float[] queryAsFloats() { } public Expression withFilters(List filterExpressions) { - return new Knn(source(), field(), query(), options(), k(), queryBuilder(), filterExpressions); + return new Knn(source(), field(), query(), options(), implicitK(), queryBuilder(), filterExpressions); } private Map queryOptions() throws InvalidArgumentException { @@ -287,7 +290,7 @@ protected QueryBuilder evaluatorQueryBuilder() { @Override public void postOptimizationVerification(Failures failures) { // Check that a k has been set - if (k() == null) { + if (implicitK() == null) { failures.add( Failure.fail(this, "Knn function must be used with a LIMIT clause after it to set the number of nearest neighbors to find") ); @@ -301,7 +304,7 @@ public Expression replaceChildren(List newChildren) { newChildren.get(0), newChildren.get(1), newChildren.size() > 2 ? newChildren.get(2) : null, - k(), + implicitK(), queryBuilder(), filterExpressions() ); @@ -309,7 +312,7 @@ public Expression replaceChildren(List newChildren) { @Override protected NodeInfo info() { - return NodeInfo.create(this, Knn::new, field(), query(), options(), k(), queryBuilder(), filterExpressions()); + return NodeInfo.create(this, Knn::new, field(), query(), options(), implicitK(), queryBuilder(), filterExpressions()); } @Override @@ -357,12 +360,14 @@ public boolean equals(Object o) { // ignore options when comparing two Knn functions if (o == null || getClass() != o.getClass()) return false; Knn knn = (Knn) o; - return super.equals(knn) && Objects.equals(k(), knn.k()) && Objects.equals(filterExpressions(), knn.filterExpressions()); + return super.equals(knn) + && Objects.equals(implicitK(), knn.implicitK()) + && Objects.equals(filterExpressions(), knn.filterExpressions()); } @Override public int hashCode() { - return Objects.hash(field(), query(), queryBuilder(), k(), filterExpressions()); + return Objects.hash(field(), query(), queryBuilder(), implicitK(), filterExpressions()); } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PushLimitToKnn.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PushLimitToKnn.java index a8503c300bfbc..b3477ff204070 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PushLimitToKnn.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PushLimitToKnn.java @@ -61,7 +61,7 @@ public LogicalPlan rule(Limit limit, LogicalOptimizerContext ctx) { private Expression limitFilterExpressions(Expression condition, Limit limit, LogicalOptimizerContext ctx) { return condition.transformDown(exp -> { if (exp instanceof Knn knn) { - return knn.replaceK((Integer) limit.limit().fold(ctx.foldCtx())); + return knn.withImplicitK((Integer) limit.limit().fold(ctx.foldCtx())); } return exp; }); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java index a13442e4af8a0..3f9bb4408a5df 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/KnnQuery.java @@ -21,7 +21,6 @@ import java.util.Objects; import static org.elasticsearch.index.query.AbstractQueryBuilder.BOOST_FIELD; -import static org.elasticsearch.search.vectors.KnnVectorQueryBuilder.K_FIELD; import static org.elasticsearch.search.vectors.KnnVectorQueryBuilder.VECTOR_SIMILARITY_FIELD; import static org.elasticsearch.search.vectors.KnnVectorQueryBuilder.VISIT_PERCENTAGE_FIELD; @@ -53,10 +52,9 @@ protected QueryBuilder asBuilder() { if (oversample != null) { rescoreVectorBuilder = new RescoreVectorBuilder(oversample); } - Integer k = (Integer) options.get(K_FIELD); Float vectorSimilarity = (Float) options.get(VECTOR_SIMILARITY_FIELD.getPreferredName()); Integer minCandidates = (Integer) options.get(Knn.MIN_CANDIDATES_OPTION); - Float visitPercentage = (Float) options.get(VISIT_PERCENTAGE_FIELD); + Float visitPercentage = (Float) options.get(VISIT_PERCENTAGE_FIELD.getPreferredName()); minCandidates = minCandidates == null ? null : Math.max(minCandidates, k); // TODO: expose visit_percentage in ESQL diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java index 3cffd78d5fcf9..29a2101a05eb8 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java @@ -1286,7 +1286,8 @@ public void testKnnOptionsPushDown() { String query = """ from test | where KNN(dense_vector, [0.1, 0.2, 0.3], - { "similarity": 0.001, "min_candidates": 5000, "rescore_oversample": 7, "boost": 3.5 }) + {"k": 10, "min_candidates": 20, "rescore_oversample": 1.5, "similarity": 0.5, "boost": 2.0, "visit_percentage": 0.25}) + | limit 50 """; var analyzer = makeAnalyzer("mapping-all-types.json"); var plan = plannerOptimizer.plan(query, IS_SV_STATS, analyzer); @@ -1297,12 +1298,12 @@ public void testKnnOptionsPushDown() { var expectedQuery = new KnnVectorQueryBuilder( "dense_vector", new float[] { 0.1f, 0.2f, 0.3f }, - 5000, - 5000, - null, - new RescoreVectorBuilder(7), - 0.001f - ).boost(3.5f); + 10, + 20, + 0.25f, + new RescoreVectorBuilder(1.5f), + 0.5f + ).boost(2.0f); assertEquals(expectedQuery.toString(), planStr.get()); } @@ -1322,10 +1323,10 @@ public void testKnnUsesLimitForK() { assertEquals(expectedQuery.toString(), planStr.get()); } - public void testKnnKAndMinCandidatesLowerK() { + public void testKnnKOverridesLimitK() { String query = """ from test - | where KNN(dense_vector, [0.1, 0.2, 0.3], {"min_candidates": 50}) + | where KNN(dense_vector, [0.1, 0.2, 0.3], {"k": 20}) | limit 10 """; var analyzer = makeAnalyzer("mapping-all-types.json"); @@ -1334,23 +1335,7 @@ public void testKnnKAndMinCandidatesLowerK() { AtomicReference planStr = new AtomicReference<>(); plan.forEachDown(EsQueryExec.class, result -> planStr.set(result.query().toString())); - var expectedQuery = new KnnVectorQueryBuilder("dense_vector", new float[] { 0.1f, 0.2f, 0.3f }, 50, 50, null, null, null); - assertEquals(expectedQuery.toString(), planStr.get()); - } - - public void testKnnKAndMinCandidatesHigherK() { - String query = """ - from test - | where KNN(dense_vector, [0.1, 0.2, 0.3], {"min_candidates": 10}) - | limit 50 - """; - var analyzer = makeAnalyzer("mapping-all-types.json"); - var plan = plannerOptimizer.plan(query, IS_SV_STATS, analyzer); - - AtomicReference planStr = new AtomicReference<>(); - plan.forEachDown(EsQueryExec.class, result -> planStr.set(result.query().toString())); - - var expectedQuery = new KnnVectorQueryBuilder("dense_vector", new float[] { 0.1f, 0.2f, 0.3f }, 50, 50, null, null, null); + var expectedQuery = new KnnVectorQueryBuilder("dense_vector", new float[] { 0.1f, 0.2f, 0.3f }, 20, null, null, null, null); assertEquals(expectedQuery.toString(), planStr.get()); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index ccf2a923efc7f..9686843a81d59 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -8846,7 +8846,7 @@ public void testKnnImplicitLimit() { var limit = as(optimized, Limit.class); var filter = as(limit.child(), Filter.class); var knn = as(filter.condition(), Knn.class); - assertThat(knn.k(), equalTo(1000)); + assertThat(knn.implicitK(), equalTo(1000)); } public void testKnnWithLimit() { @@ -8860,7 +8860,7 @@ public void testKnnWithLimit() { var limit = as(optimized, Limit.class); var filter = as(limit.child(), Filter.class); var knn = as(filter.condition(), Knn.class); - assertThat(knn.k(), equalTo(10)); + assertThat(knn.implicitK(), equalTo(10)); } public void testKnnWithTopN() { @@ -8875,7 +8875,7 @@ public void testKnnWithTopN() { var topN = as(optimized, TopN.class); var filter = as(topN.child(), Filter.class); var knn = as(filter.condition(), Knn.class); - assertThat(knn.k(), equalTo(10)); + assertThat(knn.implicitK(), equalTo(10)); } public void testKnnWithMultipleLimitsAfterTopN() { @@ -8893,7 +8893,7 @@ public void testKnnWithMultipleLimitsAfterTopN() { var limit = as(topN.child(), Limit.class); var filter = as(limit.child(), Filter.class); var knn = as(filter.condition(), Knn.class); - assertThat(knn.k(), equalTo(20)); + assertThat(knn.implicitK(), equalTo(20)); } public void testKnnWithMultipleLimitsCombined() { @@ -8909,7 +8909,7 @@ public void testKnnWithMultipleLimitsCombined() { assertThat(limit.limit().fold(FoldContext.small()), equalTo(10)); var filter = as(limit.child(), Filter.class); var knn = as(filter.condition(), Knn.class); - assertThat(knn.k(), equalTo(10)); + assertThat(knn.implicitK(), equalTo(10)); } public void testKnnWithMultipleClauses() { @@ -8965,7 +8965,7 @@ public void testKnnWithRerankAmdLimit() { assertThat(limit.limit().fold(FoldContext.small()), equalTo(100)); var filter = as(limit.child(), Filter.class); var knn = as(filter.condition(), Knn.class); - assertThat(knn.k(), equalTo(100)); + assertThat(knn.implicitK(), equalTo(100)); } private LogicalPlanOptimizer getCustomRulesLogicalPlanOptimizer( From 4124dc952a880c4611053410ff8de062677a735a Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 20 Nov 2025 16:06:20 +0100 Subject: [PATCH 4/7] Fix tests --- .../testFixtures/src/main/resources/knn-function.csv-spec | 2 +- .../org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec index 1c30aebffb2ea..06529082e0262 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec @@ -229,7 +229,7 @@ required_capability: knn_function_v5 from colors metadata _score | eval composed_name = locate(color, " ") > 0 -| where knn(rgb_vector, [128,128,0], {"min_candidates": 100}) and composed_name == false +| where knn(rgb_vector, [128,128,0], {"k": 100}) and composed_name == false | sort _score desc, color asc | keep color, composed_name | limit 10 diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java index af5fa7cb72db0..111f7f0c8ee72 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java @@ -113,16 +113,16 @@ public void testKnnDefaults() { } } - public void testKnnOptions() { + public void testKnnKOverridesLimit() { float[] queryVector = new float[numDims]; Arrays.fill(queryVector, 0.0f); var query = String.format(Locale.ROOT, """ FROM test METADATA _score - | WHERE knn(vector, %s) + | WHERE knn(vector, %s, {"k": 5, "min_candidates": 20}) | KEEP id, _score, vector | SORT _score DESC - | LIMIT 5 + | LIMIT 10 """, Arrays.toString(queryVector)); try (var resp = run(query)) { From 8deef32277a33a0f49e004332d0542a538dc5b5f Mon Sep 17 00:00:00 2001 From: Carlos Delgado <6339205+carlosdelest@users.noreply.github.com> Date: Thu, 20 Nov 2025 16:08:45 +0100 Subject: [PATCH 5/7] Update docs/changelog/138372.yaml --- docs/changelog/138372.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/138372.yaml diff --git a/docs/changelog/138372.yaml b/docs/changelog/138372.yaml new file mode 100644 index 0000000000000..4470d3ff2ff3c --- /dev/null +++ b/docs/changelog/138372.yaml @@ -0,0 +1,5 @@ +pr: 138372 +summary: ES|QL - KNN function option changes +area: "ES|QL, ES|QL" +type: enhancement +issues: [] From d61c1466daf0d3ab793c6ee238285d3058c2d688 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 20 Nov 2025 16:17:33 +0100 Subject: [PATCH 6/7] Update changelog --- docs/changelog/138372.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/changelog/138372.yaml b/docs/changelog/138372.yaml index 4470d3ff2ff3c..78995a2b0fa7a 100644 --- a/docs/changelog/138372.yaml +++ b/docs/changelog/138372.yaml @@ -1,5 +1,5 @@ pr: 138372 -summary: ES|QL - KNN function option changes -area: "ES|QL, ES|QL" +summary: ES|QL - KNN function options support k and visit_percentage parameters +area: "ES|QL" type: enhancement issues: [] From 2931121ccebe69fab450a8572212becf898152d2 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 20 Nov 2025 16:29:49 +0100 Subject: [PATCH 7/7] Add capability for bwc --- .../qa/testFixtures/src/main/resources/knn-function.csv-spec | 1 + .../org/elasticsearch/xpack/esql/action/EsqlCapabilities.java | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec index 06529082e0262..905952e0a2ce6 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec @@ -226,6 +226,7 @@ azure | [240.0, 255.0, 255.0] knnWithNonPushableConjunction required_capability: knn_function_v5 +required_capability: knn_function_options_k_visit_percentage from colors metadata _score | eval composed_name = locate(color, " ") > 0 diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index d7683fe379d06..da67836e3146b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -1692,6 +1692,10 @@ public enum Cap { */ TIME_SERIES_WINDOW_V0, + /** + * KNN function adds support for k and visit_percentage options + */ + KNN_FUNCTION_OPTIONS_K_VISIT_PERCENTAGE, // Last capability should still have a comma for fewer merge conflicts when adding new ones :) // This comment prevents the semicolon from being on the previous capability when Spotless formats the file. ;