From e8dd1e8e98403eca820de5a44880e91747dcbf01 Mon Sep 17 00:00:00 2001 From: cdelgado Date: Fri, 29 Aug 2025 13:29:22 +0200 Subject: [PATCH 01/16] Accept text field type for knn --- .../xpack/esql/core/expression/Expression.java | 4 ++++ .../xpack/esql/expression/function/vector/Knn.java | 10 +++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/Expression.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/Expression.java index b254612a700df..3b37a97daaf5e 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/Expression.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/Expression.java @@ -55,6 +55,10 @@ public TypeResolution and(TypeResolution other) { return failed ? this : other; } + public TypeResolution or(TypeResolution other) { + return failed ? other : this; + } + public TypeResolution and(Supplier other) { return failed ? 
this : other.get(); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java index 0b64fb43909df..2baa1c757c21e 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java @@ -67,10 +67,14 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.DENSE_VECTOR; import static org.elasticsearch.xpack.esql.core.type.DataType.FLOAT; import static org.elasticsearch.xpack.esql.core.type.DataType.INTEGER; +import static org.elasticsearch.xpack.esql.core.type.DataType.KEYWORD; +import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT; import static org.elasticsearch.xpack.esql.expression.Foldables.TypeResolutionValidator.forPreOptimizationValidation; import static org.elasticsearch.xpack.esql.expression.Foldables.resolveTypeQuery; public class Knn extends FullTextFunction implements OptionalArgument, VectorFunction, PostAnalysisPlanVerificationAware { + + private static final String[] ACCEPTED_FIELD_TYPES = { "dense_vector", "semantic_text" }; private final Logger log = LogManager.getLogger(getClass()); public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "Knn", Knn::readFrom); @@ -209,7 +213,11 @@ protected TypeResolution resolveParams() { } private TypeResolution resolveField() { - return isNotNull(field(), sourceText(), FIRST).and(isType(field(), dt -> dt == DENSE_VECTOR, sourceText(), FIRST, "dense_vector")); + return isNotNull(field(), sourceText(), FIRST).and( + isType(field(), dt -> dt == TEXT, sourceText(), FIRST, ACCEPTED_FIELD_TYPES).or( + isType(field(), dt -> dt == DENSE_VECTOR, sourceText(), FIRST, ACCEPTED_FIELD_TYPES) + ) + ); } private TypeResolution resolveQuery() { From 
9db94aaffd8a10cc2f65f6eccc02444130d8fadc Mon Sep 17 00:00:00 2001 From: cdelgado Date: Fri, 29 Aug 2025 13:33:43 +0200 Subject: [PATCH 02/16] Add tests and service infrastructure --- .../xpack/esql/core/type/DataType.java | 3 ++ .../xpack/esql/qa/rest/EsqlSpecTestCase.java | 9 +++- .../xpack/esql/CsvTestsDataLoader.java | 31 +++++++++++- .../src/main/resources/data/semantic_text.csv | 8 ++-- .../src/main/resources/knn-function.csv-spec | 48 ++++++++++++------- .../resources/mapping-full_text_search.json | 4 ++ .../main/resources/mapping-semantic_text.json | 4 ++ .../elasticsearch/xpack/esql/CsvTests.java | 2 +- .../xpack/esql/analysis/AnalyzerTests.java | 2 +- .../xpack/esql/analysis/VerifierTests.java | 26 ++++++---- 10 files changed, 104 insertions(+), 33 deletions(-) diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java index bde8af3c4e6d1..eebf93ecfacc4 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java @@ -392,6 +392,9 @@ public enum DataType { // ES calls this 'point', but ESQL calls it 'cartesian_point' map.put("point", DataType.CARTESIAN_POINT); map.put("shape", DataType.CARTESIAN_SHAPE); + // semantic_text is returned as text by field_caps, but unit tests will retrieve it from the mapping + // so we need to map it here as well + map.put("semantic_text", DataType.TEXT); ES_TO_TYPE = Collections.unmodifiableMap(map); // DATETIME has different esType and typeName, add an entry in NAME_TO_TYPE with date as key map = TYPES.stream().collect(toMap(DataType::typeName, t -> t)); diff --git a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/EsqlSpecTestCase.java 
b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/EsqlSpecTestCase.java index 7e0bd6031f455..8c53bce5faf0a 100644 --- a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/EsqlSpecTestCase.java +++ b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/EsqlSpecTestCase.java @@ -73,6 +73,7 @@ import static org.elasticsearch.xpack.esql.CsvTestsDataLoader.loadDataSetIntoEs; import static org.elasticsearch.xpack.esql.EsqlTestUtils.classpathResources; import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.COMPLETION; +import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.KNN_FUNCTION_V4; import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.METRICS_COMMAND; import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.RERANK; import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.SEMANTIC_TEXT_FIELD_CAPS; @@ -211,8 +212,12 @@ protected boolean supportsInferenceTestService() { } protected boolean requiresInferenceEndpoint() { - return Stream.of(SEMANTIC_TEXT_FIELD_CAPS.capabilityName(), RERANK.capabilityName(), COMPLETION.capabilityName()) - .anyMatch(testCase.requiredCapabilities::contains); + return Stream.of( + SEMANTIC_TEXT_FIELD_CAPS.capabilityName(), + RERANK.capabilityName(), + COMPLETION.capabilityName(), + KNN_FUNCTION_V4.capabilityName() + ).anyMatch(testCase.requiredCapabilities::contains); } protected boolean supportsIndexModeLookup() throws IOException { diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java index c56ed4d489843..765e9d9173e87 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java +++ 
b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java @@ -415,6 +415,10 @@ public static void createInferenceEndpoints(RestClient client) throws IOExceptio createSparseEmbeddingInferenceEndpoint(client); } + if (clusterHasTextEmbeddingInferenceEndpoint(client) == false) { + createTextEmbeddingInferenceEndpoint(client); + } + if (clusterHasRerankInferenceEndpoint(client) == false) { createRerankInferenceEndpoint(client); } @@ -426,11 +430,12 @@ public static void createInferenceEndpoints(RestClient client) throws IOExceptio public static void deleteInferenceEndpoints(RestClient client) throws IOException { deleteSparseEmbeddingInferenceEndpoint(client); + deleteTextEmbeddingInferenceEndpoint(client); deleteRerankInferenceEndpoint(client); deleteCompletionInferenceEndpoint(client); } - /** The semantic_text mapping type require an inference endpoint that needs to be setup before creating the index. */ + /** The semantic_text mapping type requires inference endpoints that need to be set up before creating the index. 
*/ public static void createSparseEmbeddingInferenceEndpoint(RestClient client) throws IOException { createInferenceEndpoint(client, TaskType.SPARSE_EMBEDDING, "test_sparse_inference", """ { @@ -441,14 +446,38 @@ public static void createSparseEmbeddingInferenceEndpoint(RestClient client) thr """); } + public static void createTextEmbeddingInferenceEndpoint(RestClient client) throws IOException { + createInferenceEndpoint(client, TaskType.TEXT_EMBEDDING, "test_dense_inference", """ + { + "service": "text_embedding_test_service", + "service_settings": { + "model": "my_model", + "api_key": "abc64", + "dimensions": 3, + "similarity": "l2_norm", + "element_type": "float" + }, + "task_settings": { } + } + """); + } + public static void deleteSparseEmbeddingInferenceEndpoint(RestClient client) throws IOException { deleteInferenceEndpoint(client, "test_sparse_inference"); } + public static void deleteTextEmbeddingInferenceEndpoint(RestClient client) throws IOException { + deleteInferenceEndpoint(client, "test_dense_inference"); + } + public static boolean clusterHasSparseEmbeddingInferenceEndpoint(RestClient client) throws IOException { return clusterHasInferenceEndpoint(client, TaskType.SPARSE_EMBEDDING, "test_sparse_inference"); } + public static boolean clusterHasTextEmbeddingInferenceEndpoint(RestClient client) throws IOException { + return clusterHasInferenceEndpoint(client, TaskType.TEXT_EMBEDDING, "test_dense_inference"); + } + public static void createRerankInferenceEndpoint(RestClient client) throws IOException { createInferenceEndpoint(client, TaskType.RERANK, "test_reranker", """ { diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/semantic_text.csv b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/semantic_text.csv index f79e44ab67ca3..fae7715a860c2 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/semantic_text.csv +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/semantic_text.csv @@ -1,4 
+1,4 @@ -_id:keyword,semantic_text_field:semantic_text,st_bool:semantic_text,st_cartesian_point:semantic_text,st_cartesian_shape:semantic_text,st_datetime:semantic_text,st_double:semantic_text,st_geopoint:semantic_text,st_geoshape:semantic_text,st_integer:semantic_text,st_ip:semantic_text,st_long:semantic_text,st_unsigned_long:semantic_text,st_version:semantic_text,st_multi_value:semantic_text,st_unicode:semantic_text,host:keyword,description:text,value:long,st_base64:semantic_text,st_logs:semantic_text,language_name:keyword -1,live long and prosper,false,"POINT(4297.11 -1475.53)",,1953-09-02T00:00:00.000Z,5.20128E11,"POINT(42.97109630194 14.7552534413725)","POLYGON ((30 10\, 40 40\, 20 40\, 10 20\, 30 10))",23,1.1.1.1,2147483648,2147483648,1.2.3,["Hello there!", "This is a random value", "for testing purposes"],你吃饭了吗,"host1","some description1",1001,ZWxhc3RpYw==,"2024-12-23T12:15:00.000Z 1.2.3.4 example@example.com 4553",English -2,all we have to decide is what to do with the time that is given to us,true,"POINT(7580.93 2272.77)",,2023-09-24T15:57:00.000Z,4541.11,"POINT(37.97109630194 21.7552534413725)","POLYGON ((30 10\, 40 40\, 20 40\, 10 20\, 30 10))",122,1.1.2.1,123,2147483648.2,9.0.0,["nice to meet you", "bye bye!"],["谢谢", "对不起我的中文不好"],"host2","some description2",1002,aGVsbG8=,"2024-01-23T12:15:00.000Z 1.2.3.4 foo@example.com 42",French -3,be excellent to each other,,,,,,,,,,,,,,,"host3","some description3",1003,,"2023-01-23T12:15:00.000Z 127.0.0.1 some.email@foo.com 42",Spanish 
+_id:keyword,semantic_text_field:semantic_text,semantic_text_dense_field:semantic_text,st_bool:semantic_text,st_cartesian_point:semantic_text,st_cartesian_shape:semantic_text,st_datetime:semantic_text,st_double:semantic_text,st_geopoint:semantic_text,st_geoshape:semantic_text,st_integer:semantic_text,st_ip:semantic_text,st_long:semantic_text,st_unsigned_long:semantic_text,st_version:semantic_text,st_multi_value:semantic_text,st_unicode:semantic_text,host:keyword,description:text,value:long,st_base64:semantic_text,st_logs:semantic_text,language_name:keyword +1,live long and prosper,live long and prosper,false,"POINT(4297.11 -1475.53)",,1953-09-02T00:00:00.000Z,5.20128E11,"POINT(42.97109630194 14.7552534413725)","POLYGON ((30 10\, 40 40\, 20 40\, 10 20\, 30 10))",23,1.1.1.1,2147483648,2147483648,1.2.3,["Hello there!", "This is a random value", "for testing purposes"],你吃饭了吗,"host1","some description1",1001,ZWxhc3RpYw==,"2024-12-23T12:15:00.000Z 1.2.3.4 example@example.com 4553",English +2,all we have to decide is what to do with the time that is given to us,all we have to decide is what to do with the time that is given to us,true,"POINT(7580.93 2272.77)",,2023-09-24T15:57:00.000Z,4541.11,"POINT(37.97109630194 21.7552534413725)","POLYGON ((30 10\, 40 40\, 20 40\, 10 20\, 30 10))",122,1.1.2.1,123,2147483648.2,9.0.0,["nice to meet you", "bye bye!"],["谢谢", "对不起我的中文不好"],"host2","some description2",1002,aGVsbG8=,"2024-01-23T12:15:00.000Z 1.2.3.4 foo@example.com 42",French +3,be excellent to each other,be excellent to each other,,,,,,,,,,,,,,,"host3","some description3",1003,,"2023-01-23T12:15:00.000Z 127.0.0.1 some.email@foo.com 42",Spanish diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec index 2cad34e324fda..ab90d30803233 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec +++ 
b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec @@ -3,7 +3,7 @@ # top-n query at the shard level knnSearch -required_capability: knn_function_v3 +required_capability: knn_function_v4 // tag::knn-function[] from colors metadata _score @@ -30,7 +30,7 @@ chartreuse | [127.0, 255.0, 0.0] ; knnSearchWithSimilarityOption -required_capability: knn_function_v3 +required_capability: knn_function_v4 from colors metadata _score | where knn(rgb_vector, [255,192,203], 140, {"similarity": 40}) @@ -46,7 +46,7 @@ wheat | [245.0, 222.0, 179.0] ; knnHybridSearch -required_capability: knn_function_v3 +required_capability: knn_function_v4 from colors metadata _score | where match(color, "blue") or knn(rgb_vector, [65,105,225], 10) @@ -68,7 +68,7 @@ yellow | [255.0, 255.0, 0.0] ; knnWithPrefilter -required_capability: knn_function_v3 +required_capability: knn_function_v4 from colors | where knn(rgb_vector, [120,180,0], 10) and (match(color, "olive") or match(color, "green")) @@ -82,7 +82,7 @@ olive ; knnWithNegatedPrefilter -required_capability: knn_function_v3 +required_capability: knn_function_v4 from colors metadata _score | where knn(rgb_vector, [128,128,0], 10) and not (match(color, "olive") or match(color, "chocolate")) @@ -105,7 +105,7 @@ orange | [255.0, 165.0, 0.0] ; knnAfterKeep -required_capability: knn_function_v3 +required_capability: knn_function_v4 from colors metadata _score | keep rgb_vector, color, _score @@ -124,7 +124,7 @@ rgb_vector:dense_vector ; knnAfterDrop -required_capability: knn_function_v3 +required_capability: knn_function_v4 from colors metadata _score | drop primary @@ -143,7 +143,7 @@ lime | [0.0, 255.0, 0.0] ; knnAfterEval -required_capability: knn_function_v3 +required_capability: knn_function_v4 from colors metadata _score | eval composed_name = locate(color, " ") > 0 @@ -162,7 +162,7 @@ golden rod | true ; knnWithConjunction -required_capability: knn_function_v3 +required_capability: knn_function_v4 from colors metadata 
_score | where knn(rgb_vector, [255,255,238], 10) and hex_code like "#FFF*" @@ -181,7 +181,7 @@ yellow | #FFFF00 | [255.0, 255.0, 0.0] ; knnWithDisjunctionAndFiltersConjunction -required_capability: knn_function_v3 +required_capability: knn_function_v4 from colors metadata _score | where (knn(rgb_vector, [0,255,255], 140) or knn(rgb_vector, [128, 0, 255], 10)) and primary == true @@ -204,7 +204,7 @@ yellow | [255.0, 255.0, 0.0] ; knnWithNegationsAndFiltersConjunction -required_capability: knn_function_v3 +required_capability: knn_function_v4 from colors metadata _score | where (knn(rgb_vector, [0,255,255], 140) and not(primary == true and match(color, "blue"))) @@ -227,7 +227,7 @@ azure | [240.0, 255.0, 255.0] ; knnWithNonPushableConjunction -required_capability: knn_function_v3 +required_capability: knn_function_v4 from colors metadata _score | eval composed_name = locate(color, " ") > 0 @@ -251,7 +251,7 @@ maroon | false ; testKnnWithNonPushableDisjunctions -required_capability: knn_function_v3 +required_capability: knn_function_v4 from colors metadata _score | where knn(rgb_vector, [128,128,0], 140, {"similarity": 30}) or length(color) > 10 @@ -267,7 +267,7 @@ papaya whip ; testKnnWithNonPushableDisjunctionsOnComplexExpressions -required_capability: knn_function_v3 +required_capability: knn_function_v4 from colors metadata _score | where (knn(rgb_vector, [128,128,0], 140, {"similarity": 70}) and length(color) < 10) or (knn(rgb_vector, [128,0,128], 140, {"similarity": 60}) and primary == false) @@ -282,7 +282,7 @@ indigo | false ; testKnnInStatsNonPushable -required_capability: knn_function_v3 +required_capability: knn_function_v4 from colors | where length(color) < 10 @@ -294,7 +294,7 @@ c: long ; testKnnInStatsWithGrouping -required_capability: knn_function_v3 +required_capability: knn_function_v4 required_capability: full_text_functions_in_stats_where from colors @@ -306,3 +306,19 @@ c: long | primary: boolean 41 | false 9 | true ; + 
+testKnnWithWithSemanticText +required_capability: knn_function_v4 +required_capability: semantic_text_field_caps + +from semantic_text +| where knn(semantic_text_dense_field, [0, 1, 2], 10) +| keep semantic_text_dense_field +| sort semantic_text_dense_field asc +; + +semantic_text_dense_field:text +all we have to decide is what to do with the time that is given to us +be excellent to each other +live long and prosper +; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-full_text_search.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-full_text_search.json index 160f285d792d1..a5458bc3f3074 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-full_text_search.json +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-full_text_search.json @@ -21,6 +21,10 @@ "vector": { "type": "dense_vector", "similarity": "l2_norm" + }, + "semantic": { + "type": "semantic_text", + "inference_id": "test_inference" } } } diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-semantic_text.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-semantic_text.json index 5fa25e01ef0e4..d4a6fff384287 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-semantic_text.json +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-semantic_text.json @@ -4,6 +4,10 @@ "type": "semantic_text", "inference_id": "test_sparse_inference" }, + "semantic_text_dense_field": { + "type": "semantic_text", + "inference_id": "test_dense_inference" + }, "st_bool": { "type": "semantic_text", "inference_id": "test_sparse_inference" diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java index 869a851a1fb34..97429ea091053 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java +++ 
b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java @@ -305,7 +305,7 @@ public final void test() throws Throwable { ); assumeFalse( "can't use KNN function in csv tests", - testCase.requiredCapabilities.contains(EsqlCapabilities.Cap.KNN_FUNCTION_V3.capabilityName()) + testCase.requiredCapabilities.contains(EsqlCapabilities.Cap.KNN_FUNCTION_V4.capabilityName()) ); assumeFalse( "lookup join disabled for csv tests", diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java index 52ea22dee1d6b..e605b0d777071 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java @@ -2349,7 +2349,7 @@ public void testImplicitCasting() { public void testDenseVectorImplicitCastingKnn() { assumeTrue("dense_vector capability not available", EsqlCapabilities.Cap.DENSE_VECTOR_FIELD_TYPE.isEnabled()); - assumeTrue("dense_vector capability not available", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); + assumeTrue("dense_vector capability not available", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); checkDenseVectorCastingKnn("float_vector"); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index 594148826b432..c1ca85393185f 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -1251,7 +1251,7 @@ public void testFieldBasedFullTextFunctions() throws Exception { checkFieldBasedWithNonIndexedColumn("Term", "term(text, \"cat\")", "function"); checkFieldBasedFunctionNotAllowedAfterCommands("Term", 
"function", "term(title, \"Meditation\")"); } - if (EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()) { + if (EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()) { checkFieldBasedFunctionNotAllowedAfterCommands("KNN", "function", "knn(vector, [1, 2, 3], 10)"); } } @@ -1384,7 +1384,7 @@ public void testFullTextFunctionsOnlyAllowedInWhere() throws Exception { if (EsqlCapabilities.Cap.MULTI_MATCH_FUNCTION.isEnabled()) { checkFullTextFunctionsOnlyAllowedInWhere("MultiMatch", "multi_match(\"Meditation\", title, body)", "function"); } - if (EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()) { + if (EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()) { checkFullTextFunctionsOnlyAllowedInWhere("KNN", "knn(vector, [0, 1, 2], 10)", "function"); } @@ -1439,7 +1439,7 @@ public void testFullTextFunctionsDisjunctions() { if (EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()) { checkWithFullTextFunctionsDisjunctions("term(title, \"Meditation\")"); } - if (EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()) { + if (EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()) { checkWithFullTextFunctionsDisjunctions("knn(vector, [1, 2, 3], 10)"); } } @@ -1504,7 +1504,7 @@ public void testFullTextFunctionsWithNonBooleanFunctions() { if (EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()) { checkFullTextFunctionsWithNonBooleanFunctions("Term", "term(title, \"Meditation\")", "function"); } - if (EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()) { + if (EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()) { checkFullTextFunctionsWithNonBooleanFunctions("KNN", "knn(vector, [1, 2, 3], 10)", "function"); } } @@ -1575,7 +1575,7 @@ public void testFullTextFunctionsTargetsExistingField() throws Exception { if (EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()) { testFullTextFunctionTargetsExistingField("term(fist_name, \"Meditation\")"); } - if (EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()) { + if (EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()) { testFullTextFunctionTargetsExistingField("knn(vector, [0, 1, 2], 
10)"); } } @@ -2164,7 +2164,7 @@ public void testFullTextFunctionOptions() { if (EsqlCapabilities.Cap.MULTI_MATCH_FUNCTION.isEnabled()) { checkOptionDataTypes(MultiMatch.OPTIONS, "FROM test | WHERE MULTI_MATCH(\"Jean\", title, body, {\"%s\": %s})"); } - if (EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()) { + if (EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()) { checkOptionDataTypes(Knn.ALLOWED_OPTIONS, "FROM test | WHERE KNN(vector, [0.1, 0.2, 0.3], 10, {\"%s\": %s})"); } } @@ -2257,7 +2257,7 @@ public void testFullTextFunctionsNullArgs() throws Exception { checkFullTextFunctionNullArgs("term(null, \"query\")", "first"); checkFullTextFunctionNullArgs("term(title, null)", "second"); } - if (EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()) { + if (EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()) { checkFullTextFunctionNullArgs("knn(null, [0, 1, 2], 10)", "first"); checkFullTextFunctionNullArgs("knn(vector, null, 10)", "second"); checkFullTextFunctionNullArgs("knn(vector, [0, 1, 2], null)", "third"); @@ -2289,7 +2289,7 @@ public void testFullTextFunctionsInStats() { if (EsqlCapabilities.Cap.MULTI_MATCH_FUNCTION.isEnabled()) { checkFullTextFunctionsInStats("multi_match(\"Meditation\", title, body)"); } - if (EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()) { + if (EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()) { checkFullTextFunctionsInStats("knn(vector, [0, 1, 2], 10)"); } } @@ -2351,6 +2351,16 @@ public void testVectorSimilarityFunctionsNullArgs() throws Exception { } } + public void testFullTextFunctionsWithSemanticText() { + checkFullTextFunctionsWithSemanticText("knn(semantic, [0, 1, 2], 10)"); + checkFullTextFunctionsWithSemanticText("match(semantic, \"hello world\")"); + checkFullTextFunctionsWithSemanticText("semantic:\"hello world\""); + } + + public void checkFullTextFunctionsWithSemanticText(String functionInvocation) { + query("from test | where " + functionInvocation, fullTextAnalyzer); + } + public void testToIPInvalidOptions() { String query = 
"ROW result = to_ip(\"127.0.0.1\", 123)"; assertThat(error(query), containsString("second argument of [to_ip(\"127.0.0.1\", 123)] must be a map expression, received [123]")); From 0e264b15527e26736b68689470d150e859ecde95 Mon Sep 17 00:00:00 2001 From: cdelgado Date: Fri, 29 Aug 2025 13:34:12 +0200 Subject: [PATCH 03/16] Bump capability --- .../xpack/esql/plugin/KnnFunctionIT.java | 2 +- .../xpack/esql/action/EsqlCapabilities.java | 2 +- .../function/vector/VectorWritables.java | 2 +- .../expression/function/fulltext/KnnTests.java | 2 +- .../LocalPhysicalPlanOptimizerTests.java | 18 +++++++++--------- .../optimizer/LogicalPlanOptimizerTests.java | 12 ++++++------ 6 files changed, 19 insertions(+), 19 deletions(-) diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java index d44a9b458b082..6ce9b9eb049a0 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java @@ -193,7 +193,7 @@ public void testKnnWithLookupJoin() { @Before public void setup() throws IOException { - assumeTrue("Needs KNN support", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); + assumeTrue("Needs KNN support", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); var indexName = "test"; var client = client().admin().indices(); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index c3c4121b095f4..19229b6e5955b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -1274,7 +1274,7 @@ 
public enum Cap { /** * Support knn function */ - KNN_FUNCTION_V3(Build.current().isSnapshot()), + KNN_FUNCTION_V4(Build.current().isSnapshot()), /** * Support for the LIKE operator with a list of wildcards. diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorWritables.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorWritables.java index f4353c28476d2..ab41201ceb328 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorWritables.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorWritables.java @@ -27,7 +27,7 @@ private VectorWritables() { public static List getNamedWritables() { List entries = new ArrayList<>(); - if (EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()) { + if (EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()) { entries.add(Knn.ENTRY); } if (EsqlCapabilities.Cap.COSINE_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/KnnTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/KnnTests.java index 002c519b001f8..b2770c51991b5 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/KnnTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/KnnTests.java @@ -52,7 +52,7 @@ public static Iterable parameters() { @Before public void checkCapability() { - assumeTrue("KNN is not enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); + assumeTrue("KNN is not enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); } private static List testCaseSuppliers() { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java 
b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java index 9b161388d6cc3..89b968d80304e 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java @@ -1376,7 +1376,7 @@ public void testMultiMatchOptionsPushDown() { public void testKnnOptionsPushDown() { assumeTrue("dense_vector capability not available", EsqlCapabilities.Cap.DENSE_VECTOR_FIELD_TYPE.isEnabled()); - assumeTrue("knn capability not available", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); + assumeTrue("knn capability not available", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); String query = """ from test @@ -1842,7 +1842,7 @@ public void testFullTextFunctionWithStatsBy(FullTextFunctionTestCase testCase) { } public void testKnnPrefilters() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); String query = """ from test @@ -1874,7 +1874,7 @@ public void testKnnPrefilters() { } public void testKnnPrefiltersWithMultipleFilters() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); String query = """ from test @@ -1910,7 +1910,7 @@ public void testKnnPrefiltersWithMultipleFilters() { } public void testPushDownConjunctionsToKnnPrefilter() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); String query = """ from test @@ -1947,7 +1947,7 @@ public void testPushDownConjunctionsToKnnPrefilter() { } public void testPushDownNegatedConjunctionsToKnnPrefilter() { - assumeTrue("knn must be enabled", 
EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); String query = """ from test @@ -1984,7 +1984,7 @@ public void testPushDownNegatedConjunctionsToKnnPrefilter() { } public void testNotPushDownDisjunctionsToKnnPrefilter() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); String query = """ from test @@ -2013,7 +2013,7 @@ public void testNotPushDownDisjunctionsToKnnPrefilter() { } public void testNotPushDownKnnWithNonPushablePrefilters() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); String query = """ from test @@ -2047,7 +2047,7 @@ public void testNotPushDownKnnWithNonPushablePrefilters() { } public void testPushDownComplexNegationsToKnnPrefilter() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); String query = """ from test @@ -2097,7 +2097,7 @@ and NOT ((keyword == "test") or knn(dense_vector, [4, 5, 6], 10))) } public void testMultipleKnnQueriesInPrefilters() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); String query = """ from test diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index c8a64f779ab4a..9db78e6b27aed 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ 
b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -8435,7 +8435,7 @@ public void testSampleNoPushDownChangePoint() { } public void testPushDownConjunctionsToKnnPrefilter() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); var query = """ from test @@ -8455,7 +8455,7 @@ public void testPushDownConjunctionsToKnnPrefilter() { } public void testPushDownMultipleFiltersToKnnPrefilter() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); var query = """ from test @@ -8478,7 +8478,7 @@ public void testPushDownMultipleFiltersToKnnPrefilter() { } public void testNotPushDownDisjunctionsToKnnPrefilter() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); var query = """ from test @@ -8495,7 +8495,7 @@ public void testNotPushDownDisjunctionsToKnnPrefilter() { } public void testPushDownConjunctionsAndNotDisjunctionsToKnnPrefilter() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); /* and @@ -8530,7 +8530,7 @@ public void testPushDownConjunctionsAndNotDisjunctionsToKnnPrefilter() { } public void testMorePushDownConjunctionsAndNotDisjunctionsToKnnPrefilter() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); /* or @@ -8562,7 +8562,7 @@ public void testMorePushDownConjunctionsAndNotDisjunctionsToKnnPrefilter() { } public void testMultipleKnnQueriesInPrefilters() { - assumeTrue("knn must be enabled", 
EsqlCapabilities.Cap.KNN_FUNCTION_V3.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); /* and From 66db5c94645ba961b71324c80b67f1da0a93cd42 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Fri, 29 Aug 2025 11:48:01 +0000 Subject: [PATCH 04/16] [CI] Auto commit changes from spotless --- .../elasticsearch/xpack/esql/expression/function/vector/Knn.java | 1 - 1 file changed, 1 deletion(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java index 2baa1c757c21e..44eaeea1ea7d8 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java @@ -67,7 +67,6 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.DENSE_VECTOR; import static org.elasticsearch.xpack.esql.core.type.DataType.FLOAT; import static org.elasticsearch.xpack.esql.core.type.DataType.INTEGER; -import static org.elasticsearch.xpack.esql.core.type.DataType.KEYWORD; import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT; import static org.elasticsearch.xpack.esql.expression.Foldables.TypeResolutionValidator.forPreOptimizationValidation; import static org.elasticsearch.xpack.esql.expression.Foldables.resolveTypeQuery; From 643c767386d43d085d59008d0ecaf661cd00b3bf Mon Sep 17 00:00:00 2001 From: cdelgado Date: Fri, 29 Aug 2025 18:30:34 +0200 Subject: [PATCH 05/16] First IT test for knn with semantic_text --- .../qa/single_node/KnnSemanticTextIT.java | 129 ++++++++++++++++++ 1 file changed, 129 insertions(+) create mode 100644 x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/KnnSemanticTextIT.java diff --git 
a/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/KnnSemanticTextIT.java b/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/KnnSemanticTextIT.java new file mode 100644 index 0000000000000..8747778195bf2 --- /dev/null +++ b/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/KnnSemanticTextIT.java @@ -0,0 +1,129 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.qa.single_node; + +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; + +import org.elasticsearch.client.Request; +import org.elasticsearch.test.TestClustersThreadFilter; +import org.elasticsearch.test.cluster.ElasticsearchCluster; +import org.elasticsearch.test.rest.ESRestTestCase; +import org.elasticsearch.xpack.esql.AssertWarnings; +import org.elasticsearch.xpack.esql.CsvTestsDataLoader; +import org.elasticsearch.xpack.esql.action.EsqlCapabilities; +import org.elasticsearch.xpack.esql.qa.rest.ProfileLogger; +import org.elasticsearch.xpack.esql.qa.rest.RestEsqlTestCase; +import org.junit.After; +import org.junit.Before; +import org.junit.ClassRule; +import org.junit.Rule; + +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.elasticsearch.xpack.esql.qa.rest.RestEsqlTestCase.requestObjectBuilder; +import static org.elasticsearch.xpack.esql.qa.rest.RestEsqlTestCase.runEsqlSync; +import static org.hamcrest.Matchers.is; + +@ThreadLeakFilters(filters = TestClustersThreadFilter.class) +public class KnnSemanticTextIT extends ESRestTestCase { + + @ClassRule + public static ElasticsearchCluster cluster = 
Clusters.testCluster(spec -> spec.plugin("inference-service-test")); + + @Rule(order = Integer.MIN_VALUE) + public ProfileLogger profileLogger = new ProfileLogger(); + + private int numDocs; + private final Map indexedTexts = new HashMap<>(); + + @Override + protected String getTestRestCluster() { + return cluster.getHttpAddresses(); + } + + @Before + public void checkCapability() { + assumeTrue("semantic text capability not available", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); + } + + public void testKnnQuery() throws IOException { + String knnQuery = """ + FROM semantic-test METADATA _score + | WHERE knn(semantic, [0, 1, 2], 10) + | KEEP id, _score, semantic + | SORT _score DESC + | LIMIT 10 + """; + + Map response = runEsqlQuery(knnQuery); + @SuppressWarnings("unchecked") + List> columns = (List>) response.get("columns"); + assertThat(columns.size(), is(3)); + } + + @Before + public void setupIndex() throws IOException { + Request request = new Request("PUT", "/semantic-test"); + request.setJsonEntity(""" + { + "mappings": { + "properties": { + "id": { + "type": "integer" + }, + "semantic": { + "type": "semantic_text", + "inference_id": "test_dense_inference" + } + } + }, + "settings": { + "index": { + "number_of_shards": 1, + "number_of_replicas": 0 + } + } + } + """); + assertEquals(200, client().performRequest(request).getStatusLine().getStatusCode()); + + request = new Request("POST", "/_bulk?index=semantic-test&refresh=true"); + // 4 documents with a null in the middle, leading to 3 ESQL pages and 3 Arrow batches + request.setJsonEntity(""" + {"index": {"_id": "1"}} + {"id": 1, "semantic": "sample text one"} + {"index": {"_id": "2"}} + {"id": 2, "semantic": "sample text two"} + {"index": {"_id": "3"}} + {"id": 3, "semantic": "sample text three"} + """); + assertEquals(200, client().performRequest(request).getStatusLine().getStatusCode()); + } + + @Before + public void setupInferenceEndpoint() throws IOException { + 
CsvTestsDataLoader.createTextEmbeddingInferenceEndpoint(client()); + } + + @After + public void removeIndexAndInferenceEndpoint() throws IOException { + client().performRequest(new Request("DELETE", "semantic-test")); + + if (CsvTestsDataLoader.clusterHasTextEmbeddingInferenceEndpoint(client())) { + CsvTestsDataLoader.deleteTextEmbeddingInferenceEndpoint(client()); + } + } + + private Map runEsqlQuery(String query) throws IOException { + RestEsqlTestCase.RequestObjectBuilder builder = requestObjectBuilder().query(query); + return runEsqlSync(builder, new AssertWarnings.NoWarnings(), profileLogger); + } +} From 3a8ef30a7db1e6fb4c568ba33a88a44a6cf17a36 Mon Sep 17 00:00:00 2001 From: cdelgado Date: Mon, 1 Sep 2025 15:28:54 +0200 Subject: [PATCH 06/16] Add test for text fields --- .../qa/single_node/KnnSemanticTextIT.java | 40 ++++++++++++++++--- 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/KnnSemanticTextIT.java b/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/KnnSemanticTextIT.java index 8747778195bf2..4aae03ec03ea8 100644 --- a/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/KnnSemanticTextIT.java +++ b/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/KnnSemanticTextIT.java @@ -10,6 +10,7 @@ import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; import org.elasticsearch.client.Request; +import org.elasticsearch.client.ResponseException; import org.elasticsearch.test.TestClustersThreadFilter; import org.elasticsearch.test.cluster.ElasticsearchCluster; import org.elasticsearch.test.rest.ESRestTestCase; @@ -28,9 +29,11 @@ import java.util.List; import java.util.Map; +import static org.elasticsearch.rest.RestStatus.BAD_REQUEST; import static 
org.elasticsearch.xpack.esql.qa.rest.RestEsqlTestCase.requestObjectBuilder; import static org.elasticsearch.xpack.esql.qa.rest.RestEsqlTestCase.runEsqlSync; import static org.hamcrest.Matchers.is; +import static org.hamcrest.core.StringContains.containsString; @ThreadLeakFilters(filters = TestClustersThreadFilter.class) public class KnnSemanticTextIT extends ESRestTestCase { @@ -51,10 +54,11 @@ protected String getTestRestCluster() { @Before public void checkCapability() { - assumeTrue("semantic text capability not available", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); + assumeTrue("knn with semantic text not available", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); } - public void testKnnQuery() throws IOException { + @SuppressWarnings("unchecked") + public void testKnnQueryWithSemanticText() throws IOException { String knnQuery = """ FROM semantic-test METADATA _score | WHERE knn(semantic, [0, 1, 2], 10) @@ -64,9 +68,29 @@ public void testKnnQuery() throws IOException { """; Map response = runEsqlQuery(knnQuery); - @SuppressWarnings("unchecked") List> columns = (List>) response.get("columns"); assertThat(columns.size(), is(3)); + List> rows = (List>) response.get("values"); + assertThat(rows.size(), is(3)); + for (int row = 0; row < rows.size(); row++) { + List rowData = rows.get(row); + Integer id = (Integer) rowData.get(0); + assertThat(id, is(3 - row)); + } + } + + public void testKnnQueryOnTextField() throws IOException { + String knnQuery = """ + FROM semantic-test METADATA _score + | WHERE knn(text, [0, 1, 2], 10) + | KEEP id, _score, semantic + | SORT _score DESC + | LIMIT 10 + """; + + ResponseException re = expectThrows(ResponseException.class, () -> runEsqlQuery(knnQuery)); + assertThat(re.getResponse().getStatusLine().getStatusCode(), is(BAD_REQUEST.getStatus())); + assertThat(re.getMessage(), containsString("[knn] queries are only supported on [dense_vector] fields")); } @Before @@ -82,6 +106,10 @@ public void setupIndex() throws 
IOException { "semantic": { "type": "semantic_text", "inference_id": "test_dense_inference" + }, + "text": { + "type": "text", + "copy_to": "semantic" } } }, @@ -99,11 +127,11 @@ public void setupIndex() throws IOException { // 4 documents with a null in the middle, leading to 3 ESQL pages and 3 Arrow batches request.setJsonEntity(""" {"index": {"_id": "1"}} - {"id": 1, "semantic": "sample text one"} + {"id": 1, "text": "sample text"} {"index": {"_id": "2"}} - {"id": 2, "semantic": "sample text two"} + {"id": 2, "text": "another sample text"} {"index": {"_id": "3"}} - {"id": 3, "semantic": "sample text three"} + {"id": 3, "text": "yet another sample text"} """); assertEquals(200, client().performRequest(request).getStatusLine().getStatusCode()); } From fe6d2b38d74752ecb78f03c4ea00a6abba7b6c0e Mon Sep 17 00:00:00 2001 From: cdelgado Date: Mon, 1 Sep 2025 16:35:31 +0200 Subject: [PATCH 07/16] Move semantic text tests to a separate file --- .../knn-function-semantic-text.csv-spec | 278 ++++++++++++++++++ .../src/main/resources/knn-function.csv-spec | 16 - 2 files changed, 278 insertions(+), 16 deletions(-) create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function-semantic-text.csv-spec diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function-semantic-text.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function-semantic-text.csv-spec new file mode 100644 index 0000000000000..3535203ff2ade --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function-semantic-text.csv-spec @@ -0,0 +1,278 @@ +testKnnWithSemanticText +required_capability: knn_function_v4 + +from semantic_text +| where knn(semantic_text_dense_field, [0, 1, 2], 10) +| keep semantic_text_dense_field +| sort semantic_text_dense_field asc +; + +semantic_text_dense_field:text +all we have to decide is what to do with the time that is given to us +be excellent to each other +live long and prosper +; + 
+testKnnWithSemanticTextAndKeyword +required_capability: knn_function_v4 + +from semantic_text +| where knn(semantic_text_dense_field, [0, 1, 2], 10) +| keep semantic_text_dense_field, host +| sort host asc +; + +semantic_text_dense_field:text | host:keyword +live long and prosper | host1 +all we have to decide is what to do with the time that is given to us | host2 +be excellent to each other | host3 + +; + +testKnnWithSemanticTextMultiValueField +required_capability: knn_function_v4 + +from semantic_text metadata _id +| where match(st_multi_value, "something") AND match(host, "host1") +| keep _id, st_multi_value +; + +_id: keyword | st_multi_value:text +1 | ["Hello there!", "This is a random value", "for testing purposes"] +; + +testKnnWithSemanticTextWithEvalsAndOtherFunctionsAndStats +required_capability: knn_function_v4 + +from semantic_text +| where qstr("description:some*") +| eval size = mv_count(st_multi_value) +| where knn(semantic_text_dense_field, [0, 1, 2], 10) +| STATS result = count(*) +; + +result:long +3 +; + +testKnnWithSemanticTextAndKql +required_capability: knn_function_v4 +required_capability: kql_function + +from semantic_text +| where kql("host:host1") AND knn(semantic_text_dense_field, [0, 1, 2], 10) +| KEEP host, semantic_text_dense_field +; + +host:keyword | semantic_text_dense_field:text +"host1" | live long and prosper +; + +testKnnWithOptionsFuzziness +required_capability: knn_function_v4 +required_capability: knn_function_v4_options + +from books +| where match(title, "Pings", {"fuzziness": 1}) +| keep book_no; +ignoreOrder:true + +book_no:keyword +2714 +2675 +4023 +7140 +; + +testKnnWithOptionsOperator +required_capability: knn_function_v4 +required_capability: knn_function_v4_options + +// tag::match-with-named-function-params[] +FROM books +| WHERE MATCH(title, "Hobbit Back Again", {"operator": "AND"}) +| KEEP title; +// end::match-with-named-function-params[] + +// tag::match-with-named-function-params-result[] +title:text +The 
Hobbit or There and Back Again +// end::match-with-named-function-params-result[] +; + +testKnnWithOptionsMinimumShouldKnn +required_capability: knn_function_v4 +required_capability: knn_function_v4_options + +from books +| where match(title, "Hobbit Back Again", {"minimum_should_match": 2}) +| keep title; + +title:text +The Hobbit or There and Back Again +; + +testKnnWithNonPushableDisjunctions +required_capability: knn_function_v4 +required_capability: full_text_functions_disjunctions_compute_engine + +from books +| where match(title, "lord") or length(title) > 130 +| keep book_no +; +ignoreOrder: true + +book_no:keyword +2675 +2714 +4023 +7140 +8678 +; + +testKnnWithNonPushableDisjunctionsOnComplexExpressions +required_capability: knn_function_v4 +required_capability: full_text_functions_disjunctions_compute_engine + +from books +| where (match(title, "lord") and ratings > 4.5) or (match(author, "dostoevsky") and length(title) > 50) +| keep book_no +; +ignoreOrder: true + +book_no:keyword +2675 +2924 +4023 +1937 +7140 +2714 +; + +testKnnInStatsNonPushable +required_capability: knn_function_v4 +required_capability: full_text_functions_in_stats_where + +from books +| where length(title) > 40 +| stats c = count(*) where match(title, "Lord") +; + +c:long +3 +; + +testKnnInStatsPushableAndNonPushable +required_capability: knn_function_v4 +required_capability: full_text_functions_in_stats_where + +from books +| stats c = count(*) where (match(title, "lord") and ratings > 4.5) or (match(author, "dostoevsky") and length(title) > 50) +; + +c:long +6 +; + +testKnnInStatsPushable +required_capability: knn_function_v4 +required_capability: full_text_functions_in_stats_where + +from books +| stats c = count(*) where match(author, "tolkien") +; + +c:long +22 +; + +testKnnInStatsWithOptions +required_capability: knn_function_v4 +required_capability: full_text_functions_in_stats_where + +FROM books +| STATS c = count(*) where match(title, "Hobbit Back Again", {"operator": 
"AND"}) +; + +c:long +1 +; + +testKnnInStatsWithNonPushableDisjunctions +required_capability: knn_function_v4 +required_capability: full_text_functions_in_stats_where + +FROM books +| STATS c = count(*) where match(title, "lord") or length(title) > 130 +; + +c:long +5 +; + +testKnnInStatsWithMultipleAggs +required_capability: knn_function_v4 +required_capability: full_text_functions_in_stats_where +FROM books +| STATS c = count(*) where match(title, "lord"), m = max(book_no::integer) where match(author, "tolkien"), n = min(book_no::integer) where match(author, "dostoevsky") +; + +c:long | m:integer | n:integer +4 | 9607 | 1211 +; + + +testKnnInStatsWithGrouping +required_capability: knn_function_v4 +required_capability: full_text_functions_in_stats_where +FROM books +| STATS r = AVG(ratings) where match(title, "Lord Rings", {"operator": "AND"}) by author | WHERE r is not null +; +ignoreOrder: true + +r:double | author: text +4.75 | Alan Lee +4.674999952316284 | J. R. R. Tolkien +4.670000076293945 | John Ronald Reuel Tolkien +4.670000076293945 | Agnes Perkins +4.670000076293945 | Charles Adolph Huttar +4.670000076293945 | Walter Scheps +4.559999942779541 | J.R.R. 
Tolkien +; + +testKnnInStatsWithGroupingBy +required_capability: knn_function_v4 +required_capability: full_text_functions_in_stats_where +FROM airports +| STATS c = COUNT(*) where match(country, "United States") BY scalerank +| SORT scalerank desc +; + +c: long | scalerank: long +0 | 9 +44 | 8 +10 | 7 +28 | 6 +10 | 5 +12 | 4 +10 | 3 +15 | 2 +; + + +testKnnWithReplace +required_capability: knn_function_v4 +required_capability: no_plain_strings_in_literals +from books +| keep book_no, author +| where match(author, REPLACE("FaulkneX", "X", "r")) +| sort book_no +| limit 5; + +book_no:keyword | author:text +2378 | [Carol Faulkner, Holly Byers Ochoa, Lucretia Mott] +2713 | William Faulkner +2847 | Colleen Faulkner +2883 | William Faulkner +3293 | Danny Faulkner +; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec index ab90d30803233..c3aee67e7f6a5 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec @@ -306,19 +306,3 @@ c: long | primary: boolean 41 | false 9 | true ; - -testKnnWithWithSemanticText -required_capability: knn_function_v4 -required_capability: semantic_text_field_caps - -from semantic_text -| where knn(semantic_text_dense_field, [0, 1, 2], 10) -| keep semantic_text_dense_field -| sort semantic_text_dense_field asc -; - -semantic_text_dense_field:text -all we have to decide is what to do with the time that is given to us -be excellent to each other -live long and prosper -; From a2f9b7d15329bc72eea03f02b2a7b1f1b20bbf5e Mon Sep 17 00:00:00 2001 From: cdelgado Date: Mon, 1 Sep 2025 16:41:18 +0200 Subject: [PATCH 08/16] Back to single csv file, remove non semantic text tests --- .../knn-function-semantic-text.csv-spec | 278 ------------------ .../src/main/resources/knn-function.csv-spec | 71 +++++ 2 files changed, 71 
insertions(+), 278 deletions(-) delete mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function-semantic-text.csv-spec diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function-semantic-text.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function-semantic-text.csv-spec deleted file mode 100644 index 3535203ff2ade..0000000000000 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function-semantic-text.csv-spec +++ /dev/null @@ -1,278 +0,0 @@ -testKnnWithSemanticText -required_capability: knn_function_v4 - -from semantic_text -| where knn(semantic_text_dense_field, [0, 1, 2], 10) -| keep semantic_text_dense_field -| sort semantic_text_dense_field asc -; - -semantic_text_dense_field:text -all we have to decide is what to do with the time that is given to us -be excellent to each other -live long and prosper -; - -testKnnWithSemanticTextAndKeyword -required_capability: knn_function_v4 - -from semantic_text -| where knn(semantic_text_dense_field, [0, 1, 2], 10) -| keep semantic_text_dense_field, host -| sort host asc -; - -semantic_text_dense_field:text | host:keyword -live long and prosper | host1 -all we have to decide is what to do with the time that is given to us | host2 -be excellent to each other | host3 - -; - -testKnnWithSemanticTextMultiValueField -required_capability: knn_function_v4 - -from semantic_text metadata _id -| where match(st_multi_value, "something") AND match(host, "host1") -| keep _id, st_multi_value -; - -_id: keyword | st_multi_value:text -1 | ["Hello there!", "This is a random value", "for testing purposes"] -; - -testKnnWithSemanticTextWithEvalsAndOtherFunctionsAndStats -required_capability: knn_function_v4 - -from semantic_text -| where qstr("description:some*") -| eval size = mv_count(st_multi_value) -| where knn(semantic_text_dense_field, [0, 1, 2], 10) -| STATS result = count(*) -; - -result:long -3 -; - -testKnnWithSemanticTextAndKql -required_capability: 
knn_function_v4 -required_capability: kql_function - -from semantic_text -| where kql("host:host1") AND knn(semantic_text_dense_field, [0, 1, 2], 10) -| KEEP host, semantic_text_dense_field -; - -host:keyword | semantic_text_dense_field:text -"host1" | live long and prosper -; - -testKnnWithOptionsFuzziness -required_capability: knn_function_v4 -required_capability: knn_function_v4_options - -from books -| where match(title, "Pings", {"fuzziness": 1}) -| keep book_no; -ignoreOrder:true - -book_no:keyword -2714 -2675 -4023 -7140 -; - -testKnnWithOptionsOperator -required_capability: knn_function_v4 -required_capability: knn_function_v4_options - -// tag::match-with-named-function-params[] -FROM books -| WHERE MATCH(title, "Hobbit Back Again", {"operator": "AND"}) -| KEEP title; -// end::match-with-named-function-params[] - -// tag::match-with-named-function-params-result[] -title:text -The Hobbit or There and Back Again -// end::match-with-named-function-params-result[] -; - -testKnnWithOptionsMinimumShouldKnn -required_capability: knn_function_v4 -required_capability: knn_function_v4_options - -from books -| where match(title, "Hobbit Back Again", {"minimum_should_match": 2}) -| keep title; - -title:text -The Hobbit or There and Back Again -; - -testKnnWithNonPushableDisjunctions -required_capability: knn_function_v4 -required_capability: full_text_functions_disjunctions_compute_engine - -from books -| where match(title, "lord") or length(title) > 130 -| keep book_no -; -ignoreOrder: true - -book_no:keyword -2675 -2714 -4023 -7140 -8678 -; - -testKnnWithNonPushableDisjunctionsOnComplexExpressions -required_capability: knn_function_v4 -required_capability: full_text_functions_disjunctions_compute_engine - -from books -| where (match(title, "lord") and ratings > 4.5) or (match(author, "dostoevsky") and length(title) > 50) -| keep book_no -; -ignoreOrder: true - -book_no:keyword -2675 -2924 -4023 -1937 -7140 -2714 -; - -testKnnInStatsNonPushable -required_capability: 
knn_function_v4 -required_capability: full_text_functions_in_stats_where - -from books -| where length(title) > 40 -| stats c = count(*) where match(title, "Lord") -; - -c:long -3 -; - -testKnnInStatsPushableAndNonPushable -required_capability: knn_function_v4 -required_capability: full_text_functions_in_stats_where - -from books -| stats c = count(*) where (match(title, "lord") and ratings > 4.5) or (match(author, "dostoevsky") and length(title) > 50) -; - -c:long -6 -; - -testKnnInStatsPushable -required_capability: knn_function_v4 -required_capability: full_text_functions_in_stats_where - -from books -| stats c = count(*) where match(author, "tolkien") -; - -c:long -22 -; - -testKnnInStatsWithOptions -required_capability: knn_function_v4 -required_capability: full_text_functions_in_stats_where - -FROM books -| STATS c = count(*) where match(title, "Hobbit Back Again", {"operator": "AND"}) -; - -c:long -1 -; - -testKnnInStatsWithNonPushableDisjunctions -required_capability: knn_function_v4 -required_capability: full_text_functions_in_stats_where - -FROM books -| STATS c = count(*) where match(title, "lord") or length(title) > 130 -; - -c:long -5 -; - -testKnnInStatsWithMultipleAggs -required_capability: knn_function_v4 -required_capability: full_text_functions_in_stats_where -FROM books -| STATS c = count(*) where match(title, "lord"), m = max(book_no::integer) where match(author, "tolkien"), n = min(book_no::integer) where match(author, "dostoevsky") -; - -c:long | m:integer | n:integer -4 | 9607 | 1211 -; - - -testKnnInStatsWithGrouping -required_capability: knn_function_v4 -required_capability: full_text_functions_in_stats_where -FROM books -| STATS r = AVG(ratings) where match(title, "Lord Rings", {"operator": "AND"}) by author | WHERE r is not null -; -ignoreOrder: true - -r:double | author: text -4.75 | Alan Lee -4.674999952316284 | J. R. R. 
Tolkien -4.670000076293945 | John Ronald Reuel Tolkien -4.670000076293945 | Agnes Perkins -4.670000076293945 | Charles Adolph Huttar -4.670000076293945 | Walter Scheps -4.559999942779541 | J.R.R. Tolkien -; - -testKnnInStatsWithGroupingBy -required_capability: knn_function_v4 -required_capability: full_text_functions_in_stats_where -FROM airports -| STATS c = COUNT(*) where match(country, "United States") BY scalerank -| SORT scalerank desc -; - -c: long | scalerank: long -0 | 9 -44 | 8 -10 | 7 -28 | 6 -10 | 5 -12 | 4 -10 | 3 -15 | 2 -; - - -testKnnWithReplace -required_capability: knn_function_v4 -required_capability: no_plain_strings_in_literals -from books -| keep book_no, author -| where match(author, REPLACE("FaulkneX", "X", "r")) -| sort book_no -| limit 5; - -book_no:keyword | author:text -2378 | [Carol Faulkner, Holly Byers Ochoa, Lucretia Mott] -2713 | William Faulkner -2847 | Colleen Faulkner -2883 | William Faulkner -3293 | Danny Faulkner -; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec index c3aee67e7f6a5..d1070ca4e6131 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec @@ -306,3 +306,74 @@ c: long | primary: boolean 41 | false 9 | true ; + +testKnnWithSemanticText +required_capability: knn_function_v4 + +from semantic_text +| where knn(semantic_text_dense_field, [0, 1, 2], 10) +| keep semantic_text_dense_field +| sort semantic_text_dense_field asc +; + +semantic_text_dense_field:text +all we have to decide is what to do with the time that is given to us +be excellent to each other +live long and prosper +; + +testKnnWithSemanticTextAndKeyword +required_capability: knn_function_v4 + +from semantic_text +| where knn(semantic_text_dense_field, [0, 1, 2], 10) +| keep semantic_text_dense_field, host +| sort host asc +; + 
+semantic_text_dense_field:text | host:keyword +live long and prosper | host1 +all we have to decide is what to do with the time that is given to us | host2 +be excellent to each other | host3 + +; + +testKnnWithSemanticTextMultiValueField +required_capability: knn_function_v4 + +from semantic_text metadata _id +| where match(st_multi_value, "something") AND match(host, "host1") +| keep _id, st_multi_value +; + +_id: keyword | st_multi_value:text +1 | ["Hello there!", "This is a random value", "for testing purposes"] +; + +testKnnWithSemanticTextWithEvalsAndOtherFunctionsAndStats +required_capability: knn_function_v4 + +from semantic_text +| where qstr("description:some*") +| eval size = mv_count(st_multi_value) +| where knn(semantic_text_dense_field, [0, 1, 2], 10) +| STATS result = count(*) +; + +result:long +3 +; + +testKnnWithSemanticTextAndKql +required_capability: knn_function_v4 +required_capability: kql_function + +from semantic_text +| where kql("host:host1") AND knn(semantic_text_dense_field, [0, 1, 2], 10) +| KEEP host, semantic_text_dense_field +; + +host:keyword | semantic_text_dense_field:text +"host1" | live long and prosper +; + From 10d2f48d75209d58d59b1cece10051f0690e83a7 Mon Sep 17 00:00:00 2001 From: cdelgado Date: Mon, 1 Sep 2025 17:53:53 +0200 Subject: [PATCH 09/16] Add test for sparse vector --- .../qa/single_node/KnnSemanticTextIT.java | 47 +++++++++++++++---- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/KnnSemanticTextIT.java b/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/KnnSemanticTextIT.java index 4aae03ec03ea8..9e0d81b2ea74c 100644 --- a/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/KnnSemanticTextIT.java +++ 
b/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/KnnSemanticTextIT.java @@ -61,8 +61,8 @@ public void checkCapability() { public void testKnnQueryWithSemanticText() throws IOException { String knnQuery = """ FROM semantic-test METADATA _score - | WHERE knn(semantic, [0, 1, 2], 10) - | KEEP id, _score, semantic + | WHERE knn(dense_semantic, [0, 1, 2], 10) + | KEEP id, _score, dense_semantic | SORT _score DESC | LIMIT 10 """; @@ -83,7 +83,21 @@ public void testKnnQueryOnTextField() throws IOException { String knnQuery = """ FROM semantic-test METADATA _score | WHERE knn(text, [0, 1, 2], 10) - | KEEP id, _score, semantic + | KEEP id, _score, dense_semantic + | SORT _score DESC + | LIMIT 10 + """; + + ResponseException re = expectThrows(ResponseException.class, () -> runEsqlQuery(knnQuery)); + assertThat(re.getResponse().getStatusLine().getStatusCode(), is(BAD_REQUEST.getStatus())); + assertThat(re.getMessage(), containsString("[knn] queries are only supported on [dense_vector] fields")); + } + + public void testKnnQueryOnSparseSemanticTextField() throws IOException { + String knnQuery = """ + FROM semantic-test METADATA _score + | WHERE knn(sparse_semantic, [0, 1, 2], 10) + | KEEP id, _score, sparse_semantic | SORT _score DESC | LIMIT 10 """; @@ -94,7 +108,13 @@ public void testKnnQueryOnTextField() throws IOException { } @Before - public void setupIndex() throws IOException { + public void setUp() throws Exception { + super.setUp(); + setupInferenceEndpoints(); + setupIndex(); + } + + private void setupIndex() throws IOException { Request request = new Request("PUT", "/semantic-test"); request.setJsonEntity(""" { @@ -103,13 +123,17 @@ public void setupIndex() throws IOException { "id": { "type": "integer" }, - "semantic": { + "dense_semantic": { "type": "semantic_text", "inference_id": "test_dense_inference" }, + "sparse_semantic": { + "type": "semantic_text", + "inference_id": "test_sparse_inference" + }, 
"text": { "type": "text", - "copy_to": "semantic" + "copy_to": ["dense_semantic", "sparse_semantic"] } } }, @@ -124,7 +148,6 @@ public void setupIndex() throws IOException { assertEquals(200, client().performRequest(request).getStatusLine().getStatusCode()); request = new Request("POST", "/_bulk?index=semantic-test&refresh=true"); - // 4 documents with a null in the middle, leading to 3 ESQL pages and 3 Arrow batches request.setJsonEntity(""" {"index": {"_id": "1"}} {"id": 1, "text": "sample text"} @@ -136,18 +159,22 @@ public void setupIndex() throws IOException { assertEquals(200, client().performRequest(request).getStatusLine().getStatusCode()); } - @Before - public void setupInferenceEndpoint() throws IOException { + private void setupInferenceEndpoints() throws IOException { CsvTestsDataLoader.createTextEmbeddingInferenceEndpoint(client()); + CsvTestsDataLoader.createSparseEmbeddingInferenceEndpoint(client()); } @After - public void removeIndexAndInferenceEndpoint() throws IOException { + public void tearDown() throws Exception { + super.tearDown(); client().performRequest(new Request("DELETE", "semantic-test")); if (CsvTestsDataLoader.clusterHasTextEmbeddingInferenceEndpoint(client())) { CsvTestsDataLoader.deleteTextEmbeddingInferenceEndpoint(client()); } + if (CsvTestsDataLoader.clusterHasSparseEmbeddingInferenceEndpoint(client())) { + CsvTestsDataLoader.deleteSparseEmbeddingInferenceEndpoint(client()); + } } private Map runEsqlQuery(String query) throws IOException { From 9aae2d54330bdbb20bd8209ff291091775a7a21b Mon Sep 17 00:00:00 2001 From: cdelgado Date: Mon, 1 Sep 2025 18:00:13 +0200 Subject: [PATCH 10/16] Create single and multi node IT --- .../esql/qa/multi_node/KnnSemanticTextIT.java | 28 +++ .../qa/single_node/KnnSemanticTextIT.java | 161 +---------------- .../esql/qa/rest/KnnSemanticTextTestCase.java | 164 ++++++++++++++++++ 3 files changed, 194 insertions(+), 159 deletions(-) create mode 100644 
x-pack/plugin/esql/qa/server/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/multi_node/KnnSemanticTextIT.java create mode 100644 x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/KnnSemanticTextTestCase.java diff --git a/x-pack/plugin/esql/qa/server/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/multi_node/KnnSemanticTextIT.java b/x-pack/plugin/esql/qa/server/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/multi_node/KnnSemanticTextIT.java new file mode 100644 index 0000000000000..5b57b8edf3ca8 --- /dev/null +++ b/x-pack/plugin/esql/qa/server/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/multi_node/KnnSemanticTextIT.java @@ -0,0 +1,28 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.esql.qa.multi_node; + +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; + +import org.elasticsearch.test.TestClustersThreadFilter; +import org.elasticsearch.test.cluster.ElasticsearchCluster; +import org.elasticsearch.xpack.esql.qa.rest.KnnSemanticTextTestCase; +import org.junit.ClassRule; + +@ThreadLeakFilters(filters = TestClustersThreadFilter.class) +public class KnnSemanticTextIT extends KnnSemanticTextTestCase { + @ClassRule + public static ElasticsearchCluster cluster = Clusters.testCluster( + spec -> spec.module("x-pack-inference").plugin("inference-service-test") + ); + + @Override + protected String getTestRestCluster() { + return cluster.getHttpAddresses(); + } +} diff --git a/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/KnnSemanticTextIT.java b/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/KnnSemanticTextIT.java index 9e0d81b2ea74c..28db61abee562 100644 --- a/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/KnnSemanticTextIT.java +++ b/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/KnnSemanticTextIT.java @@ -9,176 +9,19 @@ import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; -import org.elasticsearch.client.Request; -import org.elasticsearch.client.ResponseException; import org.elasticsearch.test.TestClustersThreadFilter; import org.elasticsearch.test.cluster.ElasticsearchCluster; -import org.elasticsearch.test.rest.ESRestTestCase; -import org.elasticsearch.xpack.esql.AssertWarnings; -import org.elasticsearch.xpack.esql.CsvTestsDataLoader; -import org.elasticsearch.xpack.esql.action.EsqlCapabilities; -import org.elasticsearch.xpack.esql.qa.rest.ProfileLogger; -import org.elasticsearch.xpack.esql.qa.rest.RestEsqlTestCase; -import 
org.junit.After; -import org.junit.Before; +import org.elasticsearch.xpack.esql.qa.rest.KnnSemanticTextTestCase; import org.junit.ClassRule; -import org.junit.Rule; - -import java.io.IOException; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import static org.elasticsearch.rest.RestStatus.BAD_REQUEST; -import static org.elasticsearch.xpack.esql.qa.rest.RestEsqlTestCase.requestObjectBuilder; -import static org.elasticsearch.xpack.esql.qa.rest.RestEsqlTestCase.runEsqlSync; -import static org.hamcrest.Matchers.is; -import static org.hamcrest.core.StringContains.containsString; @ThreadLeakFilters(filters = TestClustersThreadFilter.class) -public class KnnSemanticTextIT extends ESRestTestCase { +public class KnnSemanticTextIT extends KnnSemanticTextTestCase { @ClassRule public static ElasticsearchCluster cluster = Clusters.testCluster(spec -> spec.plugin("inference-service-test")); - @Rule(order = Integer.MIN_VALUE) - public ProfileLogger profileLogger = new ProfileLogger(); - - private int numDocs; - private final Map indexedTexts = new HashMap<>(); - @Override protected String getTestRestCluster() { return cluster.getHttpAddresses(); } - - @Before - public void checkCapability() { - assumeTrue("knn with semantic text not available", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); - } - - @SuppressWarnings("unchecked") - public void testKnnQueryWithSemanticText() throws IOException { - String knnQuery = """ - FROM semantic-test METADATA _score - | WHERE knn(dense_semantic, [0, 1, 2], 10) - | KEEP id, _score, dense_semantic - | SORT _score DESC - | LIMIT 10 - """; - - Map response = runEsqlQuery(knnQuery); - List> columns = (List>) response.get("columns"); - assertThat(columns.size(), is(3)); - List> rows = (List>) response.get("values"); - assertThat(rows.size(), is(3)); - for (int row = 0; row < rows.size(); row++) { - List rowData = rows.get(row); - Integer id = (Integer) rowData.get(0); - assertThat(id, is(3 - row)); - } - } - - 
public void testKnnQueryOnTextField() throws IOException { - String knnQuery = """ - FROM semantic-test METADATA _score - | WHERE knn(text, [0, 1, 2], 10) - | KEEP id, _score, dense_semantic - | SORT _score DESC - | LIMIT 10 - """; - - ResponseException re = expectThrows(ResponseException.class, () -> runEsqlQuery(knnQuery)); - assertThat(re.getResponse().getStatusLine().getStatusCode(), is(BAD_REQUEST.getStatus())); - assertThat(re.getMessage(), containsString("[knn] queries are only supported on [dense_vector] fields")); - } - - public void testKnnQueryOnSparseSemanticTextField() throws IOException { - String knnQuery = """ - FROM semantic-test METADATA _score - | WHERE knn(sparse_semantic, [0, 1, 2], 10) - | KEEP id, _score, sparse_semantic - | SORT _score DESC - | LIMIT 10 - """; - - ResponseException re = expectThrows(ResponseException.class, () -> runEsqlQuery(knnQuery)); - assertThat(re.getResponse().getStatusLine().getStatusCode(), is(BAD_REQUEST.getStatus())); - assertThat(re.getMessage(), containsString("[knn] queries are only supported on [dense_vector] fields")); - } - - @Before - public void setUp() throws Exception { - super.setUp(); - setupInferenceEndpoints(); - setupIndex(); - } - - private void setupIndex() throws IOException { - Request request = new Request("PUT", "/semantic-test"); - request.setJsonEntity(""" - { - "mappings": { - "properties": { - "id": { - "type": "integer" - }, - "dense_semantic": { - "type": "semantic_text", - "inference_id": "test_dense_inference" - }, - "sparse_semantic": { - "type": "semantic_text", - "inference_id": "test_sparse_inference" - }, - "text": { - "type": "text", - "copy_to": ["dense_semantic", "sparse_semantic"] - } - } - }, - "settings": { - "index": { - "number_of_shards": 1, - "number_of_replicas": 0 - } - } - } - """); - assertEquals(200, client().performRequest(request).getStatusLine().getStatusCode()); - - request = new Request("POST", "/_bulk?index=semantic-test&refresh=true"); - 
request.setJsonEntity(""" - {"index": {"_id": "1"}} - {"id": 1, "text": "sample text"} - {"index": {"_id": "2"}} - {"id": 2, "text": "another sample text"} - {"index": {"_id": "3"}} - {"id": 3, "text": "yet another sample text"} - """); - assertEquals(200, client().performRequest(request).getStatusLine().getStatusCode()); - } - - private void setupInferenceEndpoints() throws IOException { - CsvTestsDataLoader.createTextEmbeddingInferenceEndpoint(client()); - CsvTestsDataLoader.createSparseEmbeddingInferenceEndpoint(client()); - } - - @After - public void tearDown() throws Exception { - super.tearDown(); - client().performRequest(new Request("DELETE", "semantic-test")); - - if (CsvTestsDataLoader.clusterHasTextEmbeddingInferenceEndpoint(client())) { - CsvTestsDataLoader.deleteTextEmbeddingInferenceEndpoint(client()); - } - if (CsvTestsDataLoader.clusterHasSparseEmbeddingInferenceEndpoint(client())) { - CsvTestsDataLoader.deleteSparseEmbeddingInferenceEndpoint(client()); - } - } - - private Map runEsqlQuery(String query) throws IOException { - RestEsqlTestCase.RequestObjectBuilder builder = requestObjectBuilder().query(query); - return runEsqlSync(builder, new AssertWarnings.NoWarnings(), profileLogger); - } } diff --git a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/KnnSemanticTextTestCase.java b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/KnnSemanticTextTestCase.java new file mode 100644 index 0000000000000..f94ca4f5aa9d3 --- /dev/null +++ b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/KnnSemanticTextTestCase.java @@ -0,0 +1,164 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.esql.qa.rest; + +import org.elasticsearch.client.Request; +import org.elasticsearch.client.ResponseException; +import org.elasticsearch.test.rest.ESRestTestCase; +import org.elasticsearch.xpack.esql.AssertWarnings; +import org.elasticsearch.xpack.esql.CsvTestsDataLoader; +import org.elasticsearch.xpack.esql.action.EsqlCapabilities; +import org.junit.After; +import org.junit.Before; +import org.junit.Rule; + +import java.io.IOException; +import java.util.List; +import java.util.Map; + +import static org.elasticsearch.rest.RestStatus.BAD_REQUEST; +import static org.elasticsearch.xpack.esql.qa.rest.RestEsqlTestCase.requestObjectBuilder; +import static org.elasticsearch.xpack.esql.qa.rest.RestEsqlTestCase.runEsqlSync; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.core.StringContains.containsString; + +public class KnnSemanticTextTestCase extends ESRestTestCase { + + @Rule(order = Integer.MIN_VALUE) + public ProfileLogger profileLogger = new ProfileLogger(); + + @Before + public void checkCapability() { + assumeTrue("knn with semantic text not available", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); + } + + @SuppressWarnings("unchecked") + public void testKnnQueryWithSemanticText() throws IOException { + String knnQuery = """ + FROM semantic-test METADATA _score + | WHERE knn(dense_semantic, [0, 1, 2], 10) + | KEEP id, _score, dense_semantic + | SORT _score DESC + | LIMIT 10 + """; + + Map response = runEsqlQuery(knnQuery); + List> columns = (List>) response.get("columns"); + assertThat(columns.size(), is(3)); + List> rows = (List>) response.get("values"); + assertThat(rows.size(), is(3)); + for (int row = 0; row < rows.size(); row++) { + List rowData = rows.get(row); + Integer id = (Integer) rowData.get(0); + assertThat(id, is(3 - row)); + } + } + + public void testKnnQueryOnTextField() throws IOException { + String knnQuery = """ + FROM semantic-test METADATA _score + | WHERE knn(text, [0, 1, 2], 10) 
+ | KEEP id, _score, dense_semantic + | SORT _score DESC + | LIMIT 10 + """; + + ResponseException re = expectThrows(ResponseException.class, () -> runEsqlQuery(knnQuery)); + assertThat(re.getResponse().getStatusLine().getStatusCode(), is(BAD_REQUEST.getStatus())); + assertThat(re.getMessage(), containsString("[knn] queries are only supported on [dense_vector] fields")); + } + + public void testKnnQueryOnSparseSemanticTextField() throws IOException { + String knnQuery = """ + FROM semantic-test METADATA _score + | WHERE knn(sparse_semantic, [0, 1, 2], 10) + | KEEP id, _score, sparse_semantic + | SORT _score DESC + | LIMIT 10 + """; + + ResponseException re = expectThrows(ResponseException.class, () -> runEsqlQuery(knnQuery)); + assertThat(re.getResponse().getStatusLine().getStatusCode(), is(BAD_REQUEST.getStatus())); + assertThat(re.getMessage(), containsString("[knn] queries are only supported on [dense_vector] fields")); + } + + @Before + public void setUp() throws Exception { + super.setUp(); + setupInferenceEndpoints(); + setupIndex(); + } + + private void setupIndex() throws IOException { + Request request = new Request("PUT", "/semantic-test"); + request.setJsonEntity(""" + { + "mappings": { + "properties": { + "id": { + "type": "integer" + }, + "dense_semantic": { + "type": "semantic_text", + "inference_id": "test_dense_inference" + }, + "sparse_semantic": { + "type": "semantic_text", + "inference_id": "test_sparse_inference" + }, + "text": { + "type": "text", + "copy_to": ["dense_semantic", "sparse_semantic"] + } + } + }, + "settings": { + "index": { + "number_of_shards": 1, + "number_of_replicas": 0 + } + } + } + """); + assertEquals(200, client().performRequest(request).getStatusLine().getStatusCode()); + + request = new Request("POST", "/_bulk?index=semantic-test&refresh=true"); + request.setJsonEntity(""" + {"index": {"_id": "1"}} + {"id": 1, "text": "sample text"} + {"index": {"_id": "2"}} + {"id": 2, "text": "another sample text"} + {"index": {"_id": 
"3"}} + {"id": 3, "text": "yet another sample text"} + """); + assertEquals(200, client().performRequest(request).getStatusLine().getStatusCode()); + } + + private void setupInferenceEndpoints() throws IOException { + CsvTestsDataLoader.createTextEmbeddingInferenceEndpoint(client()); + CsvTestsDataLoader.createSparseEmbeddingInferenceEndpoint(client()); + } + + @After + public void tearDown() throws Exception { + super.tearDown(); + client().performRequest(new Request("DELETE", "semantic-test")); + + if (CsvTestsDataLoader.clusterHasTextEmbeddingInferenceEndpoint(client())) { + CsvTestsDataLoader.deleteTextEmbeddingInferenceEndpoint(client()); + } + if (CsvTestsDataLoader.clusterHasSparseEmbeddingInferenceEndpoint(client())) { + CsvTestsDataLoader.deleteSparseEmbeddingInferenceEndpoint(client()); + } + } + + private Map runEsqlQuery(String query) throws IOException { + RestEsqlTestCase.RequestObjectBuilder builder = requestObjectBuilder().query(query); + return runEsqlSync(builder, new AssertWarnings.NoWarnings(), profileLogger); + } +} From 6e437ac96d744e5387eb61d423b664b03d7feeed Mon Sep 17 00:00:00 2001 From: cdelgado Date: Mon, 1 Sep 2025 18:01:18 +0200 Subject: [PATCH 11/16] Spotless --- .../elasticsearch/xpack/esql/expression/function/vector/Knn.java | 1 - 1 file changed, 1 deletion(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java index 2baa1c757c21e..44eaeea1ea7d8 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java @@ -67,7 +67,6 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.DENSE_VECTOR; import static org.elasticsearch.xpack.esql.core.type.DataType.FLOAT; import static 
org.elasticsearch.xpack.esql.core.type.DataType.INTEGER; -import static org.elasticsearch.xpack.esql.core.type.DataType.KEYWORD; import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT; import static org.elasticsearch.xpack.esql.expression.Foldables.TypeResolutionValidator.forPreOptimizationValidation; import static org.elasticsearch.xpack.esql.expression.Foldables.resolveTypeQuery; From 3cf742edf5112aff5d8fa2778acb007ed17ad1fa Mon Sep 17 00:00:00 2001 From: cdelgado Date: Mon, 1 Sep 2025 18:09:01 +0200 Subject: [PATCH 12/16] Add comments --- .../xpack/esql/qa/rest/KnnSemanticTextTestCase.java | 3 +++ .../xpack/esql/expression/function/vector/Knn.java | 1 + 2 files changed, 4 insertions(+) diff --git a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/KnnSemanticTextTestCase.java b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/KnnSemanticTextTestCase.java index f94ca4f5aa9d3..120daa1e7f258 100644 --- a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/KnnSemanticTextTestCase.java +++ b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/KnnSemanticTextTestCase.java @@ -27,6 +27,9 @@ import static org.hamcrest.Matchers.is; import static org.hamcrest.core.StringContains.containsString; +/** + * Tests kNN queries on semantic_text fields. Mostly checks errors on the data node that can't be checked in other tests. 
+ */ public class KnnSemanticTextTestCase extends ESRestTestCase { @Rule(order = Integer.MIN_VALUE) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java index 44eaeea1ea7d8..1a7ef3d39ff51 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java @@ -213,6 +213,7 @@ protected TypeResolution resolveParams() { private TypeResolution resolveField() { return isNotNull(field(), sourceText(), FIRST).and( + // It really should be semantic_text instead of text, but field_caps retrieves semantic_text fields as text isType(field(), dt -> dt == TEXT, sourceText(), FIRST, ACCEPTED_FIELD_TYPES).or( isType(field(), dt -> dt == DENSE_VECTOR, sourceText(), FIRST, ACCEPTED_FIELD_TYPES) ) From 7b0cc2f236c508160adf20b0d842976ca940b8ac Mon Sep 17 00:00:00 2001 From: cdelgado Date: Tue, 2 Sep 2025 09:59:03 +0200 Subject: [PATCH 13/16] Fix serverless tests --- .../xpack/esql/qa/rest/KnnSemanticTextTestCase.java | 6 ------ 1 file changed, 6 deletions(-) diff --git a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/KnnSemanticTextTestCase.java b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/KnnSemanticTextTestCase.java index 120daa1e7f258..963badd5ca861 100644 --- a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/KnnSemanticTextTestCase.java +++ b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/KnnSemanticTextTestCase.java @@ -119,12 +119,6 @@ private void setupIndex() throws IOException { "copy_to": ["dense_semantic", "sparse_semantic"] } } - }, - "settings": { - "index": { - "number_of_shards": 1, - "number_of_replicas": 0 - } } } """); From 
7ea805014d5002038a175dd5279ae9a29e94e62f Mon Sep 17 00:00:00 2001 From: cdelgado Date: Wed, 3 Sep 2025 13:24:11 +0200 Subject: [PATCH 14/16] Bump capability, fix tests --- .../xpack/esql/qa/rest/EsqlSpecTestCase.java | 4 +- .../esql/qa/rest/KnnSemanticTextTestCase.java | 8 +-- .../src/main/resources/knn-function.csv-spec | 53 ++++++++++--------- .../xpack/esql/plugin/KnnFunctionIT.java | 2 +- .../xpack/esql/action/EsqlCapabilities.java | 2 +- .../function/vector/VectorWritables.java | 2 +- .../elasticsearch/xpack/esql/CsvTests.java | 2 +- .../xpack/esql/analysis/AnalyzerTests.java | 2 +- .../xpack/esql/analysis/VerifierTests.java | 18 +++---- .../function/fulltext/KnnTests.java | 2 +- .../LocalPhysicalPlanOptimizerTests.java | 24 ++++----- .../optimizer/LogicalPlanOptimizerTests.java | 30 +++++------ 12 files changed, 75 insertions(+), 74 deletions(-) diff --git a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/EsqlSpecTestCase.java b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/EsqlSpecTestCase.java index 8c53bce5faf0a..8c6978e1423f8 100644 --- a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/EsqlSpecTestCase.java +++ b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/EsqlSpecTestCase.java @@ -73,7 +73,7 @@ import static org.elasticsearch.xpack.esql.CsvTestsDataLoader.loadDataSetIntoEs; import static org.elasticsearch.xpack.esql.EsqlTestUtils.classpathResources; import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.COMPLETION; -import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.KNN_FUNCTION_V4; +import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.KNN_FUNCTION_V5; import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.METRICS_COMMAND; import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.RERANK; import static 
org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.SEMANTIC_TEXT_FIELD_CAPS; @@ -216,7 +216,7 @@ protected boolean requiresInferenceEndpoint() { SEMANTIC_TEXT_FIELD_CAPS.capabilityName(), RERANK.capabilityName(), COMPLETION.capabilityName(), - KNN_FUNCTION_V4.capabilityName() + KNN_FUNCTION_V5.capabilityName() ).anyMatch(testCase.requiredCapabilities::contains); } diff --git a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/KnnSemanticTextTestCase.java b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/KnnSemanticTextTestCase.java index 963badd5ca861..19f1327d15599 100644 --- a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/KnnSemanticTextTestCase.java +++ b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/KnnSemanticTextTestCase.java @@ -37,14 +37,14 @@ public class KnnSemanticTextTestCase extends ESRestTestCase { @Before public void checkCapability() { - assumeTrue("knn with semantic text not available", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); + assumeTrue("knn with semantic text not available", EsqlCapabilities.Cap.KNN_FUNCTION_V5.isEnabled()); } @SuppressWarnings("unchecked") public void testKnnQueryWithSemanticText() throws IOException { String knnQuery = """ FROM semantic-test METADATA _score - | WHERE knn(dense_semantic, [0, 1, 2], 10) + | WHERE knn(dense_semantic, [0, 1, 2]) | KEEP id, _score, dense_semantic | SORT _score DESC | LIMIT 10 @@ -65,7 +65,7 @@ public void testKnnQueryWithSemanticText() throws IOException { public void testKnnQueryOnTextField() throws IOException { String knnQuery = """ FROM semantic-test METADATA _score - | WHERE knn(text, [0, 1, 2], 10) + | WHERE knn(text, [0, 1, 2]) | KEEP id, _score, dense_semantic | SORT _score DESC | LIMIT 10 @@ -79,7 +79,7 @@ public void testKnnQueryOnTextField() throws IOException { public void testKnnQueryOnSparseSemanticTextField() throws IOException { String 
knnQuery = """ FROM semantic-test METADATA _score - | WHERE knn(sparse_semantic, [0, 1, 2], 10) + | WHERE knn(sparse_semantic, [0, 1, 2]) | KEEP id, _score, sparse_semantic | SORT _score DESC | LIMIT 10 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec index 4462eb157c966..e65d65f414cd1 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/knn-function.csv-spec @@ -3,7 +3,7 @@ # top-n query at the shard level knnSearch -required_capability: knn_function_v4 +required_capability: knn_function_v5 // tag::knn-function[] from colors metadata _score @@ -30,7 +30,7 @@ chartreuse | [127.0, 255.0, 0.0] ; knnSearchWithSimilarityOption -required_capability: knn_function_v4 +required_capability: knn_function_v5 from colors metadata _score | where knn(rgb_vector, [255,192,203], {"similarity": 40}) @@ -46,7 +46,7 @@ wheat | [245.0, 222.0, 179.0] ; knnHybridSearch -required_capability: knn_function_v4 +required_capability: knn_function_v5 from colors metadata _score | where match(color, "blue") or knn(rgb_vector, [65,105,225]) @@ -69,7 +69,7 @@ yellow | [255.0, 255.0, 0.0] ; knnWithPrefilter -required_capability: knn_function_v4 +required_capability: knn_function_v5 from colors | where knn(rgb_vector, [120,180,0]) and (match(color, "olive") or match(color, "green")) @@ -83,7 +83,7 @@ olive ; knnWithNegatedPrefilter -required_capability: knn_function_v4 +required_capability: knn_function_v5 from colors metadata _score | where knn(rgb_vector, [128,128,0]) and not (match(color, "olive") or match(color, "chocolate")) @@ -106,7 +106,7 @@ orange | [255.0, 165.0, 0.0] ; knnAfterKeep -required_capability: knn_function_v4 +required_capability: knn_function_v5 from colors metadata _score | keep rgb_vector, color, _score @@ -125,7 +125,7 @@ rgb_vector:dense_vector ; knnAfterDrop 
-required_capability: knn_function_v4 +required_capability: knn_function_v5 from colors metadata _score | drop primary @@ -144,7 +144,7 @@ lime | [0.0, 255.0, 0.0] ; knnAfterEval -required_capability: knn_function_v4 +required_capability: knn_function_v5 from colors metadata _score | eval composed_name = locate(color, " ") > 0 @@ -163,7 +163,7 @@ golden rod | true ; knnWithConjunction -required_capability: knn_function_v4 +required_capability: knn_function_v5 from colors metadata _score | where knn(rgb_vector, [255,255,238]) and hex_code like "#FFF*" @@ -183,7 +183,7 @@ yellow | #FFFF00 | [255.0, 255.0, 0.0] ; knnWithDisjunctionAndFiltersConjunction -required_capability: knn_function_v4 +required_capability: knn_function_v5 from colors metadata _score | where (knn(rgb_vector, [0,255,255]) or knn(rgb_vector, [128, 0, 255])) and primary == true @@ -206,7 +206,7 @@ yellow | [255.0, 255.0, 0.0] ; knnWithNegationsAndFiltersConjunction -required_capability: knn_function_v4 +required_capability: knn_function_v5 from colors metadata _score | where (knn(rgb_vector, [0,255,255]) and not(primary == true and match(color, "blue"))) @@ -229,7 +229,7 @@ azure | [240.0, 255.0, 255.0] ; knnWithNonPushableConjunction -required_capability: knn_function_v4 +required_capability: knn_function_v5 from colors metadata _score | eval composed_name = locate(color, " ") > 0 @@ -253,7 +253,7 @@ maroon | false ; testKnnWithNonPushableDisjunctions -required_capability: knn_function_v4 +required_capability: knn_function_v5 from colors metadata _score | where knn(rgb_vector, [128,128,0]) or length(color) > 10 @@ -276,7 +276,7 @@ green ; testKnnWithNonPushableDisjunctionsAndMinCandidates -required_capability: knn_function_v4 +required_capability: knn_function_v5 from colors metadata _score | where (knn(rgb_vector, [128,128,0], {"min_candidates": 2}) and length(color) > 10) or (knn(rgb_vector, [128,0,128], {"min_candidates": 2}) and primary == true) @@ -300,7 +300,7 @@ cyan | true ; testKnnWithStats 
-required_capability: knn_function_v4 +required_capability: knn_function_v5 from colors metadata _score | where knn(rgb_vector, [128,128,0]) @@ -314,7 +314,7 @@ c:long ; testKnnWithRerank -required_capability: knn_function_v4 +required_capability: knn_function_v5 required_capability: rerank from colors metadata _score @@ -340,10 +340,10 @@ golden rod ; testKnnWithSemanticText -required_capability: knn_function_v4 +required_capability: knn_function_v5 from semantic_text -| where knn(semantic_text_dense_field, [0, 1, 2], 10) +| where knn(semantic_text_dense_field, [0, 1, 2]) | keep semantic_text_dense_field | sort semantic_text_dense_field asc ; @@ -355,10 +355,10 @@ live long and prosper ; testKnnWithSemanticTextAndKeyword -required_capability: knn_function_v4 +required_capability: knn_function_v5 from semantic_text -| where knn(semantic_text_dense_field, [0, 1, 2], 10) +| where knn(semantic_text_dense_field, [0, 1, 2]) | keep semantic_text_dense_field, host | sort host asc ; @@ -371,7 +371,7 @@ be excellent to each other | host3 ; testKnnWithSemanticTextMultiValueField -required_capability: knn_function_v4 +required_capability: knn_function_v5 from semantic_text metadata _id | where match(st_multi_value, "something") AND match(host, "host1") @@ -383,13 +383,14 @@ _id: keyword | st_multi_value:text ; testKnnWithSemanticTextWithEvalsAndOtherFunctionsAndStats -required_capability: knn_function_v4 +required_capability: knn_function_v5 from semantic_text | where qstr("description:some*") | eval size = mv_count(st_multi_value) -| where knn(semantic_text_dense_field, [0, 1, 2], 10) -| STATS result = count(*) +| where knn(semantic_text_dense_field, [0, 1, 2]) +| limit 100 +| stats result = count(*) ; result:long @@ -397,11 +398,11 @@ result:long ; testKnnWithSemanticTextAndKql -required_capability: knn_function_v4 +required_capability: knn_function_v5 required_capability: kql_function from semantic_text -| where kql("host:host1") AND knn(semantic_text_dense_field, [0, 1, 
2], 10) +| where kql("host:host1") AND knn(semantic_text_dense_field, [0, 1, 2]) | KEEP host, semantic_text_dense_field ; diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java index 21ec240d9f8f4..70356cd36f34e 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/KnnFunctionIT.java @@ -196,7 +196,7 @@ public void testKnnWithLookupJoin() { @Before public void setup() throws IOException { - assumeTrue("Needs KNN support", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); + assumeTrue("Needs KNN support", EsqlCapabilities.Cap.KNN_FUNCTION_V5.isEnabled()); var indexName = "test"; var client = client().admin().indices(); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 9a69e9c86fe10..609164170ef69 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -1291,7 +1291,7 @@ public enum Cap { /** * Support knn function */ - KNN_FUNCTION_V4(Build.current().isSnapshot()), + KNN_FUNCTION_V5(Build.current().isSnapshot()), /** * Support for the LIKE operator with a list of wildcards. 
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorWritables.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorWritables.java index ab41201ceb328..d43ec3e92981d 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorWritables.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorWritables.java @@ -27,7 +27,7 @@ private VectorWritables() { public static List getNamedWritables() { List entries = new ArrayList<>(); - if (EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()) { + if (EsqlCapabilities.Cap.KNN_FUNCTION_V5.isEnabled()) { entries.add(Knn.ENTRY); } if (EsqlCapabilities.Cap.COSINE_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java index 97429ea091053..2941355eac39c 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java @@ -305,7 +305,7 @@ public final void test() throws Throwable { ); assumeFalse( "can't use KNN function in csv tests", - testCase.requiredCapabilities.contains(EsqlCapabilities.Cap.KNN_FUNCTION_V4.capabilityName()) + testCase.requiredCapabilities.contains(EsqlCapabilities.Cap.KNN_FUNCTION_V5.capabilityName()) ); assumeFalse( "lookup join disabled for csv tests", diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java index 95a7204b5c71f..d1700fe5be24f 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java +++ 
b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java @@ -2349,7 +2349,7 @@ public void testImplicitCasting() { public void testDenseVectorImplicitCastingKnn() { assumeTrue("dense_vector capability not available", EsqlCapabilities.Cap.DENSE_VECTOR_FIELD_TYPE.isEnabled()); - assumeTrue("dense_vector capability not available", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); + assumeTrue("dense_vector capability not available", EsqlCapabilities.Cap.KNN_FUNCTION_V5.isEnabled()); checkDenseVectorCastingKnn("float_vector"); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index 7ff80f1b987fb..a83dae458934e 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -1268,7 +1268,7 @@ public void testFieldBasedFullTextFunctions() throws Exception { checkFieldBasedWithNonIndexedColumn("Term", "term(text, \"cat\")", "function"); checkFieldBasedFunctionNotAllowedAfterCommands("Term", "function", "term(title, \"Meditation\")"); } - if (EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()) { + if (EsqlCapabilities.Cap.KNN_FUNCTION_V5.isEnabled()) { checkFieldBasedFunctionNotAllowedAfterCommands("KNN", "function", "knn(vector, [1, 2, 3])"); } } @@ -1401,7 +1401,7 @@ public void testFullTextFunctionsOnlyAllowedInWhere() throws Exception { if (EsqlCapabilities.Cap.MULTI_MATCH_FUNCTION.isEnabled()) { checkFullTextFunctionsOnlyAllowedInWhere("MultiMatch", "multi_match(\"Meditation\", title, body)", "function"); } - if (EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()) { + if (EsqlCapabilities.Cap.KNN_FUNCTION_V5.isEnabled()) { checkFullTextFunctionsOnlyAllowedInWhere("KNN", "knn(vector, [0, 1, 2])", "function"); } @@ -1456,7 +1456,7 @@ public void 
testFullTextFunctionsDisjunctions() { if (EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()) { checkWithFullTextFunctionsDisjunctions("term(title, \"Meditation\")"); } - if (EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()) { + if (EsqlCapabilities.Cap.KNN_FUNCTION_V5.isEnabled()) { checkWithFullTextFunctionsDisjunctions("knn(vector, [1, 2, 3])"); } } @@ -1521,7 +1521,7 @@ public void testFullTextFunctionsWithNonBooleanFunctions() { if (EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()) { checkFullTextFunctionsWithNonBooleanFunctions("Term", "term(title, \"Meditation\")", "function"); } - if (EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()) { + if (EsqlCapabilities.Cap.KNN_FUNCTION_V5.isEnabled()) { checkFullTextFunctionsWithNonBooleanFunctions("KNN", "knn(vector, [1, 2, 3])", "function"); } } @@ -1592,7 +1592,7 @@ public void testFullTextFunctionsTargetsExistingField() throws Exception { if (EsqlCapabilities.Cap.TERM_FUNCTION.isEnabled()) { testFullTextFunctionTargetsExistingField("term(fist_name, \"Meditation\")"); } - if (EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()) { + if (EsqlCapabilities.Cap.KNN_FUNCTION_V5.isEnabled()) { testFullTextFunctionTargetsExistingField("knn(vector, [0, 1, 2], 10)"); } } @@ -2189,7 +2189,7 @@ public void testFullTextFunctionOptions() { if (EsqlCapabilities.Cap.MULTI_MATCH_FUNCTION.isEnabled()) { checkOptionDataTypes(MultiMatch.OPTIONS, "FROM test | WHERE MULTI_MATCH(\"Jean\", title, body, {\"%s\": %s})"); } - if (EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()) { + if (EsqlCapabilities.Cap.KNN_FUNCTION_V5.isEnabled()) { checkOptionDataTypes(Knn.ALLOWED_OPTIONS, "FROM test | WHERE KNN(vector, [0.1, 0.2, 0.3], {\"%s\": %s})"); } } @@ -2282,7 +2282,7 @@ public void testFullTextFunctionsNullArgs() throws Exception { checkFullTextFunctionNullArgs("term(null, \"query\")", "first"); checkFullTextFunctionNullArgs("term(title, null)", "second"); } - if (EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()) { + if 
(EsqlCapabilities.Cap.KNN_FUNCTION_V5.isEnabled()) { checkFullTextFunctionNullArgs("knn(null, [0, 1, 2])", "first"); checkFullTextFunctionNullArgs("knn(vector, null)", "second"); } @@ -2313,7 +2313,7 @@ public void testFullTextFunctionsInStats() { if (EsqlCapabilities.Cap.MULTI_MATCH_FUNCTION.isEnabled()) { checkFullTextFunctionsInStats("multi_match(\"Meditation\", title, body)"); } - if (EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()) { + if (EsqlCapabilities.Cap.KNN_FUNCTION_V5.isEnabled()) { checkFullTextFunctionsInStats("knn(vector, [0, 1, 2])"); } } @@ -2376,7 +2376,7 @@ public void testVectorSimilarityFunctionsNullArgs() throws Exception { } public void testFullTextFunctionsWithSemanticText() { - checkFullTextFunctionsWithSemanticText("knn(semantic, [0, 1, 2], 10)"); + checkFullTextFunctionsWithSemanticText("knn(semantic, [0, 1, 2])"); checkFullTextFunctionsWithSemanticText("match(semantic, \"hello world\")"); checkFullTextFunctionsWithSemanticText("semantic:\"hello world\""); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/KnnTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/KnnTests.java index f87e278bd4238..49a6d3c904203 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/KnnTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/fulltext/KnnTests.java @@ -52,7 +52,7 @@ public static Iterable parameters() { @Before public void checkCapability() { - assumeTrue("KNN is not enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); + assumeTrue("KNN is not enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V5.isEnabled()); } private static List testCaseSuppliers() { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java 
b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java index d1bb7aeaa166a..573713e4501be 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java @@ -1376,7 +1376,7 @@ public void testMultiMatchOptionsPushDown() { public void testKnnOptionsPushDown() { assumeTrue("dense_vector capability not available", EsqlCapabilities.Cap.DENSE_VECTOR_FIELD_TYPE.isEnabled()); - assumeTrue("knn capability not available", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); + assumeTrue("knn capability not available", EsqlCapabilities.Cap.KNN_FUNCTION_V5.isEnabled()); String query = """ from test @@ -1402,7 +1402,7 @@ public void testKnnOptionsPushDown() { public void testKnnUsesLimitForK() { assumeTrue("dense_vector capability not available", EsqlCapabilities.Cap.DENSE_VECTOR_FIELD_TYPE.isEnabled()); - assumeTrue("knn capability not available", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); + assumeTrue("knn capability not available", EsqlCapabilities.Cap.KNN_FUNCTION_V5.isEnabled()); String query = """ from test @@ -1421,7 +1421,7 @@ public void testKnnUsesLimitForK() { public void testKnnKAndMinCandidatesLowerK() { assumeTrue("dense_vector capability not available", EsqlCapabilities.Cap.DENSE_VECTOR_FIELD_TYPE.isEnabled()); - assumeTrue("knn capability not available", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); + assumeTrue("knn capability not available", EsqlCapabilities.Cap.KNN_FUNCTION_V5.isEnabled()); String query = """ from test @@ -1440,7 +1440,7 @@ public void testKnnKAndMinCandidatesLowerK() { public void testKnnKAndMinCandidatesHigherK() { assumeTrue("dense_vector capability not available", EsqlCapabilities.Cap.DENSE_VECTOR_FIELD_TYPE.isEnabled()); - assumeTrue("knn capability not available", 
EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); + assumeTrue("knn capability not available", EsqlCapabilities.Cap.KNN_FUNCTION_V5.isEnabled()); String query = """ from test @@ -1899,7 +1899,7 @@ public void testFullTextFunctionWithStatsBy(FullTextFunctionTestCase testCase) { } public void testKnnPrefilters() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V5.isEnabled()); String query = """ from test @@ -1931,7 +1931,7 @@ public void testKnnPrefilters() { } public void testKnnPrefiltersWithMultipleFilters() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V5.isEnabled()); String query = """ from test @@ -1967,7 +1967,7 @@ public void testKnnPrefiltersWithMultipleFilters() { } public void testPushDownConjunctionsToKnnPrefilter() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V5.isEnabled()); String query = """ from test @@ -2004,7 +2004,7 @@ public void testPushDownConjunctionsToKnnPrefilter() { } public void testPushDownNegatedConjunctionsToKnnPrefilter() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V5.isEnabled()); String query = """ from test @@ -2041,7 +2041,7 @@ public void testPushDownNegatedConjunctionsToKnnPrefilter() { } public void testNotPushDownDisjunctionsToKnnPrefilter() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V5.isEnabled()); String query = """ from test @@ -2070,7 +2070,7 @@ public void testNotPushDownDisjunctionsToKnnPrefilter() { } public void testNotPushDownKnnWithNonPushablePrefilters() { 
- assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V5.isEnabled()); String query = """ from test @@ -2104,7 +2104,7 @@ public void testNotPushDownKnnWithNonPushablePrefilters() { } public void testPushDownComplexNegationsToKnnPrefilter() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V5.isEnabled()); String query = """ from test @@ -2154,7 +2154,7 @@ and NOT ((keyword == "test") or knn(dense_vector, [4, 5, 6]))) } public void testMultipleKnnQueriesInPrefilters() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V5.isEnabled()); String query = """ from test diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index c3c4a9e3f1038..269a2f42abe22 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -8499,7 +8499,7 @@ public void testSampleNoPushDownChangePoint() { } public void testPushDownConjunctionsToKnnPrefilter() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V5.isEnabled()); var query = """ from test @@ -8519,7 +8519,7 @@ public void testPushDownConjunctionsToKnnPrefilter() { } public void testPushDownMultipleFiltersToKnnPrefilter() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); + assumeTrue("knn must be enabled", 
EsqlCapabilities.Cap.KNN_FUNCTION_V5.isEnabled()); var query = """ from test @@ -8542,7 +8542,7 @@ public void testPushDownMultipleFiltersToKnnPrefilter() { } public void testNotPushDownDisjunctionsToKnnPrefilter() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V5.isEnabled()); var query = """ from test @@ -8559,7 +8559,7 @@ public void testNotPushDownDisjunctionsToKnnPrefilter() { } public void testPushDownConjunctionsAndNotDisjunctionsToKnnPrefilter() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V5.isEnabled()); /* and @@ -8594,7 +8594,7 @@ public void testPushDownConjunctionsAndNotDisjunctionsToKnnPrefilter() { } public void testMorePushDownConjunctionsAndNotDisjunctionsToKnnPrefilter() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V5.isEnabled()); /* or @@ -8626,7 +8626,7 @@ public void testMorePushDownConjunctionsAndNotDisjunctionsToKnnPrefilter() { } public void testMultipleKnnQueriesInPrefilters() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V5.isEnabled()); /* and @@ -8669,7 +8669,7 @@ public void testMultipleKnnQueriesInPrefilters() { } public void testKnnImplicitLimit() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V5.isEnabled()); var query = """ from test @@ -8684,7 +8684,7 @@ public void testKnnImplicitLimit() { } public void testKnnWithLimit() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); + assumeTrue("knn must be enabled", 
EsqlCapabilities.Cap.KNN_FUNCTION_V5.isEnabled()); var query = """ from test @@ -8700,7 +8700,7 @@ public void testKnnWithLimit() { } public void testKnnWithTopN() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V5.isEnabled()); var query = """ from test metadata _score @@ -8717,7 +8717,7 @@ public void testKnnWithTopN() { } public void testKnnWithMultipleLimitsAfterTopN() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V5.isEnabled()); var query = """ from test metadata _score @@ -8737,7 +8737,7 @@ public void testKnnWithMultipleLimitsAfterTopN() { } public void testKnnWithMultipleLimitsCombined() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V5.isEnabled()); var query = """ from test metadata _score @@ -8755,7 +8755,7 @@ public void testKnnWithMultipleLimitsCombined() { } public void testKnnWithMultipleClauses() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V5.isEnabled()); var query = """ from test metadata _score @@ -8778,7 +8778,7 @@ public void testKnnWithMultipleClauses() { } public void testKnnWithStats() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V5.isEnabled()); assertThat( typesError("from test | where knn(dense_vector, [0, 1, 2]) | stats c = count(*)"), @@ -8787,7 +8787,7 @@ public void testKnnWithStats() { } public void testKnnWithRerankAmdTopN() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); + assumeTrue("knn must be enabled", 
EsqlCapabilities.Cap.KNN_FUNCTION_V5.isEnabled()); assertThat(typesError(""" from test metadata _score @@ -8799,7 +8799,7 @@ public void testKnnWithRerankAmdTopN() { } public void testKnnWithRerankAmdLimit() { - assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V4.isEnabled()); + assumeTrue("knn must be enabled", EsqlCapabilities.Cap.KNN_FUNCTION_V5.isEnabled()); var query = """ from test metadata _score From d8ef10263fe811ffaa9d75a1cd21ec330bc960e7 Mon Sep 17 00:00:00 2001 From: cdelgado Date: Wed, 3 Sep 2025 13:31:40 +0200 Subject: [PATCH 15/16] Update function signature and regenerate docs --- .../esql/_snippets/functions/description/knn.md | 2 +- .../esql/_snippets/functions/parameters/knn.md | 2 +- docs/reference/query-languages/esql/images/functions/knn.svg | 2 +- .../esql/kibana/definition/functions/knn.json | 2 +- .../query-languages/esql/kibana/docs/functions/knn.md | 2 +- .../xpack/esql/expression/function/vector/Knn.java | 5 +++-- 6 files changed, 8 insertions(+), 7 deletions(-) diff --git a/docs/reference/query-languages/esql/_snippets/functions/description/knn.md b/docs/reference/query-languages/esql/_snippets/functions/description/knn.md index c39604bbf1fa6..5383b503d078b 100644 --- a/docs/reference/query-languages/esql/_snippets/functions/description/knn.md +++ b/docs/reference/query-languages/esql/_snippets/functions/description/knn.md @@ -2,5 +2,5 @@ **Description** -Finds the k nearest vectors to a query vector, as measured by a similarity metric. knn function finds nearest vectors through approximate search on indexed dense_vectors. +Finds the k nearest vectors to a query vector, as measured by a similarity metric. knn function finds nearest vectors through approximate search on indexed dense_vectors or semantic_text fields. 
diff --git a/docs/reference/query-languages/esql/_snippets/functions/parameters/knn.md b/docs/reference/query-languages/esql/_snippets/functions/parameters/knn.md index fb1b98a1e8a7a..0a234d119fac0 100644 --- a/docs/reference/query-languages/esql/_snippets/functions/parameters/knn.md +++ b/docs/reference/query-languages/esql/_snippets/functions/parameters/knn.md @@ -3,7 +3,7 @@ **Parameters** `field` -: Field that the query will target. +: Field that the query will target. knn function can be used with dense_vector or semantic_text fields. Other text fields are not allowed `query` : Vector value to find top nearest neighbours for. diff --git a/docs/reference/query-languages/esql/images/functions/knn.svg b/docs/reference/query-languages/esql/images/functions/knn.svg index 75a104a7cdcfa..501225abc9bfc 100644 --- a/docs/reference/query-languages/esql/images/functions/knn.svg +++ b/docs/reference/query-languages/esql/images/functions/knn.svg @@ -1 +1 @@ -KNN(field,query,options) \ No newline at end of file +KNN(field,query,options) \ No newline at end of file diff --git a/docs/reference/query-languages/esql/kibana/definition/functions/knn.json b/docs/reference/query-languages/esql/kibana/definition/functions/knn.json index f4b77305a200b..cd5541f8e7bfe 100644 --- a/docs/reference/query-languages/esql/kibana/definition/functions/knn.json +++ b/docs/reference/query-languages/esql/kibana/definition/functions/knn.json @@ -2,7 +2,7 @@ "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.", "type" : "scalar", "name" : "knn", - "description" : "Finds the k nearest vectors to a query vector, as measured by a similarity metric. knn function finds nearest vectors through approximate search on indexed dense_vectors.", + "description" : "Finds the k nearest vectors to a query vector, as measured by a similarity metric. 
knn function finds nearest vectors through approximate search on indexed dense_vectors or semantic_text fields.", "signatures" : [ ], "examples" : [ "from colors metadata _score\n| where knn(rgb_vector, [0, 120, 0])\n| sort _score desc, color asc" diff --git a/docs/reference/query-languages/esql/kibana/docs/functions/knn.md b/docs/reference/query-languages/esql/kibana/docs/functions/knn.md index bea09b0bf50de..4ad34ae271381 100644 --- a/docs/reference/query-languages/esql/kibana/docs/functions/knn.md +++ b/docs/reference/query-languages/esql/kibana/docs/functions/knn.md @@ -1,7 +1,7 @@ % This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. ### KNN -Finds the k nearest vectors to a query vector, as measured by a similarity metric. knn function finds nearest vectors through approximate search on indexed dense_vectors. +Finds the k nearest vectors to a query vector, as measured by a similarity metric. knn function finds nearest vectors through approximate search on indexed dense_vectors or semantic_text fields. ```esql from colors metadata _score diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java index 310dccb1f9e12..d1e843530292f 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java @@ -101,13 +101,14 @@ public class Knn extends FullTextFunction returnType = "boolean", preview = true, description = "Finds the k nearest vectors to a query vector, as measured by a similarity metric. 
" - + "knn function finds nearest vectors through approximate search on indexed dense_vectors.", + + "knn function finds nearest vectors through approximate search on indexed dense_vectors or semantic_text fields.", examples = { @Example(file = "knn-function", tag = "knn-function") }, appliesTo = { @FunctionAppliesTo(lifeCycle = FunctionAppliesToLifecycle.DEVELOPMENT) } ) public Knn( Source source, - @Param(name = "field", type = { "dense_vector" }, description = "Field that the query will target.") Expression field, + @Param(name = "field", type = { "dense_vector", "text" }, description = "Field that the query will target. " + + "knn function can be used with dense_vector or semantic_text fields. Other text fields are not allowed") Expression field, @Param( name = "query", type = { "dense_vector" }, From 2e9f11897503e111c290c69417743bbecf2be130 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 3 Sep 2025 11:37:55 +0000 Subject: [PATCH 16/16] [CI] Auto commit changes from spotless --- .../xpack/esql/expression/function/vector/Knn.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java index d1e843530292f..79930ef057837 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Knn.java @@ -107,8 +107,12 @@ public class Knn extends FullTextFunction ) public Knn( Source source, - @Param(name = "field", type = { "dense_vector", "text" }, description = "Field that the query will target. " + - "knn function can be used with dense_vector or semantic_text fields. 
Other text fields are not allowed") Expression field, + @Param( + name = "field", + type = { "dense_vector", "text" }, + description = "Field that the query will target. " + + "knn function can be used with dense_vector or semantic_text fields. Other text fields are not allowed" + ) Expression field, @Param( name = "query", type = { "dense_vector" },