diff --git a/docs/changelog/129278.yaml b/docs/changelog/129278.yaml new file mode 100644 index 0000000000000..567fc9e5696f0 --- /dev/null +++ b/docs/changelog/129278.yaml @@ -0,0 +1,5 @@ +pr: 129278 +summary: Fix constant keyword optimization +area: ES|QL +type: bug +issues: [] diff --git a/docs/reference/esql/functions/kibana/definition/knn.json b/docs/reference/esql/functions/kibana/definition/knn.json index 48d3e582eec58..9d3e04cc94eeb 100644 --- a/docs/reference/esql/functions/kibana/definition/knn.json +++ b/docs/reference/esql/functions/kibana/definition/knn.json @@ -1,12 +1,12 @@ { - "comment" : "This is generated by ESQL’s AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.", - "type" : "scalar", + "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.", + "type" : "eval", "name" : "knn", "description" : "Finds the k nearest vectors to a query vector, as measured by a similarity metric. knn function finds nearest vectors through approximate search on indexed dense_vectors.", "signatures" : [ ], "examples" : [ - "from colors metadata _score\n| where knn(rgb_vector, [0, 120, 0])\n| sort _score desc", - "from colors metadata _score\n| where knn(rgb_vector, [0,255,255], {\"k\": 4})\n| sort _score desc" + "from colors metadata _score \n| where knn(rgb_vector, [0, 120, 0]) \n| sort _score desc, color asc", + "from colors metadata _score \n| where knn(rgb_vector, [0,255,255], {\"k\": 4})\n| sort _score desc, color asc" ], "preview" : true, "snapshot_only" : true diff --git a/docs/reference/esql/functions/kibana/docs/knn.md b/docs/reference/esql/functions/kibana/docs/knn.md index 45d1f294ea0a8..ef7475a3fb9ad 100644 --- a/docs/reference/esql/functions/kibana/docs/knn.md +++ b/docs/reference/esql/functions/kibana/docs/knn.md @@ -1,10 +1,12 @@ -% This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + ### KNN Finds the k nearest vectors to a query vector, as measured by a similarity metric. knn function finds nearest vectors through approximate search on indexed dense_vectors. -```esql -from colors metadata _score -| where knn(rgb_vector, [0, 120, 0]) -| sort _score desc +``` +from colors metadata _score +| where knn(rgb_vector, [0, 120, 0]) +| sort _score desc, color asc ``` diff --git a/server/src/main/java/org/elasticsearch/common/lucene/search/CaseInsensitiveTermQuery.java b/server/src/main/java/org/elasticsearch/common/lucene/search/CaseInsensitiveTermQuery.java index f9162fe8cfcd7..4d07f496fe246 100644 --- a/server/src/main/java/org/elasticsearch/common/lucene/search/CaseInsensitiveTermQuery.java +++ b/server/src/main/java/org/elasticsearch/common/lucene/search/CaseInsensitiveTermQuery.java @@ -28,6 +28,15 @@ public CaseInsensitiveTermQuery(Term term) { @Override public String toString(String field) { - return this.getClass().getSimpleName() + "{" + field + ":" + term.text() + "}"; + StringBuilder buffer = new StringBuilder(); + buffer.append(getClass().getSimpleName()); + buffer.append('{'); + if (term.field().equals(field) == false) { + buffer.append(term.field()); + buffer.append(':'); + } + buffer.append(term.text()); + buffer.append('}'); + return buffer.toString(); } } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/LocalSourceOperator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/LocalSourceOperator.java index 228a883ce2b58..b856ae08e31b2 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/LocalSourceOperator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/LocalSourceOperator.java @@ -82,7 +82,10 @@ public Page getOutput() { } @Override - public void close() { + public void close() {} + @Override + public String toString() { + return "LocalSourceOperator"; } } diff --git a/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/PushQueriesIT.java b/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/PushQueriesIT.java index 6d7028d842d54..6e6ae8a737d25 100644 --- a/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/PushQueriesIT.java +++ b/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/PushQueriesIT.java @@ -28,10 +28,11 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.regex.Pattern; -import java.util.stream.Stream; import static org.elasticsearch.test.ListMatcher.matchesList; import static org.elasticsearch.test.MapMatcher.assertMap; @@ -56,12 +57,27 @@ public class PushQueriesIT extends ESRestTestCase { @ParametersFactory(argumentFormatting = "%1s") public static List args() { - return Stream.of("auto", "text", "match_only_text", "semantic_text").map(s -> new Object[] { s }).toList(); + return Arrays.stream(Type.values()).map(s -> new Object[] { s }).toList(); } - private final String type; + public enum Type { + AUTO(false), + CONSTANT_KEYWORD(false), + KEYWORD(false), + MATCH_ONLY_TEXT_WITH_KEYWORD(false), + SEMANTIC_TEXT_WITH_KEYWORD(true), + TEXT_WITH_KEYWORD(false); - public PushQueriesIT(String type) { + private final boolean needEmbeddings; + + Type(boolean needEmbeddings) { + this.needEmbeddings = needEmbeddings; + } + } + + private final Type type; + + public PushQueriesIT(Type type) { this.type = type; } @@ -72,17 +88,16 @@ public void testEquality() throws IOException { | WHERE test == "%value" """; String luceneQuery = switch (type) { - case "text", "auto" -> "#test.keyword:%value -_ignored:test.keyword"; - case "match_only_text" -> "*:*"; - case "semantic_text" -> "FieldExistsQuery [field=_primary_term]"; - default -> throw new UnsupportedOperationException("unknown type [" + type + "]"); + case AUTO, TEXT_WITH_KEYWORD -> "#test.keyword:%value -_ignored:test.keyword"; + case KEYWORD -> "test:%value"; + case CONSTANT_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD -> "*:*"; + case SEMANTIC_TEXT_WITH_KEYWORD -> "FieldExistsQuery [field=_primary_term]"; }; - boolean filterInCompute = switch (type) { - case "text", "auto" -> false; - case "match_only_text", "semantic_text" -> true; - default -> throw new UnsupportedOperationException("unknown type [" + type + "]"); + ComputeSignature dataNodeSignature = switch (type) { + case AUTO, CONSTANT_KEYWORD, KEYWORD, TEXT_WITH_KEYWORD -> ComputeSignature.FILTER_IN_QUERY; + case MATCH_ONLY_TEXT_WITH_KEYWORD, SEMANTIC_TEXT_WITH_KEYWORD -> ComputeSignature.FILTER_IN_COMPUTE; }; - testPushQuery(value, esqlQuery, List.of(luceneQuery), filterInCompute, true); + testPushQuery(value, esqlQuery, List.of(luceneQuery), dataNodeSignature, true); } public void testEqualityTooBigToPush() throws IOException { @@ -92,11 +107,15 @@ public void testEqualityTooBigToPush() throws IOException { | WHERE test == "%value" """; String luceneQuery = switch (type) { - case "text", "auto", "match_only_text" -> "*:*"; - case "semantic_text" -> "FieldExistsQuery [field=_primary_term]"; - default -> throw new UnsupportedOperationException("unknown type [" + type + "]"); + case AUTO, CONSTANT_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD, TEXT_WITH_KEYWORD -> "*:*"; + case KEYWORD -> "#test:%value #single_value_match(test)"; + case SEMANTIC_TEXT_WITH_KEYWORD -> "FieldExistsQuery [field=_primary_term]"; }; - testPushQuery(value, esqlQuery, List.of(luceneQuery), true, true); + ComputeSignature dataNodeSignature = switch (type) { + case CONSTANT_KEYWORD, KEYWORD -> ComputeSignature.FILTER_IN_QUERY; + case AUTO, MATCH_ONLY_TEXT_WITH_KEYWORD, SEMANTIC_TEXT_WITH_KEYWORD, TEXT_WITH_KEYWORD -> ComputeSignature.FILTER_IN_COMPUTE; + }; + testPushQuery(value, esqlQuery, List.of(luceneQuery), dataNodeSignature, type != Type.KEYWORD); } /** @@ -110,11 +129,15 @@ public void testEqualityOrTooBig() throws IOException { | WHERE test == "%value" OR test == "%tooBig" """.replace("%tooBig", tooBig); String luceneQuery = switch (type) { - case "text", "auto", "match_only_text" -> "*:*"; - case "semantic_text" -> "FieldExistsQuery [field=_primary_term]"; - default -> throw new UnsupportedOperationException("unknown type [" + type + "]"); + case AUTO, CONSTANT_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD, TEXT_WITH_KEYWORD -> "*:*"; + case KEYWORD -> "test:(%tooBig %value)".replace("%tooBig", tooBig); + case SEMANTIC_TEXT_WITH_KEYWORD -> "FieldExistsQuery [field=_primary_term]"; + }; + ComputeSignature dataNodeSignature = switch (type) { + case CONSTANT_KEYWORD, KEYWORD -> ComputeSignature.FILTER_IN_QUERY; + case AUTO, MATCH_ONLY_TEXT_WITH_KEYWORD, SEMANTIC_TEXT_WITH_KEYWORD, TEXT_WITH_KEYWORD -> ComputeSignature.FILTER_IN_COMPUTE; }; - testPushQuery(value, esqlQuery, List.of(luceneQuery), true, true); + testPushQuery(value, esqlQuery, List.of(luceneQuery), dataNodeSignature, true); } public void testEqualityOrOther() throws IOException { @@ -124,17 +147,16 @@ public void testEqualityOrOther() throws IOException { | WHERE test == "%value" OR foo == 2 """; String luceneQuery = switch (type) { - case "text", "auto" -> "(#test.keyword:%value -_ignored:test.keyword) foo:[2 TO 2]"; - case "match_only_text" -> "*:*"; - case "semantic_text" -> "FieldExistsQuery [field=_primary_term]"; - default -> throw new UnsupportedOperationException("unknown type [" + type + "]"); + case AUTO, TEXT_WITH_KEYWORD -> "(#test.keyword:%value -_ignored:test.keyword) foo:[2 TO 2]"; + case KEYWORD -> "test:%value foo:[2 TO 2]"; + case CONSTANT_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD -> "*:*"; + case SEMANTIC_TEXT_WITH_KEYWORD -> "FieldExistsQuery [field=_primary_term]"; }; - boolean filterInCompute = switch (type) { - case "text", "auto" -> false; - case "match_only_text", "semantic_text" -> true; - default -> throw new UnsupportedOperationException("unknown type [" + type + "]"); + ComputeSignature dataNodeSignature = switch (type) { + case AUTO, CONSTANT_KEYWORD, KEYWORD, TEXT_WITH_KEYWORD -> ComputeSignature.FILTER_IN_QUERY; + case MATCH_ONLY_TEXT_WITH_KEYWORD, SEMANTIC_TEXT_WITH_KEYWORD -> ComputeSignature.FILTER_IN_COMPUTE; }; - testPushQuery(value, esqlQuery, List.of(luceneQuery), filterInCompute, true); + testPushQuery(value, esqlQuery, List.of(luceneQuery), dataNodeSignature, true); } public void testEqualityAndOther() throws IOException { @@ -144,22 +166,21 @@ public void testEqualityAndOther() throws IOException { | WHERE test == "%value" AND foo == 1 """; List luceneQueryOptions = switch (type) { - case "text", "auto" -> List.of("#(#test.keyword:%value -_ignored:test.keyword) #foo:[1 TO 1]"); - case "match_only_text" -> List.of("foo:[1 TO 1]"); - case "semantic_text" -> + case AUTO, TEXT_WITH_KEYWORD -> List.of("#(#test.keyword:%value -_ignored:test.keyword) #foo:[1 TO 1]"); + case KEYWORD -> List.of("#test:%value #foo:[1 TO 1]"); + case CONSTANT_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD -> List.of("foo:[1 TO 1]"); + case SEMANTIC_TEXT_WITH_KEYWORD -> /* * single_value_match is here because there are extra documents hiding in the index * that don't have the `foo` field. */ List.of("#foo:[1 TO 1] #single_value_match(foo)", "foo:[1 TO 1]"); - default -> throw new UnsupportedOperationException("unknown type [" + type + "]"); }; - boolean filterInCompute = switch (type) { - case "text", "auto" -> false; - case "match_only_text", "semantic_text" -> true; - default -> throw new UnsupportedOperationException("unknown type [" + type + "]"); + ComputeSignature dataNodeSignature = switch (type) { + case AUTO, CONSTANT_KEYWORD, KEYWORD, TEXT_WITH_KEYWORD -> ComputeSignature.FILTER_IN_QUERY; + case MATCH_ONLY_TEXT_WITH_KEYWORD, SEMANTIC_TEXT_WITH_KEYWORD -> ComputeSignature.FILTER_IN_COMPUTE; }; - testPushQuery(value, esqlQuery, luceneQueryOptions, filterInCompute, true); + testPushQuery(value, esqlQuery, luceneQueryOptions, dataNodeSignature, true); } public void testInequality() throws IOException { @@ -169,12 +190,16 @@ public void testInequality() throws IOException { | WHERE test != "%different_value" """; String luceneQuery = switch (type) { - case "text", "auto" -> "(-test.keyword:%different_value #*:*) _ignored:test.keyword"; - case "match_only_text" -> "*:*"; - case "semantic_text" -> "FieldExistsQuery [field=_primary_term]"; - default -> throw new UnsupportedOperationException("unknown type [" + type + "]"); + case AUTO, TEXT_WITH_KEYWORD -> "(-test.keyword:%different_value #*:*) _ignored:test.keyword"; + case CONSTANT_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD -> "*:*"; + case KEYWORD -> "-test:%different_value #*:*"; + case SEMANTIC_TEXT_WITH_KEYWORD -> "FieldExistsQuery [field=_primary_term]"; }; - testPushQuery(value, esqlQuery, List.of(luceneQuery), true, true); + ComputeSignature dataNodeSignature = switch (type) { + case CONSTANT_KEYWORD, KEYWORD -> ComputeSignature.FILTER_IN_QUERY; + case AUTO, MATCH_ONLY_TEXT_WITH_KEYWORD, SEMANTIC_TEXT_WITH_KEYWORD, TEXT_WITH_KEYWORD -> ComputeSignature.FILTER_IN_COMPUTE; + }; + testPushQuery(value, esqlQuery, List.of(luceneQuery), dataNodeSignature, true); } public void testInequalityTooBigToPush() throws IOException { @@ -184,11 +209,16 @@ public void testInequalityTooBigToPush() throws IOException { | WHERE test != "%value" """; String luceneQuery = switch (type) { - case "text", "auto", "match_only_text" -> "*:*"; - case "semantic_text" -> "FieldExistsQuery [field=_primary_term]"; - default -> throw new UnsupportedOperationException("unknown type [" + type + "]"); + case AUTO, CONSTANT_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD, TEXT_WITH_KEYWORD -> "*:*"; + case KEYWORD -> "#(-test:%value #*:*) #single_value_match(test)"; + case SEMANTIC_TEXT_WITH_KEYWORD -> "FieldExistsQuery [field=_primary_term]"; + }; + ComputeSignature dataNodeSignature = switch (type) { + case AUTO, MATCH_ONLY_TEXT_WITH_KEYWORD, SEMANTIC_TEXT_WITH_KEYWORD, TEXT_WITH_KEYWORD -> ComputeSignature.FILTER_IN_COMPUTE; + case CONSTANT_KEYWORD -> ComputeSignature.FIND_NONE; + case KEYWORD -> ComputeSignature.FILTER_IN_QUERY; }; - testPushQuery(value, esqlQuery, List.of(luceneQuery), true, false); + testPushQuery(value, esqlQuery, List.of(luceneQuery), dataNodeSignature, false); } public void testCaseInsensitiveEquality() throws IOException { @@ -198,15 +228,48 @@ public void testCaseInsensitiveEquality() throws IOException { | WHERE TO_LOWER(test) == "%value" """; String luceneQuery = switch (type) { - case "text", "auto", "match_only_text" -> "*:*"; - case "semantic_text" -> "FieldExistsQuery [field=_primary_term]"; - default -> throw new UnsupportedOperationException("unknown type [" + type + "]"); + case AUTO, CONSTANT_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD, TEXT_WITH_KEYWORD -> "*:*"; + case KEYWORD -> "CaseInsensitiveTermQuery{test:%value}"; + case SEMANTIC_TEXT_WITH_KEYWORD -> "FieldExistsQuery [field=_primary_term]"; }; - testPushQuery(value, esqlQuery, List.of(luceneQuery), true, true); + ComputeSignature dataNodeSignature = switch (type) { + case CONSTANT_KEYWORD, KEYWORD -> ComputeSignature.FILTER_IN_QUERY; + case AUTO, MATCH_ONLY_TEXT_WITH_KEYWORD, SEMANTIC_TEXT_WITH_KEYWORD, TEXT_WITH_KEYWORD -> ComputeSignature.FILTER_IN_COMPUTE; + }; + testPushQuery(value, esqlQuery, List.of(luceneQuery), dataNodeSignature, true); } - private void testPushQuery(String value, String esqlQuery, List luceneQueryOptions, boolean filterInCompute, boolean found) - throws IOException { + enum ComputeSignature { + FILTER_IN_COMPUTE( + matchesList().item("LuceneSourceOperator") + .item("ValuesSourceReaderOperator") + .item("FilterOperator") + .item("LimitOperator") + .item("ProjectOperator") + .item("ExchangeSinkOperator") + ), + FILTER_IN_QUERY( + matchesList().item("LuceneSourceOperator") + .item("ValuesSourceReaderOperator") + .item("ProjectOperator") + .item("ExchangeSinkOperator") + ), + FIND_NONE(matchesList().item("LocalSourceOperator").item("ExchangeSinkOperator")); + + private final ListMatcher matcher; + + ComputeSignature(ListMatcher sig) { + this.matcher = sig; + } + } + + private void testPushQuery( + String value, + String esqlQuery, + List luceneQueryOptions, + ComputeSignature dataNodeSignature, + boolean found + ) throws IOException { indexValue(value); String differentValue = randomValueOtherThan(value, () -> randomAlphaOfLength(value.isEmpty() ? 1 : value.length())); @@ -222,7 +285,7 @@ private void testPushQuery(String value, String esqlQuery, List luceneQu .entry("planning", matchesMap().extraOk()) .entry("query", matchesMap().extraOk()) ), - matchesList().item(matchesMap().entry("name", "test").entry("type", "text")), + matchesList().item(matchesMap().entry("name", "test").entry("type", anyOf(equalTo("text"), equalTo("keyword")))), equalTo(found ? List.of(List.of(value)) : List.of()) ); Matcher luceneQueryMatcher = anyOf( @@ -246,12 +309,7 @@ private void testPushQuery(String value, String esqlQuery, List luceneQu String description = p.get("task_description").toString(); switch (description) { case "data" -> { - ListMatcher matcher = matchesList().item("LuceneSourceOperator").item("ValuesSourceReaderOperator"); - if (filterInCompute) { - matcher = matcher.item("FilterOperator").item("LimitOperator"); - } - matcher = matcher.item("ProjectOperator").item("ExchangeSinkOperator"); - assertMap(sig, matcher); + assertMap(sig, dataNodeSignature.matcher); } case "node_reduce" -> { if (sig.contains("LimitOperator")) { @@ -289,39 +347,11 @@ private void indexValue(String value) throws IOException { } }"""; json += switch (type) { - case "auto" -> ""; - case "semantic_text" -> """ - , - "mappings": { - "properties": { - "test": { - "type": "semantic_text", - "inference_id": "test", - "fields": { - "keyword": { - "type": "keyword", - "ignore_above": 256 - } - } - } - } - }"""; - default -> """ - , - "mappings": { - "properties": { - "test": { - "type": "%type", - "fields": { - "keyword": { - "type": "keyword", - "ignore_above": 256 - } - } - } - } - } - }""".replace("%type", type); + case AUTO -> ""; + case CONSTANT_KEYWORD -> justType(); + case KEYWORD -> keyword(); + case SEMANTIC_TEXT_WITH_KEYWORD -> semanticTextWithKeyword(); + case TEXT_WITH_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD -> typeWithKeyword(); }; json += "}"; createIndex.setJsonEntity(json); @@ -341,6 +371,68 @@ private void indexValue(String value) throws IOException { assertThat(entityToMap(bulkResponse.getEntity(), XContentType.JSON), matchesMap().entry("errors", false).extraOk()); } + private String justType() { + return """ + , + "mappings": { + "properties": { + "test": { + "type": "%type" + } + } + }""".replace("%type", type.name().toLowerCase(Locale.ROOT)); + } + + private String keyword() { + return """ + , + "mappings": { + "properties": { + "test": { + "type": "keyword", + "ignore_above": 256 + } + } + }"""; + } + + private String typeWithKeyword() { + return """ + , + "mappings": { + "properties": { + "test": { + "type": "%type", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + } + } + }""".replace("%type", type.name().replace("_WITH_KEYWORD", "").toLowerCase(Locale.ROOT)); + } + + private String semanticTextWithKeyword() { + return """ + , + "mappings": { + "properties": { + "test": { + "type": "semantic_text", + "inference_id": "test", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + } + } + }"""; + } + private static final Pattern TO_NAME = Pattern.compile("\\[.+", Pattern.DOTALL); private static String checkOperatorProfile(Map o, Matcher query) { @@ -369,7 +461,7 @@ protected boolean preserveClusterUponCompletion() { @Before public void setUpTextEmbeddingInferenceEndpoint() throws IOException { - if (type.equals("semantic_text") == false || setupEmbeddings) { + if (type.needEmbeddings == false || setupEmbeddings) { return; } setupEmbeddings = true; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchContextStats.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchContextStats.java index aeb5ec4b4bf4a..24bd1bd61f938 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchContextStats.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchContextStats.java @@ -319,7 +319,7 @@ public boolean canUseEqualityOnSyntheticSourceDelegate(FieldAttribute.FieldName } @Override - public String constantValue(FieldName name) { + public String constantValue(FieldAttribute.FieldName name) { String val = null; for (SearchExecutionContext ctx : contexts) { MappedFieldType f = ctx.getFieldType(name.string());