diff --git a/docs/changelog/105593.yaml b/docs/changelog/105593.yaml new file mode 100644 index 0000000000000..4eef0d9404f42 --- /dev/null +++ b/docs/changelog/105593.yaml @@ -0,0 +1,5 @@ +pr: 105593 +summary: "ESQL: push down \"[text_field] is not null\"" +area: ES|QL +type: enhancement +issues: [] diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec index 97b36859c1419..4aff4c689c077 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec @@ -1195,3 +1195,25 @@ ROW a = 1 | STATS couNt(*) | SORT `couNt(*)` couNt(*):l 1 ; + +isNullWithStatsCount_On_TextField +FROM airports +| EVAL s = name, x = name +| WHERE s IS NULL +| STATS c = COUNT(x) +; + +c:l +0 +; + +isNotNullWithStatsCount_On_TextField +FROM airports +| EVAL s = name, x = name +| WHERE s IS NOT NULL +| STATS c = COUNT(x) +; + +c:l +891 +; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizer.java index 279ce3185d4aa..7ae8e029fd761 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizer.java @@ -249,6 +249,11 @@ public static boolean canPushToSource(Expression exp, Predicate return canPushToSource(not.field(), hasIdenticalDelegate); } else if (exp instanceof UnaryScalarFunction usf) { if (usf instanceof RegexMatch || usf instanceof IsNull || usf instanceof IsNotNull) { + if (usf instanceof IsNull || usf instanceof IsNotNull) { + if (usf.field() instanceof FieldAttribute fa && fa.dataType().equals(DataTypes.TEXT)) { + return true; + } + } return isAttributePushable(usf.field(), usf, 
hasIdenticalDelegate); } } else if (exp instanceof CIDRMatch cidrMatch) { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java index 55320cfbeca32..cf387245a5968 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java @@ -57,6 +57,7 @@ import java.util.ArrayList; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.Set; import java.util.stream.Collectors; @@ -399,6 +400,7 @@ public void testIsNotNullPushdownFilter() { /** * Expects + * * LimitExec[1000[INTEGER]] * \_ExchangeExec[[],false] * \_ProjectExec[[_meta_field{f}#9, emp_no{f}#3, first_name{f}#4, gender{f}#5, job{f}#10, job.raw{f}#11, languages{f}#6, last_n @@ -420,6 +422,115 @@ public void testIsNullPushdownFilter() { assertThat(query.query().toString(), is(expected.toString())); } + /** + * Expects + * + * LimitExec[500[INTEGER]] + * \_AggregateExec[[],[COUNT(gender{f}#7) AS count(gender)],FINAL,null] + * \_ExchangeExec[[count{r}#15, seen{r}#16],true] + * \_AggregateExec[[],[COUNT(gender{f}#7) AS count(gender)],PARTIAL,8] + * \_FieldExtractExec[gender{f}#7] + * \_EsQueryExec[test], query[{"exists":{"field":"gender","boost":1.0}}][_doc{f}#17], limit[], sort[] estimatedRowSize[54] + */ + public void testIsNotNull_TextField_Pushdown() { + String textField = randomFrom("gender", "job"); + var plan = plan(String.format(Locale.ROOT, "from test | where %s is not null | stats count(%s)", textField, textField)); + + var limit = as(plan, LimitExec.class); + var finalAgg = as(limit.child(), AggregateExec.class); + var exchange = as(finalAgg.child(), ExchangeExec.class); + var partialAgg = as(exchange.child(), 
AggregateExec.class); + var fieldExtract = as(partialAgg.child(), FieldExtractExec.class); + var query = as(fieldExtract.child(), EsQueryExec.class); + var expected = QueryBuilders.existsQuery(textField); + assertThat(query.query().toString(), is(expected.toString())); + } + + /** + * Expects + * LimitExec[1000[INTEGER]] + * \_ExchangeExec[[],false] + * \_ProjectExec[[_meta_field{f}#9, emp_no{f}#3, first_name{f}#4, gender{f}#5, job{f}#10, job.raw{f}#11, languages{f}#6, last_n + * ame{f}#7, long_noidx{f}#12, salary{f}#8]] + * \_FieldExtractExec[_meta_field{f}#9, emp_no{f}#3, first_name{f}#4, gen..] + * \_EsQueryExec[test], query[{"bool":{"must_not":[{"exists":{"field":"gender","boost":1.0}}],"boost":1.0}}] + * [_doc{f}#13], limit[1000], sort[] estimatedRowSize[324] + */ + public void testIsNull_TextField_Pushdown() { + String textField = randomFrom("gender", "job"); + var plan = plan(String.format(Locale.ROOT, "from test | where %s is null", textField)); + + var limit = as(plan, LimitExec.class); + var exchange = as(limit.child(), ExchangeExec.class); + var project = as(exchange.child(), ProjectExec.class); + var fieldExtract = as(project.child(), FieldExtractExec.class); + var query = as(fieldExtract.child(), EsQueryExec.class); + var expected = QueryBuilders.boolQuery().mustNot(QueryBuilders.existsQuery(textField)); + assertThat(query.query().toString(), is(expected.toString())); + } + + /** + * count(x) adds an implicit "exists(x)" filter in the pushed down query. + * This test checks this "exists" doesn't clash with the "is null" pushdown on the text field. + * In this particular query, "exists(x)" and "x is null" cancel each other out. 
+ * + * Expects + * + * LimitExec[1000[INTEGER]] + * \_AggregateExec[[],[COUNT(job{f}#19) AS c],FINAL,8] + * \_ExchangeExec[[count{r}#22, seen{r}#23],true] + * \_LocalSourceExec[[count{r}#22, seen{r}#23],[LongVectorBlock[vector=ConstantLongVector[positions=1, value=0]], BooleanVectorBlock + * [vector=ConstantBooleanVector[positions=1, value=true]]]] + */ + public void testIsNull_TextField_Pushdown_WithCount() { + var plan = plan(""" + from test + | eval filtered_job = job, count_job = job + | where filtered_job IS NULL + | stats c = COUNT(count_job) + """, IS_SV_STATS); + + var limit = as(plan, LimitExec.class); + var agg = as(limit.child(), AggregateExec.class); + var exg = as(agg.child(), ExchangeExec.class); + as(exg.child(), LocalSourceExec.class); + } + + /** + * count(x) adds an implicit "exists(x)" filter in the pushed down query. + * This test checks this "exists" doesn't clash with the "is not null" pushdown on the text field. + * In this particular query, "exists(x)" and "x is not null" go hand in hand and the query is pushed down to Lucene. 
+ * + * Expects + * + * LimitExec[1000[INTEGER]] + * \_AggregateExec[[],[COUNT(job{f}#19) AS c],FINAL,8] + * \_ExchangeExec[[count{r}#22, seen{r}#23],true] + * \_EsStatsQueryExec[test], stats[Stat[name=job, type=COUNT, query={ + * "exists" : { + * "field" : "job", + * "boost" : 1.0 + * } + * }]]], query[{"exists":{"field":"job","boost":1.0}}][count{r}#25, seen{r}#26], limit[], + */ + public void testIsNotNull_TextField_Pushdown_WithCount() { + var plan = plan(""" + from test + | eval filtered_job = job, count_job = job + | where filtered_job IS NOT NULL + | stats c = COUNT(count_job) + """, IS_SV_STATS); + + var limit = as(plan, LimitExec.class); + var agg = as(limit.child(), AggregateExec.class); + var exg = as(agg.child(), ExchangeExec.class); + var esStatsQuery = as(exg.child(), EsStatsQueryExec.class); + assertThat(esStatsQuery.limit(), is(nullValue())); + assertThat(Expressions.names(esStatsQuery.output()), contains("count", "seen")); + var stat = as(esStatsQuery.stats().get(0), Stat.class); + assertThat(stat.query(), is(QueryBuilders.existsQuery("job"))); + } + /** * Expects * LimitExec[1000[INTEGER]] diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/81_text_exact_subfields.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/81_text_exact_subfields.yml index 64d4665e3cfe7..3b58ee01edfa0 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/81_text_exact_subfields.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/81_text_exact_subfields.yml @@ -147,6 +147,29 @@ setup: - length: { values: 0 } + - do: + allowed_warnings_regex: + - "No limit defined, adding default limit of \\[.*\\]" + esql.query: + body: + query: 'from test | where text_ignore_above is not null | keep text_ignore_above, text_ignore_above.raw, text_normalizer, text_normalizer.raw, non_indexed, non_indexed.raw' + + - match: { columns.0.name: "text_ignore_above" } + - match: { columns.0.type: "text" } + 
- match: { columns.1.name: "text_ignore_above.raw" } + - match: { columns.1.type: "keyword" } + - match: { columns.2.name: "text_normalizer" } + - match: { columns.2.type: "text" } + - match: { columns.3.name: "text_normalizer.raw" } + - match: { columns.3.type: "keyword" } + - match: { columns.4.name: "non_indexed" } + - match: { columns.4.type: "text" } + - match: { columns.5.name: "non_indexed.raw" } + - match: { columns.5.type: "keyword" } + + - length: { values: 2 } + + - do: allowed_warnings_regex: - "No limit defined, adding default limit of \\[.*\\]"