Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/105593.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 105593
summary: "ESQL: push down \"[text_field] is not null\""
area: ES|QL
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -1195,3 +1195,25 @@ ROW a = 1 | STATS couNt(*) | SORT `couNt(*)`
couNt(*):l
1
;

isNullWithStatsCount_On_TextField
FROM airports
| EVAL s = name, x = name
| WHERE s IS NULL
| STATS c = COUNT(x)
;

c:l
0
;

isNotNullWithStatsCount_On_TextField
FROM airports
| EVAL s = name, x = name
| WHERE s IS NOT NULL
| STATS c = COUNT(x)
;

c:l
891
;
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,11 @@ public static boolean canPushToSource(Expression exp, Predicate<FieldAttribute>
return canPushToSource(not.field(), hasIdenticalDelegate);
} else if (exp instanceof UnaryScalarFunction usf) {
if (usf instanceof RegexMatch<?> || usf instanceof IsNull || usf instanceof IsNotNull) {
if (usf instanceof IsNull || usf instanceof IsNotNull) {
if (usf.field() instanceof FieldAttribute fa && fa.dataType().equals(DataTypes.TEXT)) {
return true;
}
}
return isAttributePushable(usf.field(), usf, hasIdenticalDelegate);
}
} else if (exp instanceof CIDRMatch cidrMatch) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@

import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
Expand Down Expand Up @@ -399,6 +400,7 @@ public void testIsNotNullPushdownFilter() {

/**
* Expects
*
* LimitExec[1000[INTEGER]]
* \_ExchangeExec[[],false]
* \_ProjectExec[[_meta_field{f}#9, emp_no{f}#3, first_name{f}#4, gender{f}#5, job{f}#10, job.raw{f}#11, languages{f}#6, last_n
Expand All @@ -420,6 +422,115 @@ public void testIsNullPushdownFilter() {
assertThat(query.query().toString(), is(expected.toString()));
}

/**
* Expects
*
* LimitExec[500[INTEGER]]
* \_AggregateExec[[],[COUNT(gender{f}#7) AS count(gender)],FINAL,null]
* \_ExchangeExec[[count{r}#15, seen{r}#16],true]
* \_AggregateExec[[],[COUNT(gender{f}#7) AS count(gender)],PARTIAL,8]
* \_FieldExtractExec[gender{f}#7]
* \_EsQueryExec[test], query[{"exists":{"field":"gender","boost":1.0}}][_doc{f}#17], limit[], sort[] estimatedRowSize[54]
*/
public void testIsNotNull_TextField_Pushdown() {
String textField = randomFrom("gender", "job");
var plan = plan(String.format(Locale.ROOT, "from test | where %s is not null | stats count(%s)", textField, textField));

var limit = as(plan, LimitExec.class);
var finalAgg = as(limit.child(), AggregateExec.class);
var exchange = as(finalAgg.child(), ExchangeExec.class);
var partialAgg = as(exchange.child(), AggregateExec.class);
var fieldExtract = as(partialAgg.child(), FieldExtractExec.class);
var query = as(fieldExtract.child(), EsQueryExec.class);
var expected = QueryBuilders.existsQuery(textField);
assertThat(query.query().toString(), is(expected.toString()));
}

/**
* Expects
* LimitExec[1000[INTEGER]]
* \_ExchangeExec[[],false]
* \_ProjectExec[[_meta_field{f}#9, emp_no{f}#3, first_name{f}#4, gender{f}#5, job{f}#10, job.raw{f}#11, languages{f}#6, last_n
* ame{f}#7, long_noidx{f}#12, salary{f}#8]]
* \_FieldExtractExec[_meta_field{f}#9, emp_no{f}#3, first_name{f}#4, gen..]
* \_EsQueryExec[test], query[{"bool":{"must_not":[{"exists":{"field":"gender","boost":1.0}}],"boost":1.0}}]
* [_doc{f}#13], limit[1000], sort[] estimatedRowSize[324]
*/
public void testIsNull_TextField_Pushdown() {
String textField = randomFrom("gender", "job");
var plan = plan(String.format(Locale.ROOT, "from test | where %s is null", textField, textField));

var limit = as(plan, LimitExec.class);
var exchange = as(limit.child(), ExchangeExec.class);
var project = as(exchange.child(), ProjectExec.class);
var fieldExtract = as(project.child(), FieldExtractExec.class);
var query = as(fieldExtract.child(), EsQueryExec.class);
var expected = QueryBuilders.boolQuery().mustNot(QueryBuilders.existsQuery(textField));
assertThat(query.query().toString(), is(expected.toString()));
}

/**
* count(x) adds an implicit "exists(x)" filter in the pushed down query
* This test checks this "exists" doesn't clash with the "is null" pushdown on the text field.
* In this particular query, "exists(x)" and "x is null" cancel each other out.
*
* Expects
*
* LimitExec[1000[INTEGER]]
* \_AggregateExec[[],[COUNT(job{f}#19) AS c],FINAL,8]
* \_ExchangeExec[[count{r}#22, seen{r}#23],true]
* \_LocalSourceExec[[count{r}#22, seen{r}#23],[LongVectorBlock[vector=ConstantLongVector[positions=1, value=0]], BooleanVectorBlock
* [vector=ConstantBooleanVector[positions=1, value=true]]]]
*/
public void testIsNull_TextField_Pushdown_WithCount() {
var plan = plan("""
from test
| eval filtered_job = job, count_job = job
| where filtered_job IS NULL
| stats c = COUNT(count_job)
""", IS_SV_STATS);

var limit = as(plan, LimitExec.class);
var agg = as(limit.child(), AggregateExec.class);
var exg = as(agg.child(), ExchangeExec.class);
as(exg.child(), LocalSourceExec.class);
}

/**
* count(x) adds an implicit "exists(x)" filter in the pushed down query.
* This test checks this "exists" doesn't clash with the "is null" pushdown on the text field.
* In this particular query, "exists(x)" and "x is not null" go hand in hand and the query is pushed down to Lucene.
*
* Expects
*
* LimitExec[1000[INTEGER]]
* \_AggregateExec[[],[COUNT(job{f}#19) AS c],FINAL,8]
* \_ExchangeExec[[count{r}#22, seen{r}#23],true]
* \_EsStatsQueryExec[test], stats[Stat[name=job, type=COUNT, query={
* "exists" : {
* "field" : "job",
* "boost" : 1.0
* }
* }]]], query[{"exists":{"field":"job","boost":1.0}}][count{r}#25, seen{r}#26], limit[],
*/
public void testIsNotNull_TextField_Pushdown_WithCount() {
var plan = plan("""
from test
| eval filtered_job = job, count_job = job
| where filtered_job IS NOT NULL
| stats c = COUNT(count_job)
""", IS_SV_STATS);

var limit = as(plan, LimitExec.class);
var agg = as(limit.child(), AggregateExec.class);
var exg = as(agg.child(), ExchangeExec.class);
var esStatsQuery = as(exg.child(), EsStatsQueryExec.class);
assertThat(esStatsQuery.limit(), is(nullValue()));
assertThat(Expressions.names(esStatsQuery.output()), contains("count", "seen"));
var stat = as(esStatsQuery.stats().get(0), Stat.class);
assertThat(stat.query(), is(QueryBuilders.existsQuery("job")));
}

/**
* Expects
* LimitExec[1000[INTEGER]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,29 @@ setup:
- length: { values: 0 }


- do:
allowed_warnings_regex:
- "No limit defined, adding default limit of \\[.*\\]"
esql.query:
body:
query: 'from test | where text_ignore_above is not null | keep text_ignore_above, text_ignore_above.raw, text_normalizer, text_normalizer.raw, non_indexed, non_indexed.raw'

- match: { columns.0.name: "text_ignore_above" }
- match: { columns.0.type: "text" }
- match: { columns.1.name: "text_ignore_above.raw" }
- match: { columns.1.type: "keyword" }
- match: { columns.2.name: "text_normalizer" }
- match: { columns.2.type: "text" }
- match: { columns.3.name: "text_normalizer.raw" }
- match: { columns.3.type: "keyword" }
- match: { columns.4.name: "non_indexed" }
- match: { columns.4.type: "text" }
- match: { columns.5.name: "non_indexed.raw" }
- match: { columns.5.type: "keyword" }

- length: { values: 2 }


- do:
allowed_warnings_regex:
- "No limit defined, adding default limit of \\[.*\\]"
Expand Down