From 3bf5a0aa8bd752a1422634950bd39a44469fc94e Mon Sep 17 00:00:00 2001 From: Bogdan Pintea Date: Wed, 27 Nov 2024 14:59:42 +0100 Subject: [PATCH 1/2] ESQL: fix COUNT filter pushdown (#117503) If `COUNT` agg has a filter applied, this must also be push down to source. This currently does not happen, but this issue is masked currently by two factors: * a logical optimisation, `ExtractAggregateCommonFilter` that extracts the filter out of the STATS entirely (and pushes it to source then from a `WHERE`); * the phisical plan optimisation implementing the push down, `PushStatsToSource`, currently only applies if there's just one agg function to push down. However, this fix needs to be applied since: * it's still present in versions prior to `ExtractAggregateCommonFilter` introduction; * the defect might resurface when the restriction in `PushStatsToSource` is lifted. Fixes #115522. (cherry picked from commit 560e0c5d0441a165f4588f8af869053b5202999f) --- docs/changelog/117503.yaml | 6 ++ .../src/main/resources/stats.csv-spec | 51 ++++++++++++++ .../physical/local/PushStatsToSource.java | 11 ++++ .../LocalPhysicalPlanOptimizerTests.java | 66 +++++++++++++++++++ .../esql/optimizer/TestPlannerOptimizer.java | 6 +- 5 files changed, 139 insertions(+), 1 deletion(-) create mode 100644 docs/changelog/117503.yaml diff --git a/docs/changelog/117503.yaml b/docs/changelog/117503.yaml new file mode 100644 index 0000000000000..d48741262b581 --- /dev/null +++ b/docs/changelog/117503.yaml @@ -0,0 +1,6 @@ +pr: 117503 +summary: Fix COUNT filter pushdown +area: ES|QL +type: bug +issues: + - 115522 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec index 0b0f844f7b8f2..6b0d6f226c235 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec @@ -2537,6 +2537,57 @@ c2:l |c2_f:l |m2:i |m2_f:i |c:l 1 |1 |5 |5 |21 ; +simpleCountOnFieldWithFilteringAndNoGrouping +required_capability: per_agg_filtering +from employees +| stats c1 = count(emp_no) where emp_no < 10042 +; + +c1:long +41 +; + +simpleCountOnFieldWithFilteringOnDifferentFieldAndNoGrouping +required_capability: per_agg_filtering +from employees +| stats c1 = count(hire_date) where emp_no < 10042 +; + +c1:long +41 +; + +simpleCountOnStarWithFilteringAndNoGrouping +required_capability: per_agg_filtering +from employees +| stats c1 = count(*) where emp_no < 10042 +; + +c1:long +41 +; + +simpleCountWithFilteringAndNoGroupingOnFieldWithNulls +required_capability: per_agg_filtering +from employees +| stats c1 = count(birth_date) where emp_no <= 10050 +; + +c1:long +40 +; + + +simpleCountWithFilteringAndNoGroupingOnFieldWithMultivalues +required_capability: per_agg_filtering +from employees +| stats c1 = count(job_positions) where emp_no <= 10003 +; + +c1:long +3 +; + filterIsAlwaysTrue required_capability: per_agg_filtering FROM employees diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushStatsToSource.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushStatsToSource.java index b0b86b43cd162..21bc360404628 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushStatsToSource.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushStatsToSource.java @@ -16,6 +16,7 @@ import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.NamedExpression; +import org.elasticsearch.xpack.esql.core.util.Queries; import org.elasticsearch.xpack.esql.core.util.StringUtils; import org.elasticsearch.xpack.esql.expression.function.aggregate.Count; import org.elasticsearch.xpack.esql.optimizer.LocalPhysicalOptimizerContext; @@ -25,12 +26,15 @@ import org.elasticsearch.xpack.esql.plan.physical.EsStatsQueryExec; import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; import org.elasticsearch.xpack.esql.planner.AbstractPhysicalOperationProviders; +import org.elasticsearch.xpack.esql.planner.PlannerUtils; import java.util.ArrayList; import java.util.List; +import static java.util.Arrays.asList; import static java.util.Collections.emptyList; import static java.util.Collections.singletonList; +import static org.elasticsearch.xpack.esql.optimizer.rules.physical.local.PushFiltersToSource.canPushToSource; import static org.elasticsearch.xpack.esql.plan.physical.EsStatsQueryExec.StatsType.COUNT; /** @@ -98,6 +102,13 @@ private Tuple, List> pushableStats( } } if (fieldName != null) { + if (count.hasFilter()) { + if (canPushToSource(count.filter()) == false) { + return null; // can't push down + } + var countFilter = PlannerUtils.TRANSLATOR_HANDLER.asQuery(count.filter()); + query = Queries.combine(Queries.Clause.MUST, asList(countFilter.asBuilder(), query)); + } return new EsStatsQueryExec.Stat(fieldName, COUNT, query); } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java index 3dd0828b82eed..af185458a51b1 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java @@ -41,7 +41,9 @@ import org.elasticsearch.xpack.esql.expression.function.EsqlFunctionRegistry; import org.elasticsearch.xpack.esql.index.EsIndex; import org.elasticsearch.xpack.esql.index.IndexResolution; +import org.elasticsearch.xpack.esql.optimizer.rules.logical.ExtractAggregateCommonFilter; import org.elasticsearch.xpack.esql.plan.logical.Enrich; +import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; import org.elasticsearch.xpack.esql.plan.physical.AggregateExec; import org.elasticsearch.xpack.esql.plan.physical.EsQueryExec; import org.elasticsearch.xpack.esql.plan.physical.EsStatsQueryExec; @@ -58,15 +60,18 @@ import org.elasticsearch.xpack.esql.planner.FilterTests; import org.elasticsearch.xpack.esql.plugin.QueryPragmas; import org.elasticsearch.xpack.esql.querydsl.query.SingleValueQuery; +import org.elasticsearch.xpack.esql.rule.Rule; import org.elasticsearch.xpack.esql.session.Configuration; import org.elasticsearch.xpack.esql.stats.Metrics; import org.elasticsearch.xpack.esql.stats.SearchStats; import org.junit.Before; import java.io.IOException; +import java.util.ArrayList; import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.function.Function; import static java.util.Arrays.asList; import static org.elasticsearch.compute.aggregation.AggregatorMode.FINAL; @@ -373,6 +378,67 @@ public void testMultiCountAllWithFilter() { assertThat(plan.anyMatch(EsQueryExec.class::isInstance), is(true)); } + @SuppressWarnings("unchecked") + public void testSingleCountWithStatsFilter() { + // an optimizer that filters out the ExtractAggregateCommonFilter rule + var logicalOptimizer = new LogicalPlanOptimizer(new LogicalOptimizerContext(config)) { + @Override + protected List> batches() { + var oldBatches = super.batches(); + List> newBatches = new ArrayList<>(oldBatches.size()); + for (var batch : oldBatches) { + List> rules = new ArrayList<>(List.of(batch.rules())); + rules.removeIf(r -> r instanceof ExtractAggregateCommonFilter); + newBatches.add(batch.with(rules.toArray(Rule[]::new))); + } + return newBatches; + } + }; + var analyzer = makeAnalyzer("mapping-default.json"); + var plannerOptimizer = new TestPlannerOptimizer(config, analyzer, logicalOptimizer); + var plan = plannerOptimizer.plan(""" + from test + | stats c = count(hire_date) where emp_no < 10042 + """, IS_SV_STATS); + + var limit = as(plan, LimitExec.class); + var agg = as(limit.child(), AggregateExec.class); + assertThat(agg.getMode(), is(FINAL)); + var exchange = as(agg.child(), ExchangeExec.class); + var esStatsQuery = as(exchange.child(), EsStatsQueryExec.class); + + Function compact = s -> s.replaceAll("\\s+", ""); + assertThat(compact.apply(esStatsQuery.query().toString()), is(compact.apply(""" + { + "bool": { + "must": [ + { + "exists": { + "field": "hire_date", + "boost": 1.0 + } + }, + { + "esql_single_value": { + "field": "emp_no", + "next": { + "range": { + "emp_no": { + "lt": 10042, + "boost": 1.0 + } + } + }, + "source": "emp_no < 10042@2:36" + } + } + ], + "boost": 1.0 + } + } + """))); + } + /** * Expecting * LimitExec[1000[INTEGER]] diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/TestPlannerOptimizer.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/TestPlannerOptimizer.java index 1d25146ee4e2d..bc4a5412573ab 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/TestPlannerOptimizer.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/TestPlannerOptimizer.java @@ -28,11 +28,15 @@ public class TestPlannerOptimizer { private final Configuration config; public TestPlannerOptimizer(Configuration config, Analyzer analyzer) { + this(config, analyzer, new LogicalPlanOptimizer(new LogicalOptimizerContext(config))); + } + + public TestPlannerOptimizer(Configuration config, Analyzer analyzer, LogicalPlanOptimizer logicalOptimizer) { this.analyzer = analyzer; this.config = config; + this.logicalOptimizer = logicalOptimizer; parser = new EsqlParser(); - logicalOptimizer = new LogicalPlanOptimizer(new LogicalOptimizerContext(config)); physicalPlanOptimizer = new PhysicalPlanOptimizer(new PhysicalOptimizerContext(config)); functionRegistry = new EsqlFunctionRegistry(); mapper = new Mapper(functionRegistry); From 55effc001b59523cfcff63813f496fe275482d48 Mon Sep 17 00:00:00 2001 From: Bogdan Pintea Date: Thu, 28 Nov 2024 10:22:32 +0100 Subject: [PATCH 2/2] 8.16 adaptation --- .../physical/local/PushStatsToSource.java | 2 +- .../LocalPhysicalPlanOptimizerTests.java | 37 +++++-------------- .../esql/optimizer/TestPlannerOptimizer.java | 6 +-- 3 files changed, 12 insertions(+), 33 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushStatsToSource.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushStatsToSource.java index 21bc360404628..11fba80dc6f79 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushStatsToSource.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushStatsToSource.java @@ -103,7 +103,7 @@ private Tuple, List> pushableStats( } if (fieldName != null) { if (count.hasFilter()) { - if (canPushToSource(count.filter()) == false) { + if (canPushToSource(count.filter(), fa -> false) == false) { return null; // can't push down } var countFilter = PlannerUtils.TRANSLATOR_HANDLER.asQuery(count.filter()); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java index af185458a51b1..782377acb6556 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java @@ -41,9 +41,7 @@ import org.elasticsearch.xpack.esql.expression.function.EsqlFunctionRegistry; import org.elasticsearch.xpack.esql.index.EsIndex; import org.elasticsearch.xpack.esql.index.IndexResolution; -import org.elasticsearch.xpack.esql.optimizer.rules.logical.ExtractAggregateCommonFilter; import org.elasticsearch.xpack.esql.plan.logical.Enrich; -import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; import org.elasticsearch.xpack.esql.plan.physical.AggregateExec; import org.elasticsearch.xpack.esql.plan.physical.EsQueryExec; import org.elasticsearch.xpack.esql.plan.physical.EsStatsQueryExec; @@ -60,14 +58,12 @@ import org.elasticsearch.xpack.esql.planner.FilterTests; import org.elasticsearch.xpack.esql.plugin.QueryPragmas; import org.elasticsearch.xpack.esql.querydsl.query.SingleValueQuery; -import org.elasticsearch.xpack.esql.rule.Rule; import org.elasticsearch.xpack.esql.session.Configuration; import org.elasticsearch.xpack.esql.stats.Metrics; import org.elasticsearch.xpack.esql.stats.SearchStats; import org.junit.Before; import java.io.IOException; -import java.util.ArrayList; import java.util.List; import java.util.Locale; import java.util.Map; @@ -380,22 +376,8 @@ public void testMultiCountAllWithFilter() { @SuppressWarnings("unchecked") public void testSingleCountWithStatsFilter() { - // an optimizer that filters out the ExtractAggregateCommonFilter rule - var logicalOptimizer = new LogicalPlanOptimizer(new LogicalOptimizerContext(config)) { - @Override - protected List> batches() { - var oldBatches = super.batches(); - List> newBatches = new ArrayList<>(oldBatches.size()); - for (var batch : oldBatches) { - List> rules = new ArrayList<>(List.of(batch.rules())); - rules.removeIf(r -> r instanceof ExtractAggregateCommonFilter); - newBatches.add(batch.with(rules.toArray(Rule[]::new))); - } - return newBatches; - } - }; - var analyzer = makeAnalyzer("mapping-default.json"); - var plannerOptimizer = new TestPlannerOptimizer(config, analyzer, logicalOptimizer); + var analyzer = makeAnalyzer("mapping-default.json", new EnrichResolution()); + var plannerOptimizer = new TestPlannerOptimizer(config, analyzer); var plan = plannerOptimizer.plan(""" from test | stats c = count(hire_date) where emp_no < 10042 @@ -406,18 +388,13 @@ protected List> batches() { assertThat(agg.getMode(), is(FINAL)); var exchange = as(agg.child(), ExchangeExec.class); var esStatsQuery = as(exchange.child(), EsStatsQueryExec.class); + assertThat(esStatsQuery.stats().size(), is(1)); Function compact = s -> s.replaceAll("\\s+", ""); - assertThat(compact.apply(esStatsQuery.query().toString()), is(compact.apply(""" + assertThat(compact.apply(esStatsQuery.stats().get(0).query().toString()), is(compact.apply(""" { "bool": { "must": [ - { - "exists": { - "field": "hire_date", - "boost": 1.0 - } - }, { "esql_single_value": { "field": "emp_no", @@ -431,6 +408,12 @@ protected List> batches() { }, "source": "emp_no < 10042@2:36" } + }, + { + "exists": { + "field": "hire_date", + "boost": 1.0 + } } ], "boost": 1.0 diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/TestPlannerOptimizer.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/TestPlannerOptimizer.java index bc4a5412573ab..1d25146ee4e2d 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/TestPlannerOptimizer.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/TestPlannerOptimizer.java @@ -28,15 +28,11 @@ public class TestPlannerOptimizer { private final Configuration config; public TestPlannerOptimizer(Configuration config, Analyzer analyzer) { - this(config, analyzer, new LogicalPlanOptimizer(new LogicalOptimizerContext(config))); - } - - public TestPlannerOptimizer(Configuration config, Analyzer analyzer, LogicalPlanOptimizer logicalOptimizer) { this.analyzer = analyzer; this.config = config; - this.logicalOptimizer = logicalOptimizer; parser = new EsqlParser(); + logicalOptimizer = new LogicalPlanOptimizer(new LogicalOptimizerContext(config)); physicalPlanOptimizer = new PhysicalPlanOptimizer(new PhysicalOptimizerContext(config)); functionRegistry = new EsqlFunctionRegistry(); mapper = new Mapper(functionRegistry);