From 227f5fa06f085db3d3a42e9efc463e917d281b27 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Mon, 28 Apr 2025 13:50:08 +0200 Subject: [PATCH] ESQL: Fix count optimization with pushable union types (#127225) When pushing down STATS count(field::type) to Lucene for a union-typed field, use the correct field name in the Lucene query and not the synthetic attribute name $$field$converted_to$type. --- docs/changelog/127225.yaml | 6 ++ .../src/main/resources/union_types.csv-spec | 67 +++++++++++++++++++ .../xpack/esql/action/EsqlCapabilities.java | 6 ++ .../local/ReplaceMissingFieldWithNull.java | 6 +- .../physical/local/PushStatsToSource.java | 8 ++- 5 files changed, 88 insertions(+), 5 deletions(-) create mode 100644 docs/changelog/127225.yaml diff --git a/docs/changelog/127225.yaml b/docs/changelog/127225.yaml new file mode 100644 index 0000000000000..b161a5c40bfdf --- /dev/null +++ b/docs/changelog/127225.yaml @@ -0,0 +1,6 @@ +pr: 127225 +summary: Fix count optimization with pushable union types +area: ES|QL +type: bug +issues: + - 127200 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec index 8b19bc589fcff..aa08799943ade 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec @@ -393,6 +393,73 @@ count:long | @timestamp:date 0 | 2023-10-23T13:50:00.000Z ; +multiIndexIpStatsNonPushableCount +// Could be pushed to Lucene if we knew whether ip fields are single valued or not. +// May still be pushed down on multi-node environments if a node has only the index where client_ip is keyword. +required_capability: union_types +required_capability: fix_count_pushdown_for_union_types + +FROM sample_data, sample_data_str +| STATS count=count(client_ip::ip) +; + +count:long +14 +; + +multiIndexIpStatsNonPushableCountEval +// Could be pushed to Lucene if we knew whether ip fields are single valued or not. +// May still be pushed down on multi-node environments if a node has only the index where client_ip is keyword. +required_capability: union_types +required_capability: fix_count_pushdown_for_union_types + +FROM sample_data, sample_data_str +| EVAL client_ip = client_ip::ip +| STATS count=count(client_ip) +; + +count:long +14 +; + +multiIndexIpStatsNonPushableCountWithFilter +// Currently not pushable: has 2 aggs and we don't consider multi-typed fields aggregatable. +required_capability: union_types +required_capability: fix_count_pushdown_for_union_types + +FROM sample_data, sample_data_ts_long +| STATS count_matching=count(@timestamp::long) WHERE @timestamp::long >= 1698069301543, + total_count=count(@timestamp::long) +; + +count_matching:long | total_count:long +2 | 14 +; + +multiIndexIpStatsPushableCount +required_capability: union_types +required_capability: fix_count_pushdown_for_union_types + +FROM sample_data, sample_data_ts_long +| STATS count=count(@timestamp::long) +; + +count:long +14 +; + +multiIndexIpStatsPushableCountEval +required_capability: union_types +required_capability: fix_count_pushdown_for_union_types + +FROM sample_data, sample_data_ts_long +| EVAL @timestamp = @timestamp::long +| STATS count=count(@timestamp) +; + +count:long +14 +; multiIndexIpStringStatsInline2 required_capability: union_types diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index f01e2043d5c13..65e1237ef6b9e 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -375,6 +375,12 @@ public enum Cap { */ UNION_TYPES_AGG_CAST, + /** + * When pushing down {@code STATS count(field::type)} for a union type field, we wrongly used a synthetic attribute name in the + * query instead of the actual field name. This led to 0 counts instead of the correct result. + */ + FIX_COUNT_PUSHDOWN_FOR_UNION_TYPES, + /** * Fix to GROK validation in case of multiple fields with same name and different types * https://github.com/elastic/elasticsearch/issues/110533 diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java index 64512351644b4..0441539a92a82 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java @@ -65,12 +65,12 @@ public LogicalPlan apply(LogicalPlan plan, LocalLogicalOptimizerContext localLog private LogicalPlan missingToNull(LogicalPlan plan, Predicate shouldBeRetained) { if (plan instanceof EsRelation relation) { - // Remove missing fields from the EsRelation because this is not where we will obtain them from; replace them by an Eval right - // after, instead. This allows us to safely re-use the attribute ids of the corresponding FieldAttributes. + // For any missing field, place an Eval right after the EsRelation to assign null values to that attribute (using the same name + // id!), thus avoiding that InsertFieldExtrations inserts a field extraction later. // This means that an EsRelation[field1, field2, field3] where field1 and field 3 are missing will be replaced by // Project[field1, field2, field3] <- keeps the ordering intact // \_Eval[field1 = null, field3 = null] - // \_EsRelation[field2] + // \_EsRelation[field1, field2, field3] List relationOutput = relation.output(); Map nullLiterals = Maps.newLinkedHashMapWithExpectedSize(DataType.types().size()); List newProjections = new ArrayList<>(relationOutput.size()); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushStatsToSource.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushStatsToSource.java index e3954688aed41..3fec1644b92d2 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushStatsToSource.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushStatsToSource.java @@ -94,15 +94,19 @@ private Tuple, List> pushableStats( // check if regular field else { if (target instanceof FieldAttribute fa) { - var fName = fa.name(); + var fName = fa.fieldName(); if (context.searchStats().isSingleValue(fName)) { - fieldName = fa.name(); + fieldName = fName; query = QueryBuilders.existsQuery(fieldName); } } } if (fieldName != null) { if (count.hasFilter()) { + // Note: currently, it seems like we never actually perform stats pushdown if we reach here. + // That's because stats pushdown only works for 1 agg function (without BY); but in that case, filters + // are extracted into a separate filter node upstream from the aggregation (and hopefully pushed into + // the EsQueryExec separately). if (canPushToSource(count.filter()) == false) { return null; // can't push down }