From 5ec4b7df59a5f011560b24100beb0e590a15d692 Mon Sep 17 00:00:00 2001 From: zhanglirich Date: Tue, 22 May 2018 21:41:35 +0800 Subject: [PATCH] HIVE-19653: Incorrect predicate pushdown for groupby with grouping sets --- .../resources/testconfiguration.properties | 2 + .../hive/ql/ppd/ExprWalkerProcFactory.java | 24 +++++++- .../groupby_grouping_sets_pushdown1.q | 20 +++++++ .../groupby_grouping_sets_pushdown1.q.out | 57 +++++++++++++++++++ 4 files changed, 101 insertions(+), 2 deletions(-) create mode 100644 ql/src/test/queries/clientpositive/groupby_grouping_sets_pushdown1.q create mode 100644 ql/src/test/results/clientpositive/groupby_grouping_sets_pushdown1.q.out diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 6528ec6db022..349641169c3f 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -855,6 +855,7 @@ minillaplocal.query.files=\ optimize_nullscan.q,\ parquet_types.q,\ groupby_grouping_id2.q,\ + groupby_grouping_sets_pushdown1.q, \ constprog_semijoin.q,\ ppd_union_view.q,\ smb_mapjoin_19.q,\ @@ -1140,6 +1141,7 @@ spark.query.files=add_part_multiple.q, \ groupby_complex_types_multi_single_reducer.q, \ groupby_cube1.q, \ groupby_grouping_id2.q, \ + groupby_grouping_sets_pushdown1.q, \ groupby_map_ppr.q, \ groupby_map_ppr_multi_distinct.q, \ groupby_multi_insert_common_distinct.q, \ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java index b01a9bad7e65..5f2611656b9e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java @@ -85,15 +85,35 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, // replace the output expression with the input expression so that // parent op can understand 
this expression ExprNodeDesc exp = op.getColumnExprMap().get(colref.getColumn()); - // if the operator is a groupby and we are referencing the grouping - // id column, we cannot push the predicate + if (op instanceof GroupByOperator) { GroupByOperator groupBy = (GroupByOperator) op; if (groupBy.getConf().isGroupingSetsPresent()) { int groupingSetPlaceholderPos = groupBy.getConf().getKeys().size() - 1; + int groupKeyIndex = groupBy.getConf().getKeys().indexOf(exp); + + // if the operator is a groupby and we are referencing the grouping + // id column, we cannot push the predicate if (colref.getColumn().equals(groupBy.getSchema().getColumnNames().get(groupingSetPlaceholderPos))) { exp = null; } + + // if grouping sets are present (e.g. GROUP BY ... WITH CUBE) and the column is + // missing from at least one grouping set, we cannot push the predicate + // since the column value can be null. NOTE(review): confirm bit polarity of grouping ids + if (groupKeyIndex >= 0 && groupKeyIndex != groupingSetPlaceholderPos) { + List<Long> groupingIds = groupBy.getConf().getListGroupingSets(); + boolean hasMissedGroup = false; + for (long groupingId: groupingIds) { + if ((groupingId & (1L << groupKeyIndex)) == 0) { + hasMissedGroup = true; + break; + } + } + if (hasMissedGroup) { + exp = null; + } + } } } if (exp == null) { diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_sets_pushdown1.q b/ql/src/test/queries/clientpositive/groupby_grouping_sets_pushdown1.q new file mode 100644 index 000000000000..c0e349c5d3b1 --- /dev/null +++ b/ql/src/test/queries/clientpositive/groupby_grouping_sets_pushdown1.q @@ -0,0 +1,20 @@ +SET hive.cbo.enable=false; + +CREATE TABLE T1(a STRING, b STRING, s BIGINT); +INSERT OVERWRITE TABLE T1 VALUES ('aaa', 'bbb', 123456); + +-- should not pushdown, otherwise the filter 'a IS NOT NULL' will take no effect +-- SORT_QUERY_RESULTS +SELECT * FROM ( + SELECT 
a, b, sum(s) + FROM T1 + GROUP BY a, b GROUPING SETS ((a), (a, b)) +) t WHERE a IS NOT NULL; diff --git a/ql/src/test/results/clientpositive/groupby_grouping_sets_pushdown1.q.out b/ql/src/test/results/clientpositive/groupby_grouping_sets_pushdown1.q.out new file mode 100644 index 000000000000..f716fc9d3793 --- /dev/null +++ b/ql/src/test/results/clientpositive/groupby_grouping_sets_pushdown1.q.out @@ -0,0 +1,57 @@ +PREHOOK: query: CREATE TABLE T1(a STRING, b STRING, s BIGINT) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1(a STRING, b STRING, s BIGINT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: INSERT OVERWRITE TABLE T1 VALUES ('aaa', 'bbb', 123456) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t1 +POSTHOOK: query: INSERT OVERWRITE TABLE T1 VALUES ('aaa', 'bbb', 123456) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t1 +POSTHOOK: Lineage: t1.a SCRIPT [] +POSTHOOK: Lineage: t1.b SCRIPT [] +POSTHOOK: Lineage: t1.s SCRIPT [] +PREHOOK: query: SELECT * FROM ( + SELECT a, b, sum(s) + FROM T1 + GROUP BY a, b GROUPING SETS ((), (a), (b), (a, b)) +) t WHERE a IS NOT NULL +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM ( + SELECT a, b, sum(s) + FROM T1 + GROUP BY a, b GROUPING SETS ((), (a), (b), (a, b)) +) t WHERE a IS NOT NULL +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +NULL NULL 123456 +NULL bbb 123456 +aaa NULL 123456 +aaa bbb 123456 +PREHOOK: query: SELECT * FROM ( + SELECT a, b, sum(s) + FROM T1 + GROUP BY a, b GROUPING SETS ((a), (a, b)) +) t WHERE a IS NOT NULL +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM ( + SELECT a, b, sum(s) + FROM T1 
+ GROUP BY a, b GROUPING SETS ((a), (a, b)) +) t WHERE a IS NOT NULL +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +aaa NULL 123456 +aaa bbb 123456