diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupingSetOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupingSetOptimizer.java index 2ebbf0489056..4563aea73dc3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupingSetOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupingSetOptimizer.java @@ -182,6 +182,12 @@ private boolean isParentOpFeasible(Operator parentOp) { } private String selectPartitionColumn(GroupByOperator gby, Operator parentOp) { + if (parentOp.getColumnExprMap() == null) { + LOG.debug("Skip grouping-set optimization as the parent operator {} does not define a column " + + "expression mapping", parentOp); + return null; + } + if (parentOp.getSchema() == null || parentOp.getSchema().getSignature() == null) { LOG.debug("Skip grouping-set optimization as the parent operator {} does not provide signature", parentOp); diff --git a/ql/src/test/queries/clientpositive/groupingset_optimize_hive_28489.q b/ql/src/test/queries/clientpositive/groupingset_optimize_hive_28489.q index a8e332808d24..b9f81f6af956 100644 --- a/ql/src/test/queries/clientpositive/groupingset_optimize_hive_28489.q +++ b/ql/src/test/queries/clientpositive/groupingset_optimize_hive_28489.q @@ -1,6 +1,22 @@ -- SORT_QUERY_RESULTS create table grp_set_test (key string, value string, col0 int, col1 int, col2 int, col3 int); + +-- UNION case, can't be optimized +set hive.optimize.grouping.set.threshold=1; +with sub_qr as (select col2 from grp_set_test) +select grpBy_col, sum(col2) +from +( select 'abc' as grpBy_col, col2 from sub_qr union all select 'def' as grpBy_col, col2 from sub_qr) x +group by grpBy_col with rollup; + +explain +with sub_qr as (select col2 from grp_set_test) +select grpBy_col, sum(col2) +from +( select 'abc' as grpBy_col, col2 from sub_qr union all select 'def' as grpBy_col, col2 from sub_qr) x +group by grpBy_col with rollup; + insert into grp_set_test values (1, 1, 1, 1, 1, 1), (1, 1, 1, 2, 2, 10), (1, 1, 1, 2, 3, 100); -- Should not be optimized diff --git a/ql/src/test/results/clientpositive/llap/groupingset_optimize_hive_28489.q.out b/ql/src/test/results/clientpositive/llap/groupingset_optimize_hive_28489.q.out index 5d0c17d370b8..4b87ccfdc52d 100644 --- a/ql/src/test/results/clientpositive/llap/groupingset_optimize_hive_28489.q.out +++ b/ql/src/test/results/clientpositive/llap/groupingset_optimize_hive_28489.q.out @@ -6,6 +6,137 @@ POSTHOOK: query: create table grp_set_test (key string, value string, col0 int, POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@grp_set_test +PREHOOK: query: with sub_qr as (select col2 from grp_set_test) +select grpBy_col, sum(col2) +from +( select 'abc' as grpBy_col, col2 from sub_qr union all select 'def' as grpBy_col, col2 from sub_qr) x +group by grpBy_col with rollup +PREHOOK: type: QUERY +PREHOOK: Input: default@grp_set_test +#### A masked pattern was here #### +POSTHOOK: query: with sub_qr as (select col2 from grp_set_test) +select grpBy_col, sum(col2) +from +( select 'abc' as grpBy_col, col2 from sub_qr union all select 'def' as grpBy_col, col2 from sub_qr) x +group by grpBy_col with rollup +POSTHOOK: type: QUERY +POSTHOOK: Input: default@grp_set_test +#### A masked pattern was here #### +NULL NULL +PREHOOK: query: explain +with sub_qr as (select col2 from grp_set_test) +select grpBy_col, sum(col2) +from +( select 'abc' as grpBy_col, col2 from sub_qr union all select 'def' as grpBy_col, col2 from sub_qr) x +group by grpBy_col with rollup +PREHOOK: type: QUERY +PREHOOK: Input: default@grp_set_test +#### A masked pattern was here #### +POSTHOOK: query: explain +with sub_qr as (select col2 from grp_set_test) +select grpBy_col, sum(col2) +from +( select 'abc' as grpBy_col, col2 from sub_qr union all select 'def' as grpBy_col, col2 from sub_qr) x +group by grpBy_col with rollup +POSTHOOK: type: QUERY +POSTHOOK: Input: default@grp_set_test +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Union 2 (CONTAINS) + Map 4 <- Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: grp_set_test + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'abc' (type: string), col2 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col1) + keys: _col0 (type: string), 0L (type: bigint) + grouping sets: 0, 1 + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: bigint) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: grp_set_test + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 'def' (type: string), col2 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col1) + keys: _col0 (type: string), 0L (type: bigint) + grouping sets: 0, 1 + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: bigint) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col2 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col0 (type: string), _col2 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: insert into grp_set_test values (1, 1, 1, 1, 1, 1), (1, 1, 1, 2, 2, 10), (1, 1, 1, 2, 3, 100) PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table