Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,12 @@ private boolean isParentOpFeasible(Operator<?> parentOp) {
}

private String selectPartitionColumn(GroupByOperator gby, Operator<?> parentOp) {
if (parentOp.getColumnExprMap() == null) {
LOG.debug("Skip grouping-set optimization as the parent operator {} does not define a column " +
"expression mapping", parentOp);
return null;
}

if (parentOp.getSchema() == null || parentOp.getSchema().getSignature() == null) {
LOG.debug("Skip grouping-set optimization as the parent operator {} does not provide signature",
parentOp);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,22 @@
-- SORT_QUERY_RESULTS

-- Fixture table for the grouping-set tests; it is still empty when the first query below runs
-- (rows are inserted further down in this script).
create table grp_set_test (key string, value string, col0 int, col1 int, col2 int, col3 int);

-- UNION case, can't be optimized
-- Regression case: ROLLUP over a UNION ALL subquery whose two branches read the same CTE and
-- differ only in the constant used as the grouping key.
-- NOTE(review): the threshold is presumably lowered to 1 so that the grouping-set optimization
-- is attempted for this query -- confirm against the optimizer's threshold semantics.
set hive.optimize.grouping.set.threshold=1;
with sub_qr as (select col2 from grp_set_test)
select grpBy_col, sum(col2)
from
( select 'abc' as grpBy_col, col2 from sub_qr union all select 'def' as grpBy_col, col2 from sub_qr) x
group by grpBy_col with rollup;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I confirmed that the master branch definitely throws an NPE.


-- Record the plan of the same UNION ALL + ROLLUP query that was executed above.
explain
with sub_qr as (select col2 from grp_set_test)
select grpBy_col, sum(col2)
from
( select 'abc' as grpBy_col, col2 from sub_qr union all select 'def' as grpBy_col, col2 from sub_qr) x
group by grpBy_col with rollup;

-- Load three rows; values are positional, in table column order (key, value, col0, col1, col2, col3).
insert into grp_set_test values (1, 1, 1, 1, 1, 1), (1, 1, 1, 2, 2, 10), (1, 1, 1, 2, 3, 100);

-- Should not be optimized
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,137 @@ POSTHOOK: query: create table grp_set_test (key string, value string, col0 int,
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@grp_set_test
PREHOOK: query: with sub_qr as (select col2 from grp_set_test)
select grpBy_col, sum(col2)
from
( select 'abc' as grpBy_col, col2 from sub_qr union all select 'def' as grpBy_col, col2 from sub_qr) x
group by grpBy_col with rollup
PREHOOK: type: QUERY
PREHOOK: Input: default@grp_set_test
#### A masked pattern was here ####
POSTHOOK: query: with sub_qr as (select col2 from grp_set_test)
select grpBy_col, sum(col2)
from
( select 'abc' as grpBy_col, col2 from sub_qr union all select 'def' as grpBy_col, col2 from sub_qr) x
group by grpBy_col with rollup
POSTHOOK: type: QUERY
POSTHOOK: Input: default@grp_set_test
#### A masked pattern was here ####
NULL NULL
PREHOOK: query: explain
with sub_qr as (select col2 from grp_set_test)
select grpBy_col, sum(col2)
from
( select 'abc' as grpBy_col, col2 from sub_qr union all select 'def' as grpBy_col, col2 from sub_qr) x
group by grpBy_col with rollup
PREHOOK: type: QUERY
PREHOOK: Input: default@grp_set_test
#### A masked pattern was here ####
POSTHOOK: query: explain
with sub_qr as (select col2 from grp_set_test)
select grpBy_col, sum(col2)
from
( select 'abc' as grpBy_col, col2 from sub_qr union all select 'def' as grpBy_col, col2 from sub_qr) x
group by grpBy_col with rollup
POSTHOOK: type: QUERY
POSTHOOK: Input: default@grp_set_test
#### A masked pattern was here ####
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1

STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Map 1 <- Union 2 (CONTAINS)
Map 4 <- Union 2 (CONTAINS)
Reducer 3 <- Union 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: grp_set_test
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: 'abc' (type: string), col2 (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(_col1)
keys: _col0 (type: string), 0L (type: bigint)
grouping sets: 0, 1
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: bigint)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
value expressions: _col2 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map 4
Map Operator Tree:
TableScan
alias: grp_set_test
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: 'def' (type: string), col2 (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(_col1)
keys: _col0 (type: string), 0L (type: bigint)
grouping sets: 0, 1
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: bigint)
null sort order: zz
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
value expressions: _col2 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: all inputs
Reducer 3
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0)
keys: KEY._col0 (type: string), KEY._col1 (type: bigint)
mode: mergepartial
outputColumnNames: _col0, _col2
Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
pruneGroupingSetId: true
Select Operator
expressions: _col0 (type: string), _col2 (type: bigint)
outputColumnNames: _col0, _col1
Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Union 2
Vertex: Union 2

Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink

PREHOOK: query: insert into grp_set_test values (1, 1, 1, 1, 1, 1), (1, 1, 1, 2, 2, 10), (1, 1, 1, 2, 3, 100)
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
Expand Down