Skip to content

Commit

Permalink
improve
Browse files Browse the repository at this point in the history
  • Loading branch information
dengzhhu653 committed Jun 11, 2021
1 parent cb0541a commit c677418
Show file tree
Hide file tree
Showing 3 changed files with 327 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9221,7 +9221,7 @@ private ExprNodeDesc[][] genJoinKeys(QBJoinTree joinTree, Operator[] inputs)

@SuppressWarnings("nls")
private Operator genJoinReduceSinkChild(ExprNodeDesc[] joinKeys,
Operator<?> parent, String[] srcs, int tag) throws SemanticException {
Operator<?> parent, int tag) throws SemanticException {

Operator dummy = Operator.createDummy(); // dummy for backtracking
dummy.setParentOperators(Arrays.asList(parent));
Expand Down Expand Up @@ -9323,8 +9323,6 @@ private Operator genJoinReduceSinkChild(ExprNodeDesc[] joinKeys,
translatorMap.put(oldName, newName);
}

RowSchema defaultRs = new RowSchema(outputRR.getColumnInfos());

List<ColumnInfo> newColumnInfos = new ArrayList<ColumnInfo>();
for (ColumnInfo ci : outputRR.getColumnInfos()) {
if (translatorMap.containsKey(ci.getInternalName())) {
Expand All @@ -9339,7 +9337,9 @@ private Operator genJoinReduceSinkChild(ExprNodeDesc[] joinKeys,

rsOp.setValueIndex(index);
rsOp.setColumnExprMap(colExprMap);
rsOp.setInputAliases(srcs);
List<String> inputAliaes= new ArrayList<>(parent.getSchema().getTableNames());
Collections.sort(inputAliaes);
rsOp.setInputAliases(inputAliaes.toArray(new String[0]));
return rsOp;
}

Expand Down Expand Up @@ -9399,11 +9399,10 @@ private Operator genJoinOperator(QB qb, QBJoinTree joinTree,

for (int i = 0; i < srcOps.length; i++) {
// generate a ReduceSink operator for the join
String[] srcs = baseSrc[i] != null ? new String[] {baseSrc[i]} : joinTree.getLeftAliases();
if (!isCBOExecuted()) {
srcOps[i] = genNotNullFilterForJoinSourcePlan(qb, srcOps[i], joinTree, joinKeys[i]);
}
srcOps[i] = genJoinReduceSinkChild(joinKeys[i], srcOps[i], srcs, joinTree.getNextTag());
srcOps[i] = genJoinReduceSinkChild(joinKeys[i], srcOps[i], joinTree.getNextTag());
}

Operator<?> topOp = genJoinOperatorChildren(joinTree, joinSrcOp, srcOps, omitOpts, joinKeys);
Expand Down
43 changes: 43 additions & 0 deletions ql/src/test/queries/clientpositive/lateral_left_semi_join.q
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
-- Test script: a LATERAL VIEW explode source combined with an IN subquery predicate.
-- service_stat_log has a single array-of-struct column that the queries below explode.
create table service_stat_log(
logitems array<struct<dsp:string,iswin:boolean,adid:int,triggerId:string>>
);
-- ad_info is the table read by the IN subquery.
create table ad_info(adid int, subaccountid int);

-- Five rows, each holding a one-element array. Only the rows with triggerId a and b
-- have dsp equal to delivery or ocpa together with iswin = true.
insert into table service_stat_log values
(array(named_struct('dsp', 'delivery', 'iswin', true, 'adid', 1, 'triggerId', 'a'))),
(array(named_struct('dsp', 'ocpa', 'iswin', true, 'adid', 2, 'triggerId', 'b'))),
(array(named_struct('dsp', 'ocpa', 'iswin', false, 'adid', 3, 'triggerId', 'c'))),
(array(named_struct('dsp', 'other', 'iswin', true, 'adid', 4, 'triggerId', 'd'))),
(array(named_struct('dsp', 'other', 'iswin', false, 'adid', 5, 'triggerId', 'e')));

-- Every adid from 1 to 5 is present, each with one of the two subaccountid values
-- that the subquery filters on, so the subquery returns all five adids.
insert into table ad_info values
(1, 16010),
(2, 14863),
(3, 16010),
(4, 14863),
(5, 16010);

-- Print the query plan for the lateral view filtered by the IN subquery.
explain select count(distinct logItem.triggerId)
from service_stat_log LATERAL VIEW explode(logItems) LogItemTable AS logItem
where logItem.dsp in ('delivery', 'ocpa')
and logItem.iswin = true
and logItem.adid in (
select distinct adId
from
ad_info
where
subAccountId in (16010, 14863));

-- Execute the same query. With the rows inserted above, triggerId a and b qualify,
-- so the distinct count is 2.
select count(distinct logItem.triggerId)
from service_stat_log LATERAL VIEW explode(logItems) LogItemTable AS logItem
where logItem.dsp in ('delivery', 'ocpa')
and logItem.iswin = true
and logItem.adid in (
select distinct adId
from
ad_info
where
subAccountId in (16010, 14863));

-- Remove the test tables.
drop table service_stat_log;
drop table ad_info;
279 changes: 279 additions & 0 deletions ql/src/test/results/clientpositive/llap/lateral_left_semi_join.q.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,279 @@
PREHOOK: query: create table service_stat_log(
logitems array<struct<dsp:string,iswin:boolean,adid:int,triggerId:string>>
)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@service_stat_log
POSTHOOK: query: create table service_stat_log(
logitems array<struct<dsp:string,iswin:boolean,adid:int,triggerId:string>>
)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@service_stat_log
PREHOOK: query: create table ad_info(adid int, subaccountid int)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@ad_info
POSTHOOK: query: create table ad_info(adid int, subaccountid int)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@ad_info
PREHOOK: query: insert into table service_stat_log values
(array(named_struct('dsp', 'delivery', 'iswin', true, 'adid', 1, 'triggerId', 'a'))),
(array(named_struct('dsp', 'ocpa', 'iswin', true, 'adid', 2, 'triggerId', 'b'))),
(array(named_struct('dsp', 'ocpa', 'iswin', false, 'adid', 3, 'triggerId', 'c'))),
(array(named_struct('dsp', 'other', 'iswin', true, 'adid', 4, 'triggerId', 'd'))),
(array(named_struct('dsp', 'other', 'iswin', false, 'adid', 5, 'triggerId', 'e')))
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@service_stat_log
POSTHOOK: query: insert into table service_stat_log values
(array(named_struct('dsp', 'delivery', 'iswin', true, 'adid', 1, 'triggerId', 'a'))),
(array(named_struct('dsp', 'ocpa', 'iswin', true, 'adid', 2, 'triggerId', 'b'))),
(array(named_struct('dsp', 'ocpa', 'iswin', false, 'adid', 3, 'triggerId', 'c'))),
(array(named_struct('dsp', 'other', 'iswin', true, 'adid', 4, 'triggerId', 'd'))),
(array(named_struct('dsp', 'other', 'iswin', false, 'adid', 5, 'triggerId', 'e')))
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@service_stat_log
POSTHOOK: Lineage: service_stat_log.logitems SCRIPT []
PREHOOK: query: insert into table ad_info values
(1, 16010),
(2, 14863),
(3, 16010),
(4, 14863),
(5, 16010)
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@ad_info
POSTHOOK: query: insert into table ad_info values
(1, 16010),
(2, 14863),
(3, 16010),
(4, 14863),
(5, 16010)
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@ad_info
POSTHOOK: Lineage: ad_info.adid SCRIPT []
POSTHOOK: Lineage: ad_info.subaccountid SCRIPT []
PREHOOK: query: explain select count(distinct logItem.triggerId)
from service_stat_log LATERAL VIEW explode(logItems) LogItemTable AS logItem
where logItem.dsp in ('delivery', 'ocpa')
and logItem.iswin = true
and logItem.adid in (
select distinct adId
from
ad_info
where
subAccountId in (16010, 14863))
PREHOOK: type: QUERY
PREHOOK: Input: default@ad_info
PREHOOK: Input: default@service_stat_log
#### A masked pattern was here ####
POSTHOOK: query: explain select count(distinct logItem.triggerId)
from service_stat_log LATERAL VIEW explode(logItems) LogItemTable AS logItem
where logItem.dsp in ('delivery', 'ocpa')
and logItem.iswin = true
and logItem.adid in (
select distinct adId
from
ad_info
where
subAccountId in (16010, 14863))
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ad_info
POSTHOOK: Input: default@service_stat_log
#### A masked pattern was here ####
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1

STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
Reducer 5 <- Map 4 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: service_stat_log
Statistics: Num rows: 5 Data size: 22400 Basic stats: COMPLETE Column stats: NONE
Lateral View Forward
Statistics: Num rows: 5 Data size: 22400 Basic stats: COMPLETE Column stats: NONE
Select Operator
Statistics: Num rows: 5 Data size: 22400 Basic stats: COMPLETE Column stats: NONE
Lateral View Join Operator
outputColumnNames: _col5
Statistics: Num rows: 1 Data size: 8960 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col5.adid (type: int)
null sort order: z
sort order: +
Map-reduce partition columns: _col5.adid (type: int)
Statistics: Num rows: 1 Data size: 8960 Basic stats: COMPLETE Column stats: NONE
value expressions: _col5 (type: struct<dsp:string,iswin:boolean,adid:int,triggerid:string>)
Select Operator
expressions: logitems (type: array<struct<dsp:string,iswin:boolean,adid:int,triggerid:string>>)
outputColumnNames: _col0
Statistics: Num rows: 5 Data size: 22400 Basic stats: COMPLETE Column stats: NONE
UDTF Operator
Statistics: Num rows: 5 Data size: 22400 Basic stats: COMPLETE Column stats: NONE
function name: explode
Filter Operator
predicate: (col.adid is not null and (col.dsp) IN ('delivery', 'ocpa') and (col.iswin = true)) (type: boolean)
Statistics: Num rows: 1 Data size: 4480 Basic stats: COMPLETE Column stats: NONE
Lateral View Join Operator
outputColumnNames: _col5
Statistics: Num rows: 1 Data size: 8960 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col5.adid (type: int)
null sort order: z
sort order: +
Map-reduce partition columns: _col5.adid (type: int)
Statistics: Num rows: 1 Data size: 8960 Basic stats: COMPLETE Column stats: NONE
value expressions: _col5 (type: struct<dsp:string,iswin:boolean,adid:int,triggerid:string>)
Execution mode: llap
LLAP IO: all inputs
Map 4
Map Operator Tree:
TableScan
alias: ad_info
filterExpr: ((subaccountid) IN (16010, 14863) and adid is not null) (type: boolean)
Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: ((subaccountid) IN (16010, 14863) and adid is not null) (type: boolean)
Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: adid (type: int)
outputColumnNames: adid
Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
keys: adid (type: int)
minReductionHashAggr: 0.4
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized, llap
LLAP IO: all inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
Merge Join Operator
condition map:
Left Semi Join 0 to 1
keys:
0 _col5.adid (type: int)
1 _col0 (type: int)
outputColumnNames: _col5
Statistics: Num rows: 5 Data size: 22 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(DISTINCT _col5.triggerid)
keys: _col5.triggerid (type: string)
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 5 Data size: 22 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
null sort order: z
sort order: +
Statistics: Num rows: 5 Data size: 22 Basic stats: COMPLETE Column stats: NONE
Reducer 3
Execution mode: llap
Reduce Operator Tree:
Group By Operator
aggregations: count(DISTINCT KEY._col0:0._col0)
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Reducer 5
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
keys: _col0 (type: int)
minReductionHashAggr: 0.4
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE

Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink

PREHOOK: query: select count(distinct logItem.triggerId)
from service_stat_log LATERAL VIEW explode(logItems) LogItemTable AS logItem
where logItem.dsp in ('delivery', 'ocpa')
and logItem.iswin = true
and logItem.adid in (
select distinct adId
from
ad_info
where
subAccountId in (16010, 14863))
PREHOOK: type: QUERY
PREHOOK: Input: default@ad_info
PREHOOK: Input: default@service_stat_log
#### A masked pattern was here ####
POSTHOOK: query: select count(distinct logItem.triggerId)
from service_stat_log LATERAL VIEW explode(logItems) LogItemTable AS logItem
where logItem.dsp in ('delivery', 'ocpa')
and logItem.iswin = true
and logItem.adid in (
select distinct adId
from
ad_info
where
subAccountId in (16010, 14863))
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ad_info
POSTHOOK: Input: default@service_stat_log
#### A masked pattern was here ####
2
PREHOOK: query: drop table service_stat_log
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@service_stat_log
PREHOOK: Output: default@service_stat_log
POSTHOOK: query: drop table service_stat_log
POSTHOOK: type: DROPTABLE
POSTHOOK: Input: default@service_stat_log
POSTHOOK: Output: default@service_stat_log
PREHOOK: query: drop table ad_info
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@ad_info
PREHOOK: Output: default@ad_info
POSTHOOK: query: drop table ad_info
POSTHOOK: type: DROPTABLE
POSTHOOK: Input: default@ad_info
POSTHOOK: Output: default@ad_info

0 comments on commit c677418

Please sign in to comment.