Skip to content

Commit

Permalink
HIVE-24373 : Wrong predicate is pushed down for view with constant va…
Browse files Browse the repository at this point in the history
…lue projection. (Mahesh Kumar Behera, reviewed by Jesus Camacho Rodriguez)
  • Loading branch information
maheshk114 committed Nov 13, 2020
1 parent fa18755 commit 243155b
Show file tree
Hide file tree
Showing 3 changed files with 195 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,9 @@ public Map<String, List<ExprNodeDesc>> getResidualPredicates(boolean clear) {
for (Map.Entry<String, List<ExprNodeDesc>> entry : nonFinalPreds.entrySet()) {
List<ExprNodeDesc> converted = new ArrayList<ExprNodeDesc>();
for (ExprNodeDesc newExpr : entry.getValue()) {
converted.add(newToOldExprMap.get(newExpr));
// We should clone it to avoid getting overwritten if two or more operator uses
// this same expression.
converted.add(newToOldExprMap.get(newExpr).clone());
}
oldExprs.put(entry.getKey(), converted);
}
Expand Down
38 changes: 38 additions & 0 deletions ql/src/test/queries/clientpositive/ppd2.q
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,41 @@ from (
distribute by a.key
sort by a.key,a.cc desc) b
where b.cc>1;


set hive.explain.user=false;
set hive.cbo.enable=false;
set hive.optimize.ppd=true;
set hive.ppd.remove.duplicatefilters=true;

DROP TABLE arc;
CREATE table arc(`dt_from` string, `dt_to` string);

DROP TABLE loc1;
CREATE table loc1(`dt_from` string, `dt_to` string);

-- INSERT INTO arc VALUES('2020', '2020');
-- INSERT INTO loc1 VALUES('2020', '2020');

DROP VIEW view;
CREATE
VIEW view AS
SELECT
'9999' as DT_FROM,
uuid() as DT_TO
FROM
loc1
UNION ALL
SELECT
dt_from as DT_FROM,
uuid() as DT_TO
FROM
arc;

EXPLAIN
SELECT
dt_from, dt_to
FROM
view
WHERE
'2020' between dt_from and dt_to;
154 changes: 154 additions & 0 deletions ql/src/test/results/clientpositive/llap/ppd2.q.out
Original file line number Diff line number Diff line change
Expand Up @@ -691,3 +691,157 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
95 2
97 2
98 2
PREHOOK: query: DROP TABLE arc
PREHOOK: type: DROPTABLE
POSTHOOK: query: DROP TABLE arc
POSTHOOK: type: DROPTABLE
PREHOOK: query: CREATE table arc(`dt_from` string, `dt_to` string)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@arc
POSTHOOK: query: CREATE table arc(`dt_from` string, `dt_to` string)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@arc
PREHOOK: query: DROP TABLE loc1
PREHOOK: type: DROPTABLE
POSTHOOK: query: DROP TABLE loc1
POSTHOOK: type: DROPTABLE
PREHOOK: query: CREATE table loc1(`dt_from` string, `dt_to` string)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@loc1
POSTHOOK: query: CREATE table loc1(`dt_from` string, `dt_to` string)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@loc1
PREHOOK: query: DROP VIEW view
PREHOOK: type: DROPVIEW
POSTHOOK: query: DROP VIEW view
POSTHOOK: type: DROPVIEW
PREHOOK: query: CREATE
VIEW view AS
SELECT
'9999' as DT_FROM,
uuid() as DT_TO
FROM
loc1
UNION ALL
SELECT
dt_from as DT_FROM,
uuid() as DT_TO
FROM
arc
PREHOOK: type: CREATEVIEW
PREHOOK: Input: default@arc
PREHOOK: Input: default@loc1
PREHOOK: Output: database:default
PREHOOK: Output: default@view
POSTHOOK: query: CREATE
VIEW view AS
SELECT
'9999' as DT_FROM,
uuid() as DT_TO
FROM
loc1
UNION ALL
SELECT
dt_from as DT_FROM,
uuid() as DT_TO
FROM
arc
POSTHOOK: type: CREATEVIEW
POSTHOOK: Input: default@arc
POSTHOOK: Input: default@loc1
POSTHOOK: Output: database:default
POSTHOOK: Output: default@view
POSTHOOK: Lineage: view.dt_from EXPRESSION [(arc)arc.FieldSchema(name:dt_from, type:string, comment:null), ]
POSTHOOK: Lineage: view.dt_to EXPRESSION []
PREHOOK: query: EXPLAIN
SELECT
dt_from, dt_to
FROM
view
WHERE
'2020' between dt_from and dt_to
PREHOOK: type: QUERY
PREHOOK: Input: default@arc
PREHOOK: Input: default@loc1
PREHOOK: Input: default@view
#### A masked pattern was here ####
POSTHOOK: query: EXPLAIN
SELECT
dt_from, dt_to
FROM
view
WHERE
'2020' between dt_from and dt_to
POSTHOOK: type: QUERY
POSTHOOK: Input: default@arc
POSTHOOK: Input: default@loc1
POSTHOOK: Input: default@view
#### A masked pattern was here ####
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1

STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
Map 1 <- Union 2 (CONTAINS)
Map 3 <- Union 2 (CONTAINS)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: loc1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
Select Operator
expressions: '9999' (type: string), uuid() (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
Filter Operator
predicate: '2020' BETWEEN '9999' AND _col1 (type: boolean)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
File Output Operator
compressed: false
Statistics: Num rows: 2 Data size: 184 Basic stats: PARTIAL Column stats: PARTIAL
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized, llap
LLAP IO: all inputs
Map 3
Map Operator Tree:
TableScan
alias: arc
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: dt_from (type: string), uuid() (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: '2020' BETWEEN _col0 AND _col1 (type: boolean)
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
Statistics: Num rows: 2 Data size: 184 Basic stats: PARTIAL Column stats: PARTIAL
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized, llap
LLAP IO: all inputs
Union 2
Vertex: Union 2

Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink

0 comments on commit 243155b

Please sign in to comment.