Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HIVE-26722: HiveFilterSetOpTransposeRule incorrectly prunes UNION ALL… #3748

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Expand Up @@ -42,8 +42,6 @@
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;

import com.google.common.collect.ImmutableList;

public class HiveFilterSetOpTransposeRule extends FilterSetOpTransposeRule {

public static final HiveFilterSetOpTransposeRule INSTANCE =
Expand All @@ -66,8 +64,8 @@ public class HiveFilterSetOpTransposeRule extends FilterSetOpTransposeRule {
* Op1 Op2
*
*
* It additionally can remove branch(es) of filter if its able to determine
* that they are going to generate empty result set.
* It additionally can remove branch(es) of filter if it's able to determine
* that they are going to generate an empty result set.
*/
private HiveFilterSetOpTransposeRule(RelBuilderFactory relBuilderFactory) {
super(relBuilderFactory);
Expand Down Expand Up @@ -111,18 +109,14 @@ public void onMatch(RelOptRuleCall call) {
final RelMetadataQuery mq = call.getMetadataQuery();
final RelOptPredicateList predicates = mq.getPulledUpPredicates(input);
if (predicates != null) {
ImmutableList.Builder<RexNode> listBuilder = ImmutableList.builder();
listBuilder.addAll(predicates.pulledUpPredicates);
listBuilder.add(newCondition);
RexExecutor executor =
final RexExecutor executor =
Util.first(filterRel.getCluster().getPlanner().getExecutor(), RexUtil.EXECUTOR);
final RexSimplify simplify = new RexSimplify(rexBuilder, RelOptPredicateList.EMPTY, executor);
final RexNode cond = RexUtil.composeConjunction(rexBuilder, listBuilder.build());
final RexNode x = simplify.simplifyUnknownAs(cond, RexUnknownAs.FALSE);
final RexSimplify simplify = new RexSimplify(rexBuilder, predicates, executor);
final RexNode x = simplify.simplifyUnknownAs(newCondition, RexUnknownAs.FALSE);
if (x.isAlwaysFalse()) {
// this is the last branch, and it is always false
// We assume the always-false filter will get pushed down to the TableScan (TS) so this
// branch so it won't read any data.
// branch won't read any data.
if (index == setOp.getInputs().size() - 1) {
lastInput = relBuilder.push(input).filter(newCondition).build();
}
Expand Down
32 changes: 32 additions & 0 deletions ql/src/test/queries/clientpositive/union_all_filter_transpose.q
@@ -0,0 +1,32 @@
# needed to avoid the simplification of CAST(NULL) into NULL
set hive.cbo.rule.exclusion.regex=ReduceExpressionsRule\(Project\);

CREATE EXTERNAL TABLE t (a string, b string);

INSERT INTO t VALUES ('1000', 'b1');
INSERT INTO t VALUES ('2000', 'b2');

SELECT * FROM (
SELECT
a,
b
FROM t
UNION ALL
SELECT
a,
CAST(NULL AS string)
FROM t) AS t2
WHERE a = 1000;

EXPLAIN CBO
SELECT * FROM (
SELECT
a,
b
FROM t
UNION ALL
SELECT
a,
CAST(NULL AS string)
FROM t) AS t2
WHERE a = 1000;
Expand Up @@ -46,11 +46,10 @@ POSTHOOK: Input: default@t1
POSTHOOK: Output: database:default
POSTHOOK: Output: default@v1
CBO PLAN:
HiveProject(col0=[CAST(10):INTEGER])
HiveAggregate(group=[{0}])
HiveProject($f0=[true])
HiveFilter(condition=[=($0, 10)])
HiveTableScan(table=[[default, t1]], table:alias=[t1])
HiveAggregate(group=[{0}])
HiveProject($f0=[CAST(10):INTEGER])
HiveFilter(condition=[=($0, 10)])
HiveTableScan(table=[[default, t1]], table:alias=[t1])

PREHOOK: query: create view v1 as
select sub.* from (select * from t1 where col0 > 2 union select * from t1 where col0 = 0) sub
Expand Down Expand Up @@ -80,11 +79,10 @@ POSTHOOK: Input: default@t1
POSTHOOK: Input: default@v1
#### A masked pattern was here ####
CBO PLAN:
HiveProject(col0=[CAST(10):INTEGER])
HiveAggregate(group=[{0}])
HiveProject($f0=[true])
HiveFilter(condition=[=($0, 10)])
HiveTableScan(table=[[default, t1]], table:alias=[t1])
HiveAggregate(group=[{0}])
HiveProject($f0=[CAST(10):INTEGER])
HiveFilter(condition=[=($0, 10)])
HiveTableScan(table=[[default, t1]], table:alias=[t1])

PREHOOK: query: select * from v1
PREHOOK: type: QUERY
Expand Down
@@ -0,0 +1,99 @@
PREHOOK: query: CREATE EXTERNAL TABLE t (a string, b string)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@t
POSTHOOK: query: CREATE EXTERNAL TABLE t (a string, b string)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@t
PREHOOK: query: INSERT INTO t VALUES ('1000', 'b1')
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@t
POSTHOOK: query: INSERT INTO t VALUES ('1000', 'b1')
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@t
POSTHOOK: Lineage: t.a SCRIPT []
POSTHOOK: Lineage: t.b SCRIPT []
PREHOOK: query: INSERT INTO t VALUES ('2000', 'b2')
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@t
POSTHOOK: query: INSERT INTO t VALUES ('2000', 'b2')
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@t
POSTHOOK: Lineage: t.a SCRIPT []
POSTHOOK: Lineage: t.b SCRIPT []
PREHOOK: query: SELECT * FROM (
SELECT
a,
b
FROM t
UNION ALL
SELECT
a,
CAST(NULL AS string)
FROM t) AS t2
WHERE a = 1000
PREHOOK: type: QUERY
PREHOOK: Input: default@t
#### A masked pattern was here ####
POSTHOOK: query: SELECT * FROM (
SELECT
a,
b
FROM t
UNION ALL
SELECT
a,
CAST(NULL AS string)
FROM t) AS t2
WHERE a = 1000
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t
#### A masked pattern was here ####
1000 b1
1000 NULL
PREHOOK: query: EXPLAIN CBO
SELECT * FROM (
SELECT
a,
b
FROM t
UNION ALL
SELECT
a,
CAST(NULL AS string)
FROM t) AS t2
WHERE a = 1000
PREHOOK: type: QUERY
PREHOOK: Input: default@t
#### A masked pattern was here ####
POSTHOOK: query: EXPLAIN CBO
SELECT * FROM (
SELECT
a,
b
FROM t
UNION ALL
SELECT
a,
CAST(NULL AS string)
FROM t) AS t2
WHERE a = 1000
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t
#### A masked pattern was here ####
Excluded rules: ReduceExpressionsRule\(Project\)

CBO PLAN:
HiveUnion(all=[true])
HiveProject(a=[$0], b=[$1])
HiveFilter(condition=[=(CAST($0):DOUBLE, 1000)])
HiveTableScan(table=[[default, t]], table:alias=[t])
HiveProject(a=[$0], _o__c1=[null:VARCHAR(2147483647) CHARACTER SET "UTF-16LE"])
HiveFilter(condition=[=(CAST($0):DOUBLE, 1000)])
HiveTableScan(table=[[default, t]], table:alias=[t])