Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HIVE-26160: Materialized View rewrite does not check tables scanned in sub-query expressions #3246

Merged
merged 1 commit into from
Apr 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
15 changes: 10 additions & 5 deletions ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
Original file line number Diff line number Diff line change
Expand Up @@ -1680,8 +1680,6 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu
LOG.debug("Initial CBO Plan:\n" + RelOptUtil.toString(calcitePlan));
}

calcitePlan = applyMaterializedViewRewritingByText(ast, calcitePlan, optCluster);

// Create executor
RexExecutor executorProvider = new HiveRexExecutorImpl();
calcitePlan.getCluster().getPlanner().setExecutor(executorProvider);
Expand All @@ -1691,6 +1689,9 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu
RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(mdProvider.getMetadataProvider()));
optCluster.invalidateMetadataQuery();

calcitePlan = applyMaterializedViewRewritingByText(
ast, calcitePlan, optCluster, mdProvider.getMetadataProvider());

// We need to get the ColumnAccessInfo and viewToTableSchema for views.
HiveRelFieldTrimmer.get()
.trim(HiveRelFactories.HIVE_BUILDER.create(optCluster, null),
Expand Down Expand Up @@ -2107,7 +2108,10 @@ private boolean isMaterializedViewRewritingByTextEnabled() {
}

private RelNode applyMaterializedViewRewritingByText(
ASTNode queryToRewriteAST, RelNode originalPlan, RelOptCluster optCluster) {
ASTNode queryToRewriteAST,
RelNode originalPlan,
RelOptCluster optCluster,
RelMetadataProvider metadataProvider) {
zabetak marked this conversation as resolved.
Show resolved Hide resolved
if (!isMaterializedViewRewritingByTextEnabled()) {
return originalPlan;
}
Expand All @@ -2121,8 +2125,9 @@ private RelNode applyMaterializedViewRewritingByText(
queryToRewriteAST.getTokenStopIndex());

ASTNode expandedAST = ParseUtils.parse(expandedQueryText, new Context(conf));
Set<TableName> tablesUsedByOriginalPlan = getTablesUsed(originalPlan);
RelNode mvScan = getMaterializedViewByAST(expandedAST, optCluster, ANY, db, tablesUsedByOriginalPlan, getTxnMgr());
Set<TableName> tablesUsedByOriginalPlan = getTablesUsed(removeSubqueries(originalPlan, metadataProvider));
RelNode mvScan = getMaterializedViewByAST(
expandedAST, optCluster, ANY, db, tablesUsedByOriginalPlan, getTxnMgr());
if (mvScan != null) {
return mvScan;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
-- Regression test for HIVE-26160: the materialized view rewrite must also take into
-- account the tables scanned inside sub-query expressions of the query being rewritten.

-- The tables below are transactional, so enable concurrency and the DbTxnManager.
set hive.support.concurrency=true;
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
-- NOTE(review): presumably this switches off the plan-based rewriting so that only the
-- query-text-based rewriting is exercised - confirm against the Hive configuration docs.
set hive.materializedview.rewriting=false;

-- t1 is the source table of the materialized view and is scanned only inside the sub-query.
create table t1(col0 int) STORED AS ORC
TBLPROPERTIES ('transactional'='true');

-- t2 is the table scanned by the outer query.
create table t2(col0 int) STORED AS ORC
TBLPROPERTIES ('transactional'='true');

-- The view definition contains a union. The expected output reports that only query text
-- based automatic rewriting is available for such a view.
create materialized view mat1 as
select col0 from t1 where col0 = 1 union select col0 from t1 where col0 = 2;

-- The text of the IN sub-query is identical to the view definition and the view is up to date:
-- the expected plan scans mat1 in place of the sub-query.
explain cbo
select col0 from t2 where col0 in (select col0 from t1 where col0 = 1 union select col0 from t1 where col0 = 2);

-- Change t1 after the view was created.
insert into t1(col0) values (2);

-- The view cannot be used because it is outdated with respect to t1:
-- the expected plan scans t1 directly and does not reference mat1.
explain cbo
select col0 from t2 where col0 in (select col0 from t1 where col0 = 1 union select col0 from t1 where col0 = 2);
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
PREHOOK: query: create table t1(col0 int) STORED AS ORC
TBLPROPERTIES ('transactional'='true')
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@t1
POSTHOOK: query: create table t1(col0 int) STORED AS ORC
TBLPROPERTIES ('transactional'='true')
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@t1
PREHOOK: query: create table t2(col0 int) STORED AS ORC
TBLPROPERTIES ('transactional'='true')
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@t2
POSTHOOK: query: create table t2(col0 int) STORED AS ORC
TBLPROPERTIES ('transactional'='true')
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@t2
Only query text based automatic rewriting is available for materialized view. Statement has unsupported operator: union.
PREHOOK: query: create materialized view mat1 as
select col0 from t1 where col0 = 1 union select col0 from t1 where col0 = 2
PREHOOK: type: CREATE_MATERIALIZED_VIEW
PREHOOK: Input: default@t1
PREHOOK: Output: database:default
PREHOOK: Output: default@mat1
POSTHOOK: query: create materialized view mat1 as
select col0 from t1 where col0 = 1 union select col0 from t1 where col0 = 2
POSTHOOK: type: CREATE_MATERIALIZED_VIEW
POSTHOOK: Input: default@t1
POSTHOOK: Output: database:default
POSTHOOK: Output: default@mat1
POSTHOOK: Lineage: mat1.col0 EXPRESSION []
PREHOOK: query: explain cbo
select col0 from t2 where col0 in (select col0 from t1 where col0 = 1 union select col0 from t1 where col0 = 2)
PREHOOK: type: QUERY
PREHOOK: Input: default@mat1
PREHOOK: Input: default@t1
PREHOOK: Input: default@t2
#### A masked pattern was here ####
POSTHOOK: query: explain cbo
select col0 from t2 where col0 in (select col0 from t1 where col0 = 1 union select col0 from t1 where col0 = 2)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@mat1
POSTHOOK: Input: default@t1
POSTHOOK: Input: default@t2
#### A masked pattern was here ####
CBO PLAN:
HiveSemiJoin(condition=[=($0, $1)], joinType=[semi])
HiveProject(col0=[$0])
HiveFilter(condition=[IS NOT NULL($0)])
HiveTableScan(table=[[default, t2]], table:alias=[t2])
HiveProject(col0=[$0])
HiveFilter(condition=[IS NOT NULL($0)])
HiveTableScan(table=[[default, mat1]], table:alias=[default.mat1])

PREHOOK: query: insert into t1(col0) values (2)
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@t1
POSTHOOK: query: insert into t1(col0) values (2)
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@t1
POSTHOOK: Lineage: t1.col0 SCRIPT []
PREHOOK: query: explain cbo
select col0 from t2 where col0 in (select col0 from t1 where col0 = 1 union select col0 from t1 where col0 = 2)
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
PREHOOK: Input: default@t2
#### A masked pattern was here ####
POSTHOOK: query: explain cbo
select col0 from t2 where col0 in (select col0 from t1 where col0 = 1 union select col0 from t1 where col0 = 2)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
POSTHOOK: Input: default@t2
#### A masked pattern was here ####
CBO PLAN:
HiveSemiJoin(condition=[=($0, $1)], joinType=[semi])
HiveProject(col0=[$0])
HiveFilter(condition=[IN($0, 1, 2)])
HiveTableScan(table=[[default, t2]], table:alias=[t2])
HiveProject($f0=[$0])
HiveUnion(all=[true])
HiveProject($f0=[CAST(1):INTEGER])
HiveFilter(condition=[=($0, 1)])
HiveTableScan(table=[[default, t1]], table:alias=[t1])
HiveProject($f0=[CAST(2):INTEGER])
HiveFilter(condition=[=($0, 2)])
HiveTableScan(table=[[default, t1]], table:alias=[t1])