Skip to content

Commit

Permalink
HIVE-24601: Control CBO fallback behavior via property (#1875) (Stama…
Browse files Browse the repository at this point in the history
…tis Zampetakis reviewed by Zoltan Haindrich)
  • Loading branch information
zabetak committed Jan 28, 2021
1 parent 1781615 commit 170d5b4
Show file tree
Hide file tree
Showing 47 changed files with 321 additions and 37 deletions.
7 changes: 7 additions & 0 deletions common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
Original file line number Diff line number Diff line change
Expand Up @@ -1807,6 +1807,13 @@ public static enum ConfVars {

// CBO related
HIVE_CBO_ENABLED("hive.cbo.enable", true, "Flag to control enabling Cost Based Optimizations using Calcite framework."),
// Controls whether a CBO failure aborts the query or triggers a retry with the legacy optimizer.
// Every description fragment ends with a space so the concatenated help text does not fuse sentences.
HIVE_CBO_FALLBACK_STRATEGY("hive.cbo.fallback.strategy", "CONSERVATIVE",
    new StringSet(true, "NEVER", "CONSERVATIVE", "ALWAYS", "TEST"),
    "The strategy defines when Hive fallbacks to legacy optimizer when CBO fails: "
        + "NEVER, never use the legacy optimizer (all CBO errors are fatal); "
        + "ALWAYS, always use the legacy optimizer (CBO errors are not fatal); "
        + "CONSERVATIVE, use the legacy optimizer only when the CBO error is not related to subqueries and views; "
        + "TEST, specific behavior only for tests, do not use in production"),
// Upper bound on expression size during CNF conversion; -1 disables the limit.
// Each description fragment ends with a space so words are not fused at the concatenation points.
HIVE_CBO_CNF_NODES_LIMIT("hive.cbo.cnf.maxnodes", -1, "When converting to conjunctive normal form (CNF), fail if " +
    "the expression exceeds this threshold; the threshold is expressed in terms of number of nodes (leaves and " +
    "interior nodes). -1 to not set up a threshold."),
Expand Down
5 changes: 5 additions & 0 deletions data/conf/hive-site.xml
Original file line number Diff line number Diff line change
Expand Up @@ -387,4 +387,9 @@
<value>false</value>
</property>

<property>
<name>hive.cbo.fallback.strategy</name>
<value>TEST</value>
</property>

</configuration>
5 changes: 5 additions & 0 deletions data/conf/llap/hive-site.xml
Original file line number Diff line number Diff line change
Expand Up @@ -407,4 +407,9 @@
<value>false</value>
</property>

<property>
<name>hive.cbo.fallback.strategy</name>
<value>TEST</value>
</property>

</configuration>
5 changes: 5 additions & 0 deletions data/conf/tez/hive-site.xml
Original file line number Diff line number Diff line change
Expand Up @@ -308,4 +308,9 @@
<value>query</value>
</property>

<property>
<name>hive.cbo.fallback.strategy</name>
<value>TEST</value>
</property>

</configuration>
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.parse;

import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubqueryRuntimeException;
import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException;
import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteViewSemanticException;

/**
 * Policy deciding whether an exception raised while planning with CBO is fatal, or whether planning may fall back
 * to the legacy optimizer instead.
 */
enum CBOFallbackStrategy {
  /**
   * No fallback at all: every CBO error is reported as fatal.
   */
  NEVER {
    @Override
    boolean isFatal(Exception e) {
      return true;
    }
  },
  /**
   * Fall back to the legacy optimizer unless the CBO error is one of the subquery or view exceptions.
   */
  CONSERVATIVE {
    @Override
    boolean isFatal(Exception e) {
      // For these exceptions the non-CBO path fails with a completely different error and masks the original
      // failure, so they are treated as fatal.
      return isSubqueryOrViewFailure(e);
    }
  },
  /**
   * Fall back to the legacy optimizer for every CBO error; nothing is fatal.
   */
  ALWAYS {
    @Override
    boolean isFatal(Exception e) {
      return false;
    }
  },
  /**
   * Strategy intended only for tests.
   */
  TEST {
    @Override
    boolean isFatal(Exception e) {
      // Subquery/view failures are fatal; of the remaining errors only a CalciteSemanticException may fall back.
      return isSubqueryOrViewFailure(e) || !(e instanceof CalciteSemanticException);
    }
  };

  /**
   * Tells whether the given exception must abort the query.
   *
   * @param e the exception raised on the CBO path
   * @return true if the exception is fatal (must not fallback to legacy optimizer), false otherwise
   */
  abstract boolean isFatal(Exception e);

  /**
   * Checks whether the exception is one of the subquery or view specific CBO exceptions.
   */
  private static boolean isSubqueryOrViewFailure(Exception e) {
    if (e instanceof CalciteSubquerySemanticException) {
      return true;
    }
    if (e instanceof CalciteViewSemanticException) {
      return true;
    }
    return e instanceof CalciteSubqueryRuntimeException;
  }
}
21 changes: 5 additions & 16 deletions ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,6 @@
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubqueryRuntimeException;
import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException;
import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteViewSemanticException;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
Expand Down Expand Up @@ -364,6 +363,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
private SemanticException semanticException;
private boolean runCBO = true;
private boolean disableSemJoinReordering = true;
private final CBOFallbackStrategy fallbackStrategy;

private EnumSet<ExtendedCBOProfile> profilesCBO;

Expand Down Expand Up @@ -439,6 +439,7 @@ public CalcitePlanner(QueryState queryState) throws SemanticException {
runCBO = false;
disableSemJoinReordering = false;
}
fallbackStrategy = CBOFallbackStrategy.valueOf(conf.getVar(ConfVars.HIVE_CBO_FALLBACK_STRATEGY));
}

public void resetCalciteConfiguration() {
Expand Down Expand Up @@ -679,27 +680,15 @@ Operator genOPTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticExcept

// Determine whether we should re-throw the exception OR mark the plan as reAnalyzeAST to retry
// planning as non-CBO.
if (e instanceof CalciteSubquerySemanticException || e instanceof CalciteViewSemanticException
|| e instanceof CalciteSubqueryRuntimeException) {
// Non-CBO path for CalciteSubquerySemanticException fails with completely different error
// and masks the original failure.
// Non-CBO path for CalciteViewSemanticException would fail in a similar way as CBO path.
throw new SemanticException(e);
}

boolean isHiveTest = conf.getBoolVar(ConfVars.HIVE_IN_TEST);
// At this point we retry with CBO off:
// 1) If this is not test mode (common case)
// 2) If we are in test mode and we are missing stats
// 3) if we are in test mode and a CalciteSemanticException is generated
reAnalyzeAST = (!isHiveTest || isMissingStats || e instanceof CalciteSemanticException);
if (!reAnalyzeAST) {
if (fallbackStrategy.isFatal(e)) {
if (e instanceof RuntimeException || e instanceof SemanticException) {
// These types of exceptions do not need to be wrapped
throw e;
}
// Wrap all other errors (Should only hit in tests)
throw new SemanticException(e);
} else {
reAnalyzeAST = true;
}
} finally {
runCBO = false;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
--! qt:dataset:part
-- =ALL is not allowed and initially triggers a CalciteSubquerySemanticException
set hive.cbo.fallback.strategy=CONSERVATIVE;
-- In CONSERVATIVE mode CalciteSubquerySemanticException is fatal
-- and should be present in the error message
explain select * from part where p_type = ALL(select max(p_type) from part);
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
--! qt:dataset:src
set hive.cbo.fallback.strategy=NEVER;
-- The query generates initially a CalciteSemanticException on CBO but can be handled by the legacy optimizer
-- In NEVER mode we never fallback so the error should contain the CalciteSemanticException
select count(*) from src where key <=> 100;
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
--! qt:dataset:part
-- =ALL is not allowed and initially triggers a CalciteSubquerySemanticException
set hive.cbo.fallback.strategy=NEVER;
-- In NEVER mode we don't retry on CBO failure so CalciteSubquerySemanticException should appear in the error
explain select * from part where p_type = ALL(select max(p_type) from part);
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
-- Invalid value due to case sensitivity
set hive.cbo.fallback.strategy=never;
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
-- Invalid value not present in StringSet
set hive.cbo.fallback.strategy=NONE;
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
--! qt:dataset:part
-- =ALL is not allowed and initially triggers a CalciteSubquerySemanticException
set hive.cbo.fallback.strategy=TEST;
-- In TEST mode CalciteSubquerySemanticException is fatal
-- and should be present in the error message
explain select * from part where p_type = ALL(select max(p_type) from part);
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
--! qt:dataset:src
set hive.explain.user=true;
set hive.cbo.fallback.strategy=ALWAYS;
-- The query generates initially a CalciteSemanticException on CBO but can be handled by the legacy optimizer
-- The fact that CBO fails should be reflected in the plan
explain select count(*) from src where key <=> 100;
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
--! qt:dataset:part
-- =ALL is not allowed and initially triggers a CalciteSubquerySemanticException
set hive.explain.user=true;
set hive.cbo.fallback.strategy=ALWAYS;
-- The query generates initially a CalciteSubquerySemanticException on CBO but can be handled by the legacy optimizer
-- It is not guaranteed that the resulting plan is correct whatsoever.
-- The fact that CBO fails should be reflected in the plan
explain select * from part where p_type = ALL(select max(p_type) from part);
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
--! qt:dataset:src
set hive.explain.user=true;
set hive.cbo.fallback.strategy=CONSERVATIVE;
-- The query generates initially a CalciteSemanticException on CBO but can be handled by the legacy optimizer
-- The fact that CBO fails should be reflected in the plan
explain select count(*) from src where key <=> 100;
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
--! qt:dataset:src
set hive.explain.user=true;
set hive.cbo.fallback.strategy=TEST;
-- The query generates initially a CalciteSemanticException on CBO but can be handled by the legacy optimizer
-- The fact that CBO fails should be reflected in the plan
explain select count(*) from src where key <=> 100;
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
FAILED: CalciteSubquerySemanticException [Error 10250]: Invalid SubQuery expression Invalid operator:=
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
FAILED: CalciteSemanticException <=> is not yet supported for cbo.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
FAILED: CalciteSubquerySemanticException [Error 10250]: Invalid SubQuery expression Invalid operator:=
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Query returned non-zero code: 1, cause: 'SET hive.cbo.fallback.strategy=never' FAILED in validation : Invalid value.. expects one of [NEVER, CONSERVATIVE, ALWAYS, TEST].
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Query returned non-zero code: 1, cause: 'SET hive.cbo.fallback.strategy=NONE' FAILED in validation : Invalid value.. expects one of [NEVER, CONSERVATIVE, ALWAYS, TEST].
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
FAILED: CalciteSubquerySemanticException [Error 10250]: Invalid SubQuery expression Invalid operator:=
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,4 @@ POSTHOOK: query: create table target1(x int, y int, z int)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:x314n
POSTHOOK: Output: x314n@target1
FAILED: SemanticException 0:0 Expected 3 columns for insclause-0/x314n@target1; select produces 2 columns. Error encountered near token 'TOK_ALLCOLREF'
FAILED: SemanticException 0:0 Expected 3 columns for insclause-0/x314n@target1; select produces 2 columns. Error encountered near token 'source.s2'
Original file line number Diff line number Diff line change
@@ -1 +1 @@
FAILED: SemanticException org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: Invalid SubQuery expression Invalid operator:=
FAILED: CalciteSubquerySemanticException [Error 10250]: Invalid SubQuery expression Invalid operator:=
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,4 @@ POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@tt
POSTHOOK: Lineage: tt.i SCRIPT []
POSTHOOK: Lineage: tt.j SCRIPT []
FAILED: SemanticException org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubqueryRuntimeException: Subquery rewrite: Aggregate without group by is not allowed
FAILED: CalciteSubqueryRuntimeException Subquery rewrite: Aggregate without group by is not allowed
Original file line number Diff line number Diff line change
@@ -1 +1 @@
FAILED: SemanticException org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: Invalid SubQuery expression Invalid operator:<>
FAILED: CalciteSubquerySemanticException [Error 10250]: Invalid SubQuery expression Invalid operator:<>
Original file line number Diff line number Diff line change
@@ -1 +1 @@
FAILED: SemanticException [Error 10004]: org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: Line 4:113 Invalid table alias or column reference 'po': (possible column names are: p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment)
FAILED: CalciteSubquerySemanticException [Error 10004]: Line 4:113 Invalid table alias or column reference 'po': (possible column names are: p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment)
Original file line number Diff line number Diff line change
@@ -1 +1 @@
FAILED: SemanticException [Error 10004]: org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: Line 5:95 Invalid table alias or column reference 'x': (possible column names are: p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment)
FAILED: CalciteSubquerySemanticException [Error 10004]: Line 5:95 Invalid table alias or column reference 'x': (possible column names are: p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment)
Original file line number Diff line number Diff line change
@@ -1 +1 @@
FAILED: SemanticException [Error 10004]: org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: Line 3:54 Invalid table alias or column reference 'po': (possible column names are: p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment)
FAILED: CalciteSubquerySemanticException [Error 10004]: Line 3:54 Invalid table alias or column reference 'po': (possible column names are: p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment)
Original file line number Diff line number Diff line change
@@ -1 +1 @@
FAILED: SemanticException [Error 10004]: org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: Line 4:89 Invalid table alias or column reference 't1': (possible column names are: t3.p_partkey, t3.p_name, t3.p_mfgr, t3.p_brand, t3.p_type, t3.p_size, t3.p_container, t3.p_retailprice, t3.p_comment, t4.p_partkey, t4.p_name, t4.p_mfgr, t4.p_brand, t4.p_type, t4.p_size, t4.p_container, t4.p_retailprice, t4.p_comment)
FAILED: CalciteSubquerySemanticException [Error 10004]: Line 4:89 Invalid table alias or column reference 't1': (possible column names are: t3.p_partkey, t3.p_name, t3.p_mfgr, t3.p_brand, t3.p_type, t3.p_size, t3.p_container, t3.p_retailprice, t3.p_comment, t4.p_partkey, t4.p_name, t4.p_mfgr, t4.p_brand, t4.p_type, t4.p_size, t4.p_container, t4.p_retailprice, t4.p_comment)
Original file line number Diff line number Diff line change
@@ -1 +1 @@
FAILED: SemanticException [Error 10250]: org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: Line 8:7 Invalid SubQuery expression ''val_9'': A predicate on EXISTS/NOT EXISTS SubQuery with implicit Aggregation(no Group By clause) cannot be rewritten.
FAILED: CalciteSubquerySemanticException [Error 10250]: Line 8:7 Invalid SubQuery expression ''val_9'': A predicate on EXISTS/NOT EXISTS SubQuery with implicit Aggregation(no Group By clause) cannot be rewritten.
Original file line number Diff line number Diff line change
@@ -1 +1 @@
FAILED: SemanticException org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: Unsupported SubQuery Expression Currently SubQuery expressions are only allowed as Where and Having Clause predicates
FAILED: CalciteSubquerySemanticException [Error 10249]: Unsupported SubQuery Expression Currently SubQuery expressions are only allowed as Where and Having Clause predicates
2 changes: 1 addition & 1 deletion ql/src/test/results/clientnegative/subquery_in_lhs.q.out
Original file line number Diff line number Diff line change
@@ -1 +1 @@
FAILED: SemanticException org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: Line 0:-1 Unsupported SubQuery Expression 'p_size': SubQuery on left hand side is not supported.
FAILED: CalciteSubquerySemanticException Line 0:-1 Unsupported SubQuery Expression 'p_size': SubQuery on left hand side is not supported.
2 changes: 1 addition & 1 deletion ql/src/test/results/clientnegative/subquery_in_on.q.out
Original file line number Diff line number Diff line change
@@ -1 +1 @@
FAILED: SemanticException org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: 3:77 Unsupported SubQuery Expression Currently SubQuery expressions are only allowed as Where and Having Clause predicates. Error encountered near token 'p2'
FAILED: CalciteSubquerySemanticException 3:77 Unsupported SubQuery Expression Currently SubQuery expressions are only allowed as Where and Having Clause predicates. Error encountered near token 'p2'
Original file line number Diff line number Diff line change
@@ -1 +1 @@
FAILED: SemanticException org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: Invalid SubQuery expression SubQuery can contain only 1 item in Select List.
FAILED: CalciteSubquerySemanticException [Error 10250]: Invalid SubQuery expression SubQuery can contain only 1 item in Select List.
Original file line number Diff line number Diff line change
@@ -1 +1 @@
FAILED: SemanticException [Error 10002]: org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: Line 4:104 Invalid column reference 'non_exisiting_column'
FAILED: CalciteSubquerySemanticException [Error 10002]: Line 4:104 Invalid column reference 'non_exisiting_column'
Original file line number Diff line number Diff line change
@@ -1 +1 @@
FAILED: SemanticException [Error 10250]: org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: Line 8:7 Invalid SubQuery expression ''val_9'': A predicate on EXISTS/NOT EXISTS SubQuery with implicit Aggregation(no Group By clause) cannot be rewritten.
FAILED: CalciteSubquerySemanticException [Error 10250]: Line 8:7 Invalid SubQuery expression ''val_9'': A predicate on EXISTS/NOT EXISTS SubQuery with implicit Aggregation(no Group By clause) cannot be rewritten.
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
FAILED: SemanticException org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: Line 2:67 Invalid SubQuery expression 'p_type' in definition of SubQuery sq_1 [
FAILED: CalciteSubquerySemanticException Line 2:67 Invalid SubQuery expression 'p_type' in definition of SubQuery sq_1 [
(select p_size, p_type from part)
] used as sq_1 at Line 0:-1: SubQuery can contain only 1 item in Select List.
Original file line number Diff line number Diff line change
@@ -1 +1 @@
FAILED: SemanticException org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: Unsupported SubQuery Expression Invalid subquery. Subquery in UDAF is not allowed.
FAILED: CalciteSubquerySemanticException [Error 10249]: Unsupported SubQuery Expression Invalid subquery. Subquery in UDAF is not allowed.
Original file line number Diff line number Diff line change
@@ -1 +1 @@
FAILED: SemanticException org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: Unsupported SubQuery Expression Invalid subquery. Subquery with DISTINCT clause is not supported!
FAILED: CalciteSubquerySemanticException [Error 10249]: Unsupported SubQuery Expression Invalid subquery. Subquery with DISTINCT clause is not supported!
Original file line number Diff line number Diff line change
@@ -1 +1 @@
FAILED: SemanticException org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: Unsupported SubQuery Expression Invalid subquery. Subquery with DISTINCT clause is not supported!
FAILED: CalciteSubquerySemanticException [Error 10249]: Unsupported SubQuery Expression Invalid subquery. Subquery with DISTINCT clause is not supported!
Original file line number Diff line number Diff line change
@@ -1 +1 @@
FAILED: SemanticException [Error 10249]: org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: Line 5:7 Unsupported SubQuery Expression 'key': SubQuery on left hand side is not supported.
FAILED: CalciteSubquerySemanticException [Error 10249]: Line 5:7 Unsupported SubQuery Expression 'key': SubQuery on left hand side is not supported.
Original file line number Diff line number Diff line change
@@ -1 +1 @@
FAILED: SemanticException [Error 10249]: org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: Line 7:8 Unsupported SubQuery Expression 'p_brand': Correlated Sub Queries cannot contain Windowing clauses.
FAILED: CalciteSubquerySemanticException [Error 10249]: Line 7:8 Unsupported SubQuery Expression 'p_brand': Correlated Sub Queries cannot contain Windowing clauses.
Loading

0 comments on commit 170d5b4

Please sign in to comment.