Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HIVE-24167: TPC-DS query 14 fails while generating plan for the filter #5077

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelShuttle;
import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil;
import org.apache.hadoop.hive.ql.optimizer.signature.RelTreeSignature.RelTreeSignatureWriter;
import org.apache.hadoop.hive.ql.plan.ColStatistics;

import com.google.common.collect.ImmutableList;
Expand Down Expand Up @@ -202,16 +203,20 @@ public HiveTableScan copyIncludingTable(RelDataType newRowtype) {
// Also include partition list key to trigger cost evaluation even if an
// expression was already generated.
@Override public RelWriter explainTerms(RelWriter pw) {
return super.explainTerms(pw)
final RelWriter writer = super.explainTerms(pw)
.itemIf("fromVersion", ((RelOptHiveTable) table).getHiveTableMD().getVersionIntervalFrom(),
isNotBlank(((RelOptHiveTable) table).getHiveTableMD().getVersionIntervalFrom()));
if (pw instanceof RelTreeSignatureWriter) {
deniskuzZ marked this conversation as resolved.
Show resolved Hide resolved
return writer.item("tableScanTrait", this.tableScanTrait);
}
return writer
.itemIf("qbid:alias", concatQbIDAlias, this.useQBIdInDigest)
.itemIf("htColumns", this.neededColIndxsFrmReloptHT, pw.getDetailLevel() == SqlExplainLevel.DIGEST_ATTRIBUTES)
.itemIf("insideView", this.isInsideView(), pw.getDetailLevel() == SqlExplainLevel.DIGEST_ATTRIBUTES)
.itemIf("plKey", ((RelOptHiveTable) table).getPartitionListKey(), pw.getDetailLevel() == SqlExplainLevel.DIGEST_ATTRIBUTES)
.itemIf("table:alias", tblAlias, !this.useQBIdInDigest)
.itemIf("tableScanTrait", this.tableScanTrait,
pw.getDetailLevel() == SqlExplainLevel.DIGEST_ATTRIBUTES)
.itemIf("fromVersion", ((RelOptHiveTable) table).getHiveTableMD().getVersionIntervalFrom(),
isNotBlank(((RelOptHiveTable) table).getHiveTableMD().getVersionIntervalFrom()));
pw.getDetailLevel() == SqlExplainLevel.DIGEST_ATTRIBUTES);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -277,8 +277,8 @@ private ASTNode convert() throws CalciteSemanticException {
if (where != null) {
ASTNode cond = where.getCondition().accept(new RexVisitor(schema, false, root.getCluster().getRexBuilder()));
hiveAST.where = ASTBuilder.where(cond);
planMapper.link(cond, where);
planMapper.link(cond, RelTreeSignature.of(where));
planMapper.link(cond, where);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We link RelTreeSignature first so that we can safely unify multiple filters.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cool! :D

}

/*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,15 +85,15 @@ private String relSignature(RelNode rel) {
}
final StringWriter sw = new StringWriter();
final RelWriter planWriter =
new NonRecursiveRelWriterImpl(
new RelTreeSignatureWriter(
new PrintWriter(sw), SqlExplainLevel.EXPPLAN_ATTRIBUTES, false);
rel.explain(planWriter);
return sw.toString();
}

static class NonRecursiveRelWriterImpl extends RelWriterImplCopy {
public static class RelTreeSignatureWriter extends RelWriterImplCopy {

public NonRecursiveRelWriterImpl(PrintWriter pw, SqlExplainLevel detailLevel, boolean withIdPrefix) {
public RelTreeSignatureWriter(PrintWriter pw, SqlExplainLevel detailLevel, boolean withIdPrefix) {
super(pw, detailLevel, withIdPrefix);
}

Expand Down
15 changes: 13 additions & 2 deletions ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
Original file line number Diff line number Diff line change
Expand Up @@ -1046,11 +1046,16 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Obje
}
}
if (nd instanceof TableScanOperator) {
TableScanOperator ts = (TableScanOperator) nd;
// If the tablescan operator is making use of filtering capabilities of readers then
// we will not see the actual incoming rowcount which was processed - so we may not use it for relNodes
TableScanOperator ts = (TableScanOperator) nd;
if (ts.getConf().getPredicateString() != null) {
planMapper.link(ts, new OperatorStats.MayNotUseForRelNodes());
invalidateForRelNodes(ts, false);
}
// If sampling is configured, the table scan could be canceled in the middle. We avoid using runtime stats
// for HiveTableScan and its descendants as it is not pushed down to HiveTableScan RelNodes
if (ts.getConf().getRowLimit() >= 0) {
invalidateForRelNodes(ts, true);
deniskuzZ marked this conversation as resolved.
Show resolved Hide resolved
}
}
return null;
Expand All @@ -1074,6 +1079,12 @@ private void mark(Operator<?> op) {
planMapper.link(op, new OperatorStats.IncorrectRuntimeStatsMarker());
}

/**
 * Links the given operator to a {@link OperatorStats.MayNotUseForRelNodes} marker so that its
 * runtime statistics are not reused for RelNodes.
 *
 * @param op        the operator to invalidate
 * @param recursive when {@code true}, all descendant operators are invalidated as well
 */
private void invalidateForRelNodes(Operator<?> op, boolean recursive) {
  planMapper.link(op, new OperatorStats.MayNotUseForRelNodes());
  if (!recursive) {
    return;
  }
  // Descendants are always invalidated transitively once recursion is requested.
  for (Operator<?> child : op.getChildOperators()) {
    invalidateForRelNodes(child, true);
  }
}
}

private void markOperatorsWithUnstableRuntimeStats(OptimizeTezProcContext procCtx) throws SemanticException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import org.apache.hadoop.hive.ql.optimizer.signature.OpTreeSignatureFactory;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Sets;
import org.apache.hadoop.hive.ql.optimizer.signature.RelTreeSignature;

/**
 * Enables connecting related objects to each other.
Expand All @@ -45,8 +46,11 @@
*/
public class PlanMapper {

Set<EquivGroup> groups = new HashSet<>();
private Map<Object, EquivGroup> objectMap = new CompositeMap<>(OpTreeSignature.class, AuxOpTreeSignature.class);
private final Set<EquivGroup> groups = new HashSet<>();
private final Map<Object, EquivGroup> objectMap = new CompositeMap<>(
OpTreeSignature.class,
AuxOpTreeSignature.class,
RelTreeSignature.class);

/**
* Specialized class which can compare by identity or value; based on the key type.
Expand Down
28 changes: 28 additions & 0 deletions ql/src/test/queries/clientpositive/cbo_cte_materialization.q
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
--! qt:dataset:src

set hive.optimize.cte.materialize.threshold=1;
set hive.optimize.cte.materialize.full.aggregate.only=false;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

set hive.query.planmapper.link.relnodes=false;

By setting this property the test fails with the same RuntimeException: equivalence mapping violation.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm checking the whole execution and what the flag does. When hive.query.planmapper.link.relnodes is disabled, no aux signature is created, and no merge across Operators happens. I feel it is not consistent with the description, Whether to link Calcite nodes to runtime statistics.
I guess the direct problem will be resolved if we skip linking RelNodes with Operators when hive.query.planmapper.link.relnodes=false is configured. That sounds more consistent with the description.

I'm putting my additional notes here. I tried to put some to-be solutions but it could not be very easy.
https://docs.google.com/document/d/1LCST23cSBZglBzjhnCqHlpcLv6xrXRBxU_wszSDAk9w/edit?usp=sharing

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess hive.query.planmapper.link.relnodes can be implemented like this. @kgyrtkirk might have some more contexts.
okumin@cdab2c1

Copy link
Contributor Author

@okumin okumin May 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I found this fails if we disable hive.query.planmapper.link.relnodes.

create table src (key string, value string);

set hive.cbo.fallback.strategy=NEVER;
set hive.query.planmapper.link.relnodes=false;
set hive.optimize.ppd=false;

EXPLAIN
SELECT * FROM src WHERE key = '5'
UNION ALL
SELECT * FROM src WHERE key = '5';

@zabetak As a potential workaround, I'm wondering if it makes sense to relax the validation and disable features with which PlanMapper is involved when "equivalence mapping violation" happens.
I presume PlanMapper succeeds in 99.9% cases as most qtests succeed. Currently, the remaining 0.1% fails and it sounds like an excessive penalty. We can keep the validation for qtests so that we can minimize the risk of degradation.
In my feeling, it could be possible to decrease the 0.1% to 0.05%, but could be tough to achieve 0% with a single patch.
okumin@dd5be15


EXPLAIN CBO
WITH materialized_cte AS (
SELECT key, value FROM src WHERE key != '100'
),
another_materialized_cte AS (
SELECT key, value FROM src WHERE key != '100'
)
SELECT a.key, a.value, b.key, b.value
FROM materialized_cte a
JOIN another_materialized_cte b ON a.key = b.key
ORDER BY a.key;

EXPLAIN
WITH materialized_cte AS (
SELECT key, value FROM src WHERE key != '100'
),
another_materialized_cte AS (
SELECT key, value FROM src WHERE key != '100'
)
SELECT a.key, a.value, b.key, b.value
FROM materialized_cte a
JOIN another_materialized_cte b ON a.key = b.key
ORDER BY a.key;
1 change: 0 additions & 1 deletion ql/src/test/queries/clientpositive/perf/cbo_query14.q
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
--! qt:disabled:HIVE-24167
set hive.mapred.mode=nonstrict;
-- start query 1 in stream 0 using template query14.tpl and seed 1819994127
explain cbo
Expand Down
1 change: 0 additions & 1 deletion ql/src/test/queries/clientpositive/perf/query14.q
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
--! qt:disabled:HIVE-24167
set hive.mapred.mode=nonstrict;
-- start query 1 in stream 0 using template query14.tpl and seed 1819994127
explain
Expand Down