New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[CALCITE-4302] Improve cost propagation in volcano to avoid re-propagation #2187
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -45,15 +45,13 @@ | |
|
||
import java.io.PrintWriter; | ||
import java.io.StringWriter; | ||
import java.util.ArrayDeque; | ||
import java.util.ArrayList; | ||
import java.util.Collection; | ||
import java.util.Comparator; | ||
import java.util.HashSet; | ||
import java.util.LinkedHashSet; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.Queue; | ||
import java.util.Set; | ||
import java.util.function.Function; | ||
import java.util.stream.Collectors; | ||
|
@@ -159,7 +157,7 @@ public class RelSubset extends AbstractRelNode { | |
* <ol> | ||
* <li>If there are no subsuming subsets, the subset is initially empty.</li> | ||
* <li>After creation, {@code best} and {@code bestCost} are maintained | ||
* incrementally by {@link #propagateCostImprovements0} and | ||
* incrementally by {@link VolcanoPlanner#propagateCostImprovements} and | ||
* {@link RelSet#mergeWith(VolcanoPlanner, RelSet)}.</li> | ||
* </ol> | ||
*/ | ||
|
@@ -375,76 +373,6 @@ RelNode buildCheapestPlan(VolcanoPlanner planner) { | |
return cheapest; | ||
} | ||
|
||
/** | ||
* Checks whether a relexp has made its subset cheaper and, if so, | ||
* propagates the new cost to parent rel nodes in a breadth-first manner. | ||
* | ||
* @param planner Planner | ||
* @param mq Metadata query | ||
* @param rel Relational expression whose cost has improved | ||
* @param activeSet Set of active subsets, for cycle detection | ||
*/ | ||
void propagateCostImprovements(VolcanoPlanner planner, RelMetadataQuery mq, | ||
RelNode rel, Set<RelSubset> activeSet) { | ||
Queue<Pair<RelSubset, RelNode>> propagationQueue = new ArrayDeque<>(); | ||
for (RelSubset subset : set.subsets) { | ||
if (rel.getTraitSet().satisfies(subset.traitSet)) { | ||
propagationQueue.offer(Pair.of(subset, rel)); | ||
} | ||
} | ||
|
||
while (!propagationQueue.isEmpty()) { | ||
Pair<RelSubset, RelNode> p = propagationQueue.poll(); | ||
p.left.propagateCostImprovements0(planner, mq, p.right, activeSet, propagationQueue); | ||
} | ||
} | ||
|
||
void propagateCostImprovements0(VolcanoPlanner planner, RelMetadataQuery mq, | ||
RelNode rel, Set<RelSubset> activeSet, | ||
Queue<Pair<RelSubset, RelNode>> propagationQueue) { | ||
++timestamp; | ||
|
||
if (!activeSet.add(this)) { | ||
// This subset is already in the chain being propagated to. This | ||
// means that the graph is cyclic, and therefore the cost of this | ||
// relational expression - not this subset - must be infinite. | ||
LOGGER.trace("cyclic: {}", this); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The cyclic check has been removed; does that mean this code is useless now? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The cyclic check was necessary for the old update logic, i.e. DFS. Since this is now a Dijkstra-like algorithm that always propagates the changed relNode with the smallest best cost, the update will automatically stop after traversing a full cycle, so no special handling is needed any more. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is there any other code that can detect a cyclic path now? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I am not sure if there is any. Note that the cycle detection code I deleted here was already not working in this BFS implementation. It is dead code left over from when we changed from DFS to BFS. |
||
return; | ||
} | ||
try { | ||
RelOptCost cost = planner.getCost(rel, mq); | ||
|
||
// Update subset best cost when we find a cheaper rel or the current | ||
// best's cost is changed | ||
if (cost.isLt(bestCost)) { | ||
LOGGER.trace("Subset cost changed: subset [{}] cost was {} now {}", | ||
this, bestCost, cost); | ||
|
||
bestCost = cost; | ||
best = rel; | ||
upperBound = bestCost; | ||
// since best was changed, cached metadata for this subset should be removed | ||
mq.clearCache(this); | ||
|
||
// Propagate cost change to parents | ||
for (RelNode parent : getParents()) { | ||
// removes parent cached metadata since its input was changed | ||
mq.clearCache(parent); | ||
final RelSubset parentSubset = planner.getSubset(parent); | ||
|
||
// parent subset will clear its cache in propagateCostImprovements0 method itself | ||
for (RelSubset subset : parentSubset.set.subsets) { | ||
if (parent.getTraitSet().satisfies(subset.traitSet)) { | ||
propagationQueue.offer(Pair.of(subset, parent)); | ||
} | ||
} | ||
} | ||
} | ||
} finally { | ||
activeSet.remove(this); | ||
} | ||
} | ||
|
||
@Override public void collectVariablesUsed(Set<CorrelationId> variableSet) { | ||
variableSet.addAll(set.variablesUsed); | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -73,6 +73,7 @@ | |
import java.util.LinkedHashMap; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.PriorityQueue; | ||
import java.util.Set; | ||
import java.util.regex.Matcher; | ||
import java.util.regex.Pattern; | ||
|
@@ -884,13 +885,9 @@ void rename(RelNode rel) { | |
final RelSubset equivSubset = getSubset(equivRel); | ||
for (RelSubset s : subset.set.subsets) { | ||
if (s.best == rel) { | ||
Set<RelSubset> activeSet = new HashSet<>(); | ||
s.best = equivRel; | ||
|
||
// Propagate cost improvement since this potentially would change the subset's best cost | ||
s.propagateCostImprovements( | ||
this, equivRel.getCluster().getMetadataQuery(), | ||
equivRel, activeSet); | ||
propagateCostImprovements(equivRel); | ||
} | ||
} | ||
|
||
|
@@ -906,6 +903,67 @@ void rename(RelNode rel) { | |
} | ||
} | ||
|
||
/** | ||
* Checks whether a relexp has made any subset cheaper and, if so, | ||
* propagates the new cost to parent rel nodes. | ||
* | ||
* @param rel Relational expression whose cost has improved | ||
*/ | ||
void propagateCostImprovements(RelNode rel) { | ||
RelMetadataQuery mq = rel.getCluster().getMetadataQuery(); | ||
Map<RelNode, RelOptCost> propagateRels = new HashMap<>(); | ||
PriorityQueue<RelNode> propagateHeap = new PriorityQueue<>((o1, o2) -> { | ||
RelOptCost c1 = propagateRels.get(o1); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. After this change, the propagation is neither There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Although not exactly BFS, but it (Dijkstra) works very similar to BFS, I'd like to view it as a controlled special type of BFS. I think the heap size here and the queue size in BFS should be about the same. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is different with There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Or, can we make the promotion evidence more clear ? Is there any possibility that we do some benchmark test ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If you consider cost as a kind of distance between relnodes/subsets, this propagation process is basically Dijkstra in a directed graph. Computing the best plan in this directed graph is finding the "shortest" path. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I did compare the running time using some big queries, with the patch the whole volcano phase is about 5% faster. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks, +1 for this change. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 
Thanks :D |
||
RelOptCost c2 = propagateRels.get(o2); | ||
if (c1.equals(c2)) { | ||
return 0; | ||
} else if (c1.isLt(c2)) { | ||
return -1; | ||
} | ||
return 1; | ||
}); | ||
propagateRels.put(rel, getCost(rel, mq)); | ||
propagateHeap.offer(rel); | ||
|
||
while (!propagateHeap.isEmpty()) { | ||
RelNode relNode = propagateHeap.poll(); | ||
RelOptCost cost = propagateRels.get(relNode); | ||
|
||
for (RelSubset subset : getSet(relNode).subsets) { | ||
if (!relNode.getTraitSet().satisfies(subset.getTraitSet())) { | ||
continue; | ||
} | ||
if (!cost.isLt(subset.bestCost)) { | ||
continue; | ||
} | ||
// Update subset best cost when we find a cheaper rel or the current | ||
// best's cost is changed | ||
subset.timestamp++; | ||
LOGGER.trace("Subset cost changed: subset [{}] cost was {} now {}", | ||
subset, subset.bestCost, cost); | ||
|
||
subset.bestCost = cost; | ||
subset.best = relNode; | ||
// since best was changed, cached metadata for this subset should be removed | ||
mq.clearCache(subset); | ||
|
||
for (RelNode parent : subset.getParents()) { | ||
mq.clearCache(parent); | ||
RelOptCost newCost = getCost(parent, mq); | ||
RelOptCost existingCost = propagateRels.get(parent); | ||
if (existingCost == null || newCost.isLt(existingCost)) { | ||
propagateRels.put(parent, newCost); | ||
if (existingCost != null) { | ||
// Cost reduced, force the heap to adjust its ordering | ||
propagateHeap.remove(parent); | ||
} | ||
propagateHeap.offer(parent); | ||
} | ||
} | ||
} | ||
} | ||
} | ||
|
||
/** | ||
* Registers a {@link RelNode}, which has already been registered, in a new | ||
* {@link RelSet}. | ||
|
@@ -1263,9 +1321,8 @@ private RelSubset addRelToSet(RelNode rel, RelSet set) { | |
// 100. We think this happens because the back-links to parents are | ||
// not established. So, give the subset another chance to figure out | ||
// its cost. | ||
final RelMetadataQuery mq = rel.getCluster().getMetadataQuery(); | ||
try { | ||
subset.propagateCostImprovements(this, mq, rel, new HashSet<>()); | ||
propagateCostImprovements(rel); | ||
} catch (CyclicMetadataException e) { | ||
// ignore | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The `changedSubsets` keys are never used.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
fixed