-
Notifications
You must be signed in to change notification settings - Fork 717
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix(planner): Correct the mapping relationship between RuleID and DEFAULT_REWRITE_RULES #10508
Changes from all commits
1f95502
376df9e
14fa243
1e616ab
3a2dde9
8ccbec5
32d8085
1635945
ce03435
91699b3
c906fd3
25a7387
b2bc1fa
a27804b
88cc019
5634fc8
53ee10e
e78643d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,8 +18,9 @@ use std::sync::Arc; | |
use common_catalog::table_context::TableContext; | ||
use common_exception::ErrorCode; | ||
use common_exception::Result; | ||
use roaring::RoaringBitmap; | ||
|
||
use super::explore_rules::get_explore_rule_set; | ||
use super::explore_rules::calc_explore_rule_set; | ||
use crate::optimizer::cascades::scheduler::Scheduler; | ||
use crate::optimizer::cascades::tasks::OptimizeGroupTask; | ||
use crate::optimizer::cascades::tasks::Task; | ||
|
@@ -29,36 +30,36 @@ use crate::optimizer::cost::DefaultCostModel; | |
use crate::optimizer::format::display_memo; | ||
use crate::optimizer::memo::Memo; | ||
use crate::optimizer::rule::TransformResult; | ||
use crate::optimizer::RuleSet; | ||
use crate::optimizer::SExpr; | ||
use crate::IndexType; | ||
use crate::MetadataRef; | ||
|
||
/// A cascades-style search engine to enumerate possible alternations of a relational expression and | ||
/// find the optimal one. | ||
pub struct CascadesOptimizer { | ||
pub(crate) memo: Memo, | ||
pub(crate) cost_model: Box<dyn CostModel>, | ||
pub memo: Memo, | ||
pub explore_rule_set: roaring::RoaringBitmap, | ||
|
||
pub cost_model: Box<dyn CostModel>, | ||
|
||
/// group index -> best cost context | ||
pub(crate) best_cost_map: HashMap<IndexType, CostContext>, | ||
pub(crate) explore_rule_set: RuleSet, | ||
pub(crate) metadata: MetadataRef, | ||
pub best_cost_map: HashMap<IndexType, CostContext>, | ||
_ctx: Arc<dyn TableContext>, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Introducing |
||
} | ||
|
||
impl CascadesOptimizer { | ||
pub fn create(ctx: Arc<dyn TableContext>, metadata: MetadataRef) -> Result<Self> { | ||
pub fn create(ctx: Arc<dyn TableContext>) -> Result<Self> { | ||
let enable_bushy_join = ctx.get_settings().get_enable_bushy_join()? != 0; | ||
let explore_rule_set = if ctx.get_settings().get_enable_cbo()? { | ||
get_explore_rule_set(enable_bushy_join) | ||
calc_explore_rule_set(enable_bushy_join) | ||
} else { | ||
RuleSet::create() | ||
RoaringBitmap::new() | ||
}; | ||
Ok(CascadesOptimizer { | ||
memo: Memo::create(), | ||
cost_model: Box::new(DefaultCostModel), | ||
best_cost_map: HashMap::new(), | ||
_ctx: ctx, | ||
explore_rule_set, | ||
metadata, | ||
}) | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,30 +12,27 @@ | |
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
use roaring::RoaringBitmap; | ||
|
||
use crate::optimizer::RuleID; | ||
use crate::optimizer::RuleSet; | ||
|
||
pub fn get_explore_rule_set(enable_bushy_join: bool) -> RuleSet { | ||
pub fn calc_explore_rule_set(enable_bushy_join: bool) -> RoaringBitmap { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please refrain from breaking the current abstraction. |
||
if enable_bushy_join { | ||
rule_set_rs_b2() | ||
calc_join_rule_set_rs_b2() | ||
} else { | ||
rule_set_rs_l1() | ||
calc_join_rule_set_rs_l1() | ||
} | ||
} | ||
|
||
/// Get rule set of join order RS-B2, which may generate bushy trees. | ||
/// Read paper "The Complexity of Transformation-Based Join Enumeration" for more details. | ||
fn rule_set_rs_b2() -> RuleSet { | ||
RuleSet::create_with_ids(vec![ | ||
RuleID::CommuteJoin, | ||
RuleID::LeftAssociateJoin, | ||
RuleID::RightAssociateJoin, | ||
RuleID::ExchangeJoin, | ||
]) | ||
fn calc_join_rule_set_rs_b2() -> RoaringBitmap { | ||
(RuleID::CommuteJoin as u32..RuleID::CommuteJoinBaseTable as u32).collect::<RoaringBitmap>() | ||
} | ||
|
||
/// Get rule set of join order RS-L1, which will only generate left-deep trees. | ||
/// Read paper "The Complexity of Transformation-Based Join Enumeration" for more details. | ||
fn rule_set_rs_l1() -> RuleSet { | ||
RuleSet::create_with_ids(vec![RuleID::CommuteJoinBaseTable, RuleID::LeftExchangeJoin]) | ||
fn calc_join_rule_set_rs_l1() -> RoaringBitmap { | ||
(RuleID::CommuteJoinBaseTable as u32..RuleID::RightExchangeJoin as u32) | ||
.collect::<RoaringBitmap>() | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,6 +22,10 @@ use super::Task; | |
use crate::optimizer::cascades::scheduler::Scheduler; | ||
use crate::optimizer::cascades::tasks::SharedCounter; | ||
use crate::optimizer::cascades::CascadesOptimizer; | ||
use crate::optimizer::RuleID; | ||
use crate::optimizer::RULE_FACTORY; | ||
use crate::plans::Operator; | ||
use crate::plans::RelOperator; | ||
use crate::IndexType; | ||
|
||
#[derive(Clone, Copy, Debug)] | ||
|
@@ -140,6 +144,18 @@ impl ExploreExprTask { | |
} | ||
} | ||
|
||
fn calc_operator_rule_set( | ||
&self, | ||
optimizer: &CascadesOptimizer, | ||
operator: &RelOperator, | ||
) -> roaring::RoaringBitmap { | ||
unsafe { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do we need There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
RULE_FACTORY is read-only, so it is thread-safe. Here, we need to use unsafe to solve the problem of syntax errors. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
It's not a good reason to use
This reasoning is flawed. Implementing unsafe practices everywhere will ultimately lead to increased long-term maintenance costs. |
||
operator.exploration_candidate_rules() | ||
& (&RULE_FACTORY.exploration_rules) | ||
& (&optimizer.explore_rule_set) | ||
} | ||
} | ||
|
||
fn explore_self( | ||
&mut self, | ||
optimizer: &mut CascadesOptimizer, | ||
|
@@ -149,11 +165,11 @@ impl ExploreExprTask { | |
.memo | ||
.group(self.group_index)? | ||
.m_expr(self.m_expr_index)?; | ||
let rule_set = &optimizer.explore_rule_set; | ||
let rule_set = self.calc_operator_rule_set(optimizer, &m_expr.plan); | ||
|
||
for rule_id in rule_set.iter() { | ||
let apply_rule_task = ApplyRuleTask::with_parent( | ||
rule_id, | ||
unsafe { std::mem::transmute::<u8, RuleID>(rule_id as u8) }, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Transmuting an There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Ok, first of all, the current RuleID is safe under this transformation. could you all me to handle it with standard casts in next PR? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Doesn't make sense to use |
||
m_expr.group_index, | ||
m_expr.index, | ||
&self.ref_count, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,37 +20,45 @@ use once_cell::sync::Lazy; | |
|
||
use super::prune_unused_columns::UnusedColumnPruner; | ||
use crate::optimizer::heuristic::decorrelate::decorrelate_subquery; | ||
use crate::optimizer::rule::RulePtr; | ||
use crate::optimizer::rule::TransformResult; | ||
use crate::optimizer::ColumnSet; | ||
use crate::optimizer::RuleFactory; | ||
use crate::optimizer::RuleID; | ||
use crate::optimizer::SExpr; | ||
use crate::optimizer::RULE_FACTORY; | ||
use crate::plans::Operator; | ||
use crate::plans::RelOperator; | ||
use crate::BindContext; | ||
use crate::MetadataRef; | ||
|
||
pub static DEFAULT_REWRITE_RULES: Lazy<Vec<RuleID>> = Lazy::new(|| { | ||
vec![ | ||
// Filter | ||
RuleID::NormalizeDisjunctiveFilter, | ||
RuleID::NormalizeScalarFilter, | ||
RuleID::EliminateFilter, | ||
RuleID::EliminateEvalScalar, | ||
RuleID::MergeFilter, | ||
RuleID::MergeEvalScalar, | ||
RuleID::PushDownFilterUnion, | ||
RuleID::PushDownFilterAggregate, | ||
RuleID::PushDownFilterSort, | ||
RuleID::PushDownFilterEvalScalar, | ||
RuleID::PushDownFilterJoin, | ||
RuleID::PushDownFilterScan, | ||
// EvalScalar | ||
RuleID::EliminateEvalScalar, | ||
RuleID::MergeEvalScalar, | ||
// Limit | ||
RuleID::PushDownLimitUnion, | ||
RuleID::RulePushDownLimitExpression, | ||
RuleID::PushDownLimitSort, | ||
RuleID::PushDownLimitAggregate, | ||
RuleID::PushDownLimitOuterJoin, | ||
RuleID::PushDownLimitScan, | ||
RuleID::PushDownFilterSort, | ||
RuleID::PushDownFilterEvalScalar, | ||
RuleID::PushDownFilterJoin, | ||
// Agg | ||
RuleID::FoldCountAggregate, | ||
RuleID::SplitAggregate, | ||
RuleID::PushDownFilterScan, | ||
RuleID::PushDownPrewhere, /* PushDownPrwhere should be after all rules except PushDownFilterScan */ | ||
// Sort | ||
RuleID::PushDownSortScan, // PushDownFilterScan should be after PushDownPrewhere | ||
] | ||
}); | ||
|
@@ -113,13 +121,27 @@ impl HeuristicOptimizer { | |
Ok(result) | ||
} | ||
|
||
fn calc_operator_rule_set(&self, operator: &RelOperator) -> roaring::RoaringBitmap { | ||
unsafe { operator.transformation_candidate_rules() & (&RULE_FACTORY.transformation_rules) } | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ditto, why do we need There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
RULE_FACTORY is read-only, so it is thread-safe. Here, we need to use unsafe to solve the problem of syntax errors. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Doesn't make sense to me. |
||
} | ||
|
||
fn get_rule(&self, rule_id: u32) -> Result<RulePtr> { | ||
unsafe { | ||
RULE_FACTORY.create_rule( | ||
DEFAULT_REWRITE_RULES[rule_id as usize], | ||
Some(self.metadata.clone()), | ||
) | ||
} | ||
} | ||
|
||
/// Try to apply the rules to the expression. | ||
/// Return the final result that no rule can be applied. | ||
fn apply_transform_rules(&self, s_expr: &SExpr) -> Result<SExpr> { | ||
let mut s_expr = s_expr.clone(); | ||
let rule_set = self.calc_operator_rule_set(&s_expr.plan); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should distinguish between In this case, a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Please refer to the introduction of my implementation ideas above. If you use RuleList, you cannot quickly calculate the applicable rules for each operator, which will cause too many loops and reduce performance. And Please refer to the benchmark results There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Any other question? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Could you please provide more details about the benchmark? The benefits appear to be relatively low. hits: from 1.01 to 0.99 tpch: from 1.02 to 0.98 Although performance is important, I prefer to prioritize code readability and maintainability. It's not worth sacrificing those aspects for the sake of minor performance improvements. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I disagree with this point of view. With the increase of rules, each rule will increase the number of cycles optimized by unrelated operators, and it is exponential. When I think of it, the performance comparison will be more obvious. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
You are correct, but for now, it is not worth it. We can optimize when we identify the bottleneck. |
||
|
||
for rule_id in DEFAULT_REWRITE_RULES.iter() { | ||
let rule = RuleFactory::create_rule(*rule_id, self.metadata.clone())?; | ||
for rule_id in rule_set.iter() { | ||
let rule = self.get_rule(rule_id)?; | ||
let mut state = TransformResult::new(); | ||
if s_expr.match_pattern(rule.pattern()) && !s_expr.applied_rule(&rule.id()) { | ||
s_expr.set_applied_rule(&rule.id()); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,6 +13,8 @@ | |
// limitations under the License. | ||
|
||
use common_exception::Result; | ||
use once_cell::sync::Lazy; | ||
use roaring::RoaringBitmap; | ||
|
||
use super::rewrite::RuleEliminateEvalScalar; | ||
use super::rewrite::RuleFoldCountAggregate; | ||
|
@@ -47,16 +49,36 @@ use crate::optimizer::rule::RuleID; | |
use crate::optimizer::rule::RulePtr; | ||
use crate::MetadataRef; | ||
|
||
pub struct RuleFactory; | ||
// read only, so thread safe | ||
pub static mut RULE_FACTORY: Lazy<RuleFactory> = Lazy::new(RuleFactory::create); | ||
|
||
pub struct RuleFactory { | ||
pub transformation_rules: roaring::RoaringBitmap, | ||
pub exploration_rules: roaring::RoaringBitmap, | ||
} | ||
|
||
impl RuleFactory { | ||
pub fn create_rule(id: RuleID, metadata: MetadataRef) -> Result<RulePtr> { | ||
pub fn create() -> Self { | ||
RuleFactory { | ||
transformation_rules: (RuleID::NormalizeScalarFilter as u32 | ||
..RuleID::CommuteJoin as u32) | ||
.collect::<RoaringBitmap>(), | ||
exploration_rules: (RuleID::CommuteJoin as u32..(RuleID::RightExchangeJoin as u32) + 1) | ||
.collect::<RoaringBitmap>(), | ||
} | ||
} | ||
Comment on lines
+62
to
+69
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't understand that code. Could you explain it further? Is it safe to use |
||
|
||
pub fn create_rule(&self, id: RuleID, metadata: Option<MetadataRef>) -> Result<RulePtr> { | ||
match id { | ||
RuleID::EliminateEvalScalar => Ok(Box::new(RuleEliminateEvalScalar::new())), | ||
RuleID::PushDownFilterUnion => Ok(Box::new(RulePushDownFilterUnion::new())), | ||
RuleID::PushDownFilterEvalScalar => Ok(Box::new(RulePushDownFilterEvalScalar::new())), | ||
RuleID::PushDownFilterJoin => Ok(Box::new(RulePushDownFilterJoin::new(metadata))), | ||
RuleID::PushDownFilterScan => Ok(Box::new(RulePushDownFilterScan::new(metadata))), | ||
RuleID::PushDownFilterJoin => { | ||
Ok(Box::new(RulePushDownFilterJoin::new(metadata.unwrap()))) | ||
} | ||
RuleID::PushDownFilterScan => { | ||
Ok(Box::new(RulePushDownFilterScan::new(metadata.unwrap()))) | ||
} | ||
RuleID::PushDownFilterSort => Ok(Box::new(RulePushDownFilterSort::new())), | ||
RuleID::PushDownLimitUnion => Ok(Box::new(RulePushDownLimitUnion::new())), | ||
RuleID::PushDownLimitScan => Ok(Box::new(RulePushDownLimitScan::new())), | ||
|
@@ -82,7 +104,7 @@ impl RuleFactory { | |
RuleID::LeftExchangeJoin => Ok(Box::new(RuleLeftExchangeJoin::new())), | ||
RuleID::RightExchangeJoin => Ok(Box::new(RuleRightExchangeJoin::new())), | ||
RuleID::ExchangeJoin => Ok(Box::new(RuleExchangeJoin::new())), | ||
RuleID::PushDownPrewhere => Ok(Box::new(RulePushDownPrewhere::new(metadata))), | ||
RuleID::PushDownPrewhere => Ok(Box::new(RulePushDownPrewhere::new(metadata.unwrap()))), | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we'd better not expose the
RoaringBitmap
here, should useRuleSet
instead.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I understand what you mean, I want to transform the RuleSet to use bitmap management, and then submit a PR. If there is no problem with this PR, can it be passed first?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please address it in this PR.