Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions src/query/sql/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ chrono = { workspace = true }
dashmap = "5.4"
globiter = "0.1"
itertools = "0.10.5"
num-derive = "0.3.3"
num-traits = "0.2.15"
once_cell = "1.15.0"
opendal = { workspace = true }
ordered-float = { workspace = true }
Expand Down
25 changes: 12 additions & 13 deletions src/query/sql/src/planner/optimizer/cascades/cascade.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,8 @@ use std::sync::Arc;
use common_catalog::table_context::TableContext;
use common_exception::ErrorCode;
use common_exception::Result;
use roaring::RoaringBitmap;

use super::explore_rules::calc_explore_rule_set;
use super::explore_rules::get_explore_rule_set;
use crate::optimizer::cascades::scheduler::Scheduler;
use crate::optimizer::cascades::tasks::OptimizeGroupTask;
use crate::optimizer::cascades::tasks::Task;
Expand All @@ -30,36 +29,36 @@ use crate::optimizer::cost::DefaultCostModel;
use crate::optimizer::format::display_memo;
use crate::optimizer::memo::Memo;
use crate::optimizer::rule::TransformResult;
use crate::optimizer::RuleSet;
use crate::optimizer::SExpr;
use crate::IndexType;
use crate::MetadataRef;

/// A cascades-style search engine to enumerate possible alternations of a relational expression and
/// find the optimal one.
pub struct CascadesOptimizer {
pub memo: Memo,
pub explore_rule_set: roaring::RoaringBitmap,

pub cost_model: Box<dyn CostModel>,

pub(crate) memo: Memo,
pub(crate) cost_model: Box<dyn CostModel>,
/// group index -> best cost context
pub best_cost_map: HashMap<IndexType, CostContext>,
_ctx: Arc<dyn TableContext>,
pub(crate) best_cost_map: HashMap<IndexType, CostContext>,
pub(crate) explore_rule_set: RuleSet,
pub(crate) metadata: MetadataRef,
}

impl CascadesOptimizer {
pub fn create(ctx: Arc<dyn TableContext>) -> Result<Self> {
pub fn create(ctx: Arc<dyn TableContext>, metadata: MetadataRef) -> Result<Self> {
let enable_bushy_join = ctx.get_settings().get_enable_bushy_join()? != 0;
let explore_rule_set = if ctx.get_settings().get_enable_cbo()? {
calc_explore_rule_set(enable_bushy_join)
get_explore_rule_set(enable_bushy_join)
} else {
RoaringBitmap::new()
RuleSet::create()
};
Ok(CascadesOptimizer {
memo: Memo::create(),
cost_model: Box::new(DefaultCostModel),
best_cost_map: HashMap::new(),
_ctx: ctx,
explore_rule_set,
metadata,
})
}

Expand Down
23 changes: 13 additions & 10 deletions src/query/sql/src/planner/optimizer/cascades/explore_rules.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,27 +12,30 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use roaring::RoaringBitmap;

use crate::optimizer::RuleID;
use crate::optimizer::RuleSet;

pub fn calc_explore_rule_set(enable_bushy_join: bool) -> RoaringBitmap {
pub fn get_explore_rule_set(enable_bushy_join: bool) -> RuleSet {
if enable_bushy_join {
calc_join_rule_set_rs_b2()
rule_set_rs_b2()
} else {
calc_join_rule_set_rs_l1()
rule_set_rs_l1()
}
}

/// Get rule set of join order RS-B2, which may generate bushy trees.
/// Read paper "The Complexity of Transformation-Based Join Enumeration" for more details.
fn calc_join_rule_set_rs_b2() -> RoaringBitmap {
(RuleID::CommuteJoin as u32..RuleID::CommuteJoinBaseTable as u32).collect::<RoaringBitmap>()
fn rule_set_rs_b2() -> RuleSet {
RuleSet::create_with_ids(vec![
RuleID::CommuteJoin,
RuleID::LeftAssociateJoin,
RuleID::RightAssociateJoin,
RuleID::ExchangeJoin,
])
}

/// Get rule set of join order RS-L1, which will only generate left-deep trees.
/// Read paper "The Complexity of Transformation-Based Join Enumeration" for more details.
fn calc_join_rule_set_rs_l1() -> RoaringBitmap {
(RuleID::CommuteJoinBaseTable as u32..RuleID::RightExchangeJoin as u32)
.collect::<RoaringBitmap>()
fn rule_set_rs_l1() -> RuleSet {
RuleSet::create_with_ids(vec![RuleID::CommuteJoinBaseTable, RuleID::LeftExchangeJoin])
}
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ impl ApplyRuleTask {
let group = optimizer.memo.group(self.target_group_index)?;
let m_expr = group.m_expr(self.m_expr_index)?;
let mut state = TransformResult::new();
let rule = RuleFactory::create().create_rule(self.rule_id, None)?;
let rule = RuleFactory::create_rule(self.rule_id, optimizer.metadata.clone())?;
m_expr.apply_rule(&optimizer.memo, &rule, &mut state)?;
optimizer.insert_from_transform_state(self.target_group_index, state)?;

Expand Down
20 changes: 2 additions & 18 deletions src/query/sql/src/planner/optimizer/cascades/tasks/explore_expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,6 @@ use super::Task;
use crate::optimizer::cascades::scheduler::Scheduler;
use crate::optimizer::cascades::tasks::SharedCounter;
use crate::optimizer::cascades::CascadesOptimizer;
use crate::optimizer::RuleID;
use crate::optimizer::RULE_FACTORY;
use crate::plans::Operator;
use crate::plans::RelOperator;
use crate::IndexType;

#[derive(Clone, Copy, Debug)]
Expand Down Expand Up @@ -144,18 +140,6 @@ impl ExploreExprTask {
}
}

fn calc_operator_rule_set(
&self,
optimizer: &CascadesOptimizer,
operator: &RelOperator,
) -> roaring::RoaringBitmap {
unsafe {
operator.exploration_candidate_rules()
& (&RULE_FACTORY.exploration_rules)
& (&optimizer.explore_rule_set)
}
}

fn explore_self(
&mut self,
optimizer: &mut CascadesOptimizer,
Expand All @@ -165,11 +149,11 @@ impl ExploreExprTask {
.memo
.group(self.group_index)?
.m_expr(self.m_expr_index)?;
let rule_set = self.calc_operator_rule_set(optimizer, &m_expr.plan);
let rule_set = &optimizer.explore_rule_set;

for rule_id in rule_set.iter() {
let apply_rule_task = ApplyRuleTask::with_parent(
unsafe { std::mem::transmute::<u8, RuleID>(rule_id as u8) },
rule_id,
m_expr.group_index,
m_expr.index,
&self.ref_count,
Expand Down
23 changes: 3 additions & 20 deletions src/query/sql/src/planner/optimizer/heuristic/heuristic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,11 @@ use once_cell::sync::Lazy;

use super::prune_unused_columns::UnusedColumnPruner;
use crate::optimizer::heuristic::decorrelate::decorrelate_subquery;
use crate::optimizer::rule::RulePtr;
use crate::optimizer::rule::TransformResult;
use crate::optimizer::ColumnSet;
use crate::optimizer::RuleFactory;
use crate::optimizer::RuleID;
use crate::optimizer::SExpr;
use crate::optimizer::RULE_FACTORY;
use crate::plans::Operator;
use crate::plans::RelOperator;
use crate::BindContext;
use crate::MetadataRef;

Expand Down Expand Up @@ -118,27 +115,13 @@ impl HeuristicOptimizer {
Ok(result)
}

fn calc_operator_rule_set(&self, operator: &RelOperator) -> roaring::RoaringBitmap {
unsafe { operator.transformation_candidate_rules() & (&RULE_FACTORY.transformation_rules) }
}

fn get_rule(&self, rule_id: u32) -> Result<RulePtr> {
unsafe {
RULE_FACTORY.create_rule(
DEFAULT_REWRITE_RULES[rule_id as usize],
Some(self.metadata.clone()),
)
}
}

/// Try to apply the rules to the expression.
/// Return the final result that no rule can be applied.
fn apply_transform_rules(&self, s_expr: &SExpr) -> Result<SExpr> {
let mut s_expr = s_expr.clone();
let rule_set = self.calc_operator_rule_set(&s_expr.plan);

for rule_id in rule_set.iter() {
let rule = self.get_rule(rule_id)?;
for rule_id in DEFAULT_REWRITE_RULES.iter() {
let rule = RuleFactory::create_rule(*rule_id, self.metadata.clone())?;
let mut state = TransformResult::new();
if s_expr.match_pattern(rule.pattern()) && !s_expr.applied_rule(&rule.id()) {
s_expr.set_applied_rule(&rule.id());
Expand Down
1 change: 0 additions & 1 deletion src/query/sql/src/planner/optimizer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,5 +43,4 @@ pub use rule::try_push_down_filter_join;
pub use rule::RuleFactory;
pub use rule::RuleID;
pub use rule::RuleSet;
pub use rule::RULE_FACTORY;
pub use s_expr::SExpr;
8 changes: 4 additions & 4 deletions src/query/sql/src/planner/optimizer/optimizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -147,10 +147,10 @@ pub fn optimize_query(
) -> Result<SExpr> {
let contains_local_table_scan = contains_local_table_scan(&s_expr, &metadata);

let mut heuristic = HeuristicOptimizer::new(ctx.clone(), bind_context, metadata);
let mut heuristic = HeuristicOptimizer::new(ctx.clone(), bind_context, metadata.clone());
let mut result = heuristic.optimize(s_expr)?;

let mut cascades = CascadesOptimizer::create(ctx.clone())?;
let mut cascades = CascadesOptimizer::create(ctx.clone(), metadata)?;
result = cascades.optimize(result)?;

// So far, we don't have ability to execute distributed query
Expand Down Expand Up @@ -178,10 +178,10 @@ fn get_optimized_memo(
metadata: MetadataRef,
bind_context: Box<BindContext>,
) -> Result<(Memo, HashMap<IndexType, CostContext>)> {
let mut heuristic = HeuristicOptimizer::new(ctx.clone(), bind_context, metadata);
let mut heuristic = HeuristicOptimizer::new(ctx.clone(), bind_context, metadata.clone());
let result = heuristic.optimize(s_expr)?;

let mut cascades = CascadesOptimizer::create(ctx)?;
let mut cascades = CascadesOptimizer::create(ctx, metadata)?;
cascades.optimize(result)?;
Ok((cascades.memo, cascades.best_cost_map))
}
32 changes: 5 additions & 27 deletions src/query/sql/src/planner/optimizer/rule/factory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@
// limitations under the License.

use common_exception::Result;
use once_cell::sync::Lazy;
use roaring::RoaringBitmap;

use super::rewrite::RuleEliminateEvalScalar;
use super::rewrite::RuleFoldCountAggregate;
Expand Down Expand Up @@ -49,36 +47,16 @@ use crate::optimizer::rule::RuleID;
use crate::optimizer::rule::RulePtr;
use crate::MetadataRef;

// read only, so thread safe
pub static mut RULE_FACTORY: Lazy<RuleFactory> = Lazy::new(RuleFactory::create);

pub struct RuleFactory {
pub transformation_rules: roaring::RoaringBitmap,
pub exploration_rules: roaring::RoaringBitmap,
}
pub struct RuleFactory;

impl RuleFactory {
pub fn create() -> Self {
RuleFactory {
transformation_rules: (RuleID::NormalizeScalarFilter as u32
..RuleID::CommuteJoin as u32)
.collect::<RoaringBitmap>(),
exploration_rules: (RuleID::CommuteJoin as u32..(RuleID::RightExchangeJoin as u32) + 1)
.collect::<RoaringBitmap>(),
}
}

pub fn create_rule(&self, id: RuleID, metadata: Option<MetadataRef>) -> Result<RulePtr> {
pub fn create_rule(id: RuleID, metadata: MetadataRef) -> Result<RulePtr> {
match id {
RuleID::EliminateEvalScalar => Ok(Box::new(RuleEliminateEvalScalar::new())),
RuleID::PushDownFilterUnion => Ok(Box::new(RulePushDownFilterUnion::new())),
RuleID::PushDownFilterEvalScalar => Ok(Box::new(RulePushDownFilterEvalScalar::new())),
RuleID::PushDownFilterJoin => {
Ok(Box::new(RulePushDownFilterJoin::new(metadata.unwrap())))
}
RuleID::PushDownFilterScan => {
Ok(Box::new(RulePushDownFilterScan::new(metadata.unwrap())))
}
RuleID::PushDownFilterJoin => Ok(Box::new(RulePushDownFilterJoin::new(metadata))),
RuleID::PushDownFilterScan => Ok(Box::new(RulePushDownFilterScan::new(metadata))),
RuleID::PushDownFilterSort => Ok(Box::new(RulePushDownFilterSort::new())),
RuleID::PushDownLimitUnion => Ok(Box::new(RulePushDownLimitUnion::new())),
RuleID::PushDownLimitScan => Ok(Box::new(RulePushDownLimitScan::new())),
Expand All @@ -104,7 +82,7 @@ impl RuleFactory {
RuleID::LeftExchangeJoin => Ok(Box::new(RuleLeftExchangeJoin::new())),
RuleID::RightExchangeJoin => Ok(Box::new(RuleRightExchangeJoin::new())),
RuleID::ExchangeJoin => Ok(Box::new(RuleExchangeJoin::new())),
RuleID::PushDownPrewhere => Ok(Box::new(RulePushDownPrewhere::new(metadata.unwrap()))),
RuleID::PushDownPrewhere => Ok(Box::new(RulePushDownPrewhere::new(metadata))),
}
}
}
1 change: 0 additions & 1 deletion src/query/sql/src/planner/optimizer/rule/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ mod transform;
mod transform_result;

pub use factory::RuleFactory;
pub use factory::RULE_FACTORY;
pub use rewrite::try_push_down_filter_join;
pub use rule::Rule;
pub use rule::RuleID;
Expand Down
4 changes: 3 additions & 1 deletion src/query/sql/src/planner/optimizer/rule/rule.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ use std::fmt::Display;
use std::fmt::Formatter;

use common_exception::Result;
use num_derive::FromPrimitive;
use num_derive::ToPrimitive;

use crate::optimizer::rule::TransformResult;
use crate::optimizer::SExpr;
Expand All @@ -36,7 +38,7 @@ pub trait Rule {

// If add a new rule, please add it to the operator's corresponding `transformation_candidate_rules`
// Such as `PushDownFilterAggregate` is related to `Filter` operator.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, FromPrimitive, ToPrimitive)]
pub enum RuleID {
// Rewrite rules
NormalizeScalarFilter,
Expand Down
Loading