From c7d413912aad059b74ecb726773334f95a6ec943 Mon Sep 17 00:00:00 2001 From: baishen Date: Fri, 14 Nov 2025 17:22:32 +0800 Subject: [PATCH] chore(query): Optimize Optimizer Performance by Reducing Redundant Computations --- .../planner/optimizer/optimizer_context.rs | 53 +++++++++++-------- .../optimizers/recursive/recursive.rs | 46 +++++++++------- .../sql/src/planner/semantic/type_check.rs | 9 ++++ 3 files changed, 67 insertions(+), 41 deletions(-) diff --git a/src/query/sql/src/planner/optimizer/optimizer_context.rs b/src/query/sql/src/planner/optimizer/optimizer_context.rs index a12abfc8ad600..695bd266b72a2 100644 --- a/src/query/sql/src/planner/optimizer/optimizer_context.rs +++ b/src/query/sql/src/planner/optimizer/optimizer_context.rs @@ -13,6 +13,7 @@ // limitations under the License. use std::collections::HashMap; +use std::collections::HashSet; use std::sync::Arc; use databend_common_catalog::table_context::TableContext; @@ -38,6 +39,10 @@ pub struct OptimizerContext { enable_dphyp: RwLock, max_push_down_limit: RwLock, planning_agg_index: RwLock, + skip_list: HashSet, + skip_list_str: String, + grouping_sets_to_union: bool, + #[educe(Debug(ignore))] sample_executor: RwLock>>, @@ -52,6 +57,20 @@ pub struct OptimizerContext { impl OptimizerContext { pub fn new(table_ctx: Arc, metadata: MetadataRef) -> Arc { + let settings = table_ctx.get_settings(); + let grouping_sets_to_union = settings.get_grouping_sets_to_union().unwrap_or_default(); + + let (skip_list_str, skip_list) = match settings.get_optimizer_skip_list() { + Ok(skip_list_str) if !skip_list_str.is_empty() => { + let skip_list = skip_list_str + .split(',') + .map(|item| item.trim().to_lowercase()) + .collect::>(); + (skip_list_str.to_string(), skip_list) + } + _ => ("".to_string(), HashSet::new()), + }; + Arc::new(Self { table_ctx, metadata, @@ -62,6 +81,9 @@ impl OptimizerContext { max_push_down_limit: RwLock::new(10000), sample_executor: RwLock::new(None), planning_agg_index: RwLock::new(false), + skip_list, + skip_list_str, + grouping_sets_to_union, flags: RwLock::new(HashMap::new()), enable_trace: RwLock::new(false), }) @@ -148,33 +170,22 @@ impl OptimizerContext { /// Check if an optimizer or rule is disabled based on optimizer_skip_list setting pub fn is_optimizer_disabled(&self, name: &str) -> bool { - let settings = self.get_table_ctx().get_settings(); - - if !settings.get_grouping_sets_to_union().unwrap_or_default() + if !self.grouping_sets_to_union && (name == RuleID::GroupingSetsToUnion.to_string() || name == RuleID::HierarchicalGroupingSetsToUnion.to_string()) { return true; } - match settings.get_optimizer_skip_list() { - Ok(skip_list) if !skip_list.is_empty() => { - let name_lower = name.to_lowercase(); - let is_disabled = skip_list - .split(',') - .map(str::trim) - .any(|item| item.to_lowercase() == name_lower); - - if is_disabled { - log::warn!( - "Skipping optimizer component: {} (found in optimizer_skip_list: {})", - name, - skip_list - ); - } - is_disabled - } - _ => false, + let name_lower = name.to_lowercase(); + let is_disabled = self.skip_list.contains(&name_lower); + if is_disabled { + log::warn!( + "Skipping optimizer component: {} (found in optimizer_skip_list: {})", + name, + self.skip_list_str + ); } + is_disabled } } diff --git a/src/query/sql/src/planner/optimizer/optimizers/recursive/recursive.rs b/src/query/sql/src/planner/optimizer/optimizers/recursive/recursive.rs index 798f1522a7b76..3a4f17bda7ea7 100644 --- a/src/query/sql/src/planner/optimizer/optimizers/recursive/recursive.rs +++ b/src/query/sql/src/planner/optimizer/optimizers/recursive/recursive.rs @@ -51,23 +51,30 @@ impl RecursiveRuleOptimizer { #[recursive::recursive] fn optimize_expression(&self, s_expr: &SExpr) -> Result { - let mut optimized_children = Vec::with_capacity(s_expr.arity()); - let mut children_changed = false; - for expr in s_expr.children() { - let optimized_child = self.optimize_sync(expr)?; - if !optimized_child.eq(expr) { - children_changed = true; + let mut current = s_expr.clone(); + + loop { + let mut optimized_children = Vec::with_capacity(current.arity()); + let mut children_changed = false; + for expr in current.children() { + let optimized_child = self.optimize_sync(expr)?; + if !optimized_child.eq(expr) { + children_changed = true; + } + optimized_children.push(Arc::new(optimized_child)); } - optimized_children.push(Arc::new(optimized_child)); - } - let mut optimized_expr = s_expr.clone(); - if children_changed { - optimized_expr = s_expr.replace_children(optimized_children); - } - let result = self.apply_transform_rules(&optimized_expr, self.rules)?; + if children_changed { + current = current.replace_children(optimized_children); + } - Ok(result) + match self.apply_transform_rules(¤t, self.rules)? { + Some(new_expr) => { + current = new_expr; + } + None => return Ok(current), + } + } } /// Trace rule execution, regardless of whether the rule had an effect @@ -104,7 +111,7 @@ impl RecursiveRuleOptimizer { Ok(()) } - fn apply_transform_rules(&self, s_expr: &SExpr, rules: &[RuleID]) -> Result { + fn apply_transform_rules(&self, s_expr: &SExpr, rules: &[RuleID]) -> Result> { let mut s_expr = s_expr.clone(); for rule_id in rules { let rule = RuleFactory::create_rule(*rule_id, self.ctx.clone())?; @@ -129,8 +136,8 @@ impl RecursiveRuleOptimizer { { s_expr.set_applied_rule(&rule.id()); rule.apply(&s_expr, &mut state)?; - if !state.results().is_empty() { - let result = &state.results()[0]; + if let Some(result) = state.results().first() { + let result = result.clone(); // For tracing only if trace_enabled { @@ -138,8 +145,7 @@ impl RecursiveRuleOptimizer { self.trace_rule_execution(rule.name(), duration, &before_expr, &state)?; } - let optimized_result = self.optimize_expression(result)?; - return Ok(optimized_result); + return Ok(Some(result)); } } @@ -150,7 +156,7 @@ impl RecursiveRuleOptimizer { } } - Ok(s_expr) + Ok(None) } } diff --git a/src/query/sql/src/planner/semantic/type_check.rs b/src/query/sql/src/planner/semantic/type_check.rs index 36fe8cf3c3b69..c84452c55fa4b 100644 --- a/src/query/sql/src/planner/semantic/type_check.rs +++ b/src/query/sql/src/planner/semantic/type_check.rs @@ -4939,6 +4939,7 @@ impl<'a> TypeChecker<'a> { Some(Vec::with_capacity(exprs.len())); let mut element_type: Option = None; + let mut data_type_set = HashSet::with_capacity(2); for expr in exprs { let box (arg, data_type) = self.resolve(expr)?; if let Some(values) = constant_values.as_mut() { @@ -4948,6 +4949,13 @@ impl<'a> TypeChecker<'a> { _ => None, }; if let Some(value) = maybe_constant { + // If the data type has already been computed, + // we don't need to compute the common type again. + if data_type_set.contains(&data_type) { + elems.push(arg); + values.push((value, data_type)); + continue; + } element_type = if let Some(current_ty) = element_type.clone() { common_super_type( current_ty.clone(), @@ -4959,6 +4967,7 @@ impl<'a> TypeChecker<'a> { }; if element_type.is_some() { + data_type_set.insert(data_type.clone()); values.push((value, data_type)); } else { constant_values = None;