diff --git a/datafusion-examples/Cargo.toml b/datafusion-examples/Cargo.toml index 844bdba7a0f2..2530f890f683 100644 --- a/datafusion-examples/Cargo.toml +++ b/datafusion-examples/Cargo.toml @@ -34,9 +34,14 @@ path = "examples/avro_sql.rs" required-features = ["datafusion/avro"] [dev-dependencies] +arrow = "24.0.0" arrow-flight = "24.0.0" async-trait = "0.1.41" datafusion = { path = "../datafusion/core" } +datafusion-common = { path = "../datafusion/common" } +datafusion-expr = { path = "../datafusion/expr" } +datafusion-optimizer = { path = "../datafusion/optimizer" } +datafusion-sql = { path = "../datafusion/sql" } futures = "0.3" num_cpus = "1.13.0" object_store = { version = "0.5.0", features = ["aws"] } diff --git a/datafusion-examples/examples/rewrite_expr.rs b/datafusion-examples/examples/rewrite_expr.rs new file mode 100644 index 000000000000..5e93bfd73787 --- /dev/null +++ b/datafusion-examples/examples/rewrite_expr.rs @@ -0,0 +1,163 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; +use datafusion_common::{DataFusionError, Result}; +use datafusion_expr::expr_rewriter::{ExprRewritable, ExprRewriter}; +use datafusion_expr::{AggregateUDF, Expr, Filter, LogicalPlan, ScalarUDF, TableSource}; +use datafusion_optimizer::optimizer::Optimizer; +use datafusion_optimizer::{utils, OptimizerConfig, OptimizerRule}; +use datafusion_sql::planner::{ContextProvider, SqlToRel}; +use datafusion_sql::sqlparser::dialect::PostgreSqlDialect; +use datafusion_sql::sqlparser::parser::Parser; +use datafusion_sql::TableReference; +use std::any::Any; +use std::sync::Arc; + +pub fn main() -> Result<()> { + // produce a logical plan using the datafusion-sql crate + let dialect = PostgreSqlDialect {}; + let sql = "SELECT * FROM person WHERE age BETWEEN 21 AND 32"; + let statements = Parser::parse_sql(&dialect, sql)?; + + // produce a logical plan using the datafusion-sql crate + let context_provider = MyContextProvider {}; + let sql_to_rel = SqlToRel::new(&context_provider); + let logical_plan = sql_to_rel.sql_statement_to_plan(statements[0].clone())?; + println!( + "Unoptimized Logical Plan:\n\n{}\n", + logical_plan.display_indent() + ); + + // now run the optimizer with our custom rule + let optimizer = Optimizer::with_rules(vec![Arc::new(MyRule {})]); + let mut optimizer_config = OptimizerConfig::default().with_skip_failing_rules(false); + let optimized_plan = + optimizer.optimize(&logical_plan, &mut optimizer_config, observe)?; + println!( + "Optimized Logical Plan:\n\n{}\n", + optimized_plan.display_indent() + ); + + Ok(()) +} + +fn observe(plan: &LogicalPlan, rule: &dyn OptimizerRule) { + println!( + "After applying rule '{}':\n{}\n", + rule.name(), + plan.display_indent() + ) +} + +struct MyRule {} + +impl OptimizerRule for MyRule { + fn name(&self) -> &str { + "my_rule" + } + + fn optimize( + &self, + plan: &LogicalPlan, + _config: &mut OptimizerConfig, + ) -> Result { + // recurse down and optimize children first + let plan = utils::optimize_children(self, plan, _config)?; + + match plan { + LogicalPlan::Filter(filter) => { + let mut expr_rewriter = MyExprRewriter {}; + let predicate = filter.predicate().clone(); + let predicate = predicate.rewrite(&mut expr_rewriter)?; + Ok(LogicalPlan::Filter(Filter::try_new( + predicate, + filter.input().clone(), + )?)) + } + _ => Ok(plan.clone()), + } + } +} + +struct MyExprRewriter {} + +impl ExprRewriter for MyExprRewriter { + fn mutate(&mut self, expr: Expr) -> Result { + match expr { + Expr::Between { + negated, + expr, + low, + high, + } => { + let expr: Expr = expr.as_ref().clone(); + let low: Expr = low.as_ref().clone(); + let high: Expr = high.as_ref().clone(); + if negated { + Ok(expr.clone().lt(low).or(expr.gt(high))) + } else { + Ok(expr.clone().gt_eq(low).and(expr.lt_eq(high))) + } + } + _ => Ok(expr.clone()), + } + } +} + +struct MyContextProvider {} + +impl ContextProvider for MyContextProvider { + fn get_table_provider(&self, name: TableReference) -> Result> { + if name.table() == "person" { + Ok(Arc::new(MyTableSource { + schema: Arc::new(Schema::new(vec![ + Field::new("name", DataType::Utf8, false), + Field::new("age", DataType::UInt8, false), + ])), + })) + } else { + Err(DataFusionError::Plan("table not found".to_string())) + } + } + + fn get_function_meta(&self, _name: &str) -> Option> { + None + } + + fn get_aggregate_meta(&self, _name: &str) -> Option> { + None + } + + fn get_variable_type(&self, _variable_names: &[String]) -> Option { + None + } +} + +struct MyTableSource { + schema: SchemaRef, +} + +impl TableSource for MyTableSource { + fn as_any(&self) -> &dyn Any { + self + } + + fn schema(&self) -> SchemaRef { + self.schema.clone() + } +} diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index c21dd5c8818e..6f683dc7ffd7 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -367,10 +367,17 @@ impl PartialOrd for Expr { impl Expr { /// Returns the name of this expression as it should appear in a schema. This name /// will not include any CAST expressions. - pub fn name(&self) -> Result { + pub fn display_name(&self) -> Result { create_name(self) } + /// Returns the name of this expression as it should appear in a schema. This name + /// will not include any CAST expressions. + #[deprecated(since = "14.0.0", note = "please use `display_name` instead")] + pub fn name(&self) -> Result { + self.display_name() + } + /// Returns a full and complete string representation of this expression. pub fn canonical_name(&self) -> String { format!("{}", self) @@ -1186,7 +1193,7 @@ mod test { assert_eq!(expected, expr.canonical_name()); assert_eq!(expected, format!("{}", expr)); assert_eq!(expected, format!("{:?}", expr)); - assert_eq!(expected, expr.name()?); + assert_eq!(expected, expr.display_name()?); Ok(()) } @@ -1202,7 +1209,7 @@ mod test { assert_eq!(expected_canonical, format!("{:?}", expr)); // note that CAST intentionally has a name that is different from its `Display` // representation. CAST does not change the name of expressions. - assert_eq!("Float32(1.23)", expr.name()?); + assert_eq!("Float32(1.23)", expr.display_name()?); Ok(()) } diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 5442a24212d2..cfcbcc940603 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -240,7 +240,7 @@ impl ExprSchemable for Expr { )), _ => Ok(DFField::new( None, - &self.name()?, + &self.display_name()?, self.get_type(input_schema)?, self.nullable(input_schema)?, )), diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index 300c3b8cb6dc..631a64da6fd6 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -841,7 +841,7 @@ pub(crate) fn validate_unique_names<'a>( ) -> Result<()> { let mut unique_names = HashMap::new(); expressions.into_iter().enumerate().try_for_each(|(position, expr)| { - let name = expr.name()?; + let name = expr.display_name()?; match unique_names.get(&name) { None => { unique_names.insert(name, (position, expr)); diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs index 8e2544793b25..be4c45f8c931 100644 --- a/datafusion/expr/src/utils.rs +++ b/datafusion/expr/src/utils.rs @@ -676,7 +676,7 @@ pub fn columnize_expr(e: Expr, input_schema: &DFSchema) -> Expr { Expr::Alias(Box::new(columnize_expr(*inner_expr, input_schema)), name) } Expr::ScalarSubquery(_) => e.clone(), - _ => match e.name() { + _ => match e.display_name() { Ok(name) => match input_schema.field_with_unqualified_name(&name) { Ok(field) => Expr::Column(field.qualified_column()), // expression not provided as input, do not convert to a column reference @@ -728,7 +728,7 @@ pub fn expr_as_column_expr(expr: &Expr, plan: &LogicalPlan) -> Result { let field = plan.schema().field_from_column(col)?; Ok(Expr::Column(field.qualified_column())) } - _ => Ok(Expr::Column(Column::from_name(expr.name()?))), + _ => Ok(Expr::Column(Column::from_name(expr.display_name()?))), } } diff --git a/datafusion/optimizer/README.md b/datafusion/optimizer/README.md index 39d28a8fae37..b8edeea87ac1 100644 --- a/datafusion/optimizer/README.md +++ b/datafusion/optimizer/README.md @@ -17,10 +17,318 @@ under the License. --> -# DataFusion Query Optimizer Rules +# DataFusion Query Optimizer -[DataFusion](df) is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. +[DataFusion](df) is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory +format. -This crate is a submodule of DataFusion that provides query optimizer rules. +DataFusion has modular design, allowing individual crates to be re-used in other projects. + +This crate is a submodule of DataFusion that provides a query optimizer for logical plans, and +contains an extensive set of OptimizerRules that may rewrite the plan and/or its expressions so +they execute more quickly while still computing the same result. + +## Running the Optimizer + +The following code demonstrates the basic flow of creating the optimizer with a default set of optimization rules +and applying it to a logical plan to produce an optimized logical plan. + +```rust + +// We need a logical plan as the starting point. There are many ways to build a logical plan: +// +// The `datafusion-expr` crate provides a LogicalPlanBuilder +// The `datafusion-sql` crate provides a SQL query planner that can create a LogicalPlan from SQL +// The `datafusion` crate provides a DataFrame API that can create a LogicalPlan +let logical_plan = ... + +let mut config = OptimizerConfig::default(); +let optimizer = Optimizer::new(&config); +let optimized_plan = optimizer.optimize(&logical_plan, &mut config, observe)?; + +fn observe(plan: &LogicalPlan, rule: &dyn OptimizerRule) { + println!( + "After applying rule '{}':\n{}", + rule.name(), + plan.display_indent() + ) +} +``` + +## Providing Custom Rules + +The optimizer can be created with a custom set of rules. + +```rust +let optimizer = Optimizer::with_rules(vec![ + Arc::new(MyRule {}) +]); +``` + +## Writing Optimization Rules + +Please refer to the [rewrite_expr example](../../datafusion-examples/examples/rewrite_expr.rs) to learn more about +the general approach to writing optimizer rules and then move onto studying the existing rules. + +All rules must implement the `OptimizerRule` trait. + +```rust +/// `OptimizerRule` transforms one ['LogicalPlan'] into another which +/// computes the same results, but in a potentially more efficient +/// way. If there are no suitable transformations for the input plan, +/// the optimizer can simply return it as is. +pub trait OptimizerRule { + /// Rewrite `plan` to an optimized form + fn optimize( + &self, + plan: &LogicalPlan, + optimizer_config: &mut OptimizerConfig, + ) -> Result; + + /// A human readable name for this optimizer rule + fn name(&self) -> &str; +} +``` + +### General Guidelines + +Rules typical walk the logical plan and walk the expression trees inside operators and selectively mutate +individual operators or expressions. + +Sometimes there is an initial pass that visits the plan and builds state that is used in a second pass that performs +the actual optimization. This approach is used in projection push down and filter push down. + +### Expression Naming + +Every expression in DataFusion has a name, which is used as the column name. For example, in this example the output +contains a single column with the name `"COUNT(aggregate_test_100.c9)"`: + +```text +❯ select count(c9) from aggregate_test_100; ++------------------------------+ +| COUNT(aggregate_test_100.c9) | ++------------------------------+ +| 100 | ++------------------------------+ +``` + +These names are used to refer to the columns in both subqueries as well as internally from one stage of the LogicalPlan +to another. For example: + +```text +❯ select "COUNT(aggregate_test_100.c9)" + 1 from (select count(c9) from aggregate_test_100) as sq; ++--------------------------------------------+ +| sq.COUNT(aggregate_test_100.c9) + Int64(1) | ++--------------------------------------------+ +| 101 | ++--------------------------------------------+ +``` + +### Implication + +Because DataFusion identifies columns using a string name, it means it is critical that the names of expressions are +not changed by the optimizer when it rewrites expressions. This is typically accomplished by renaming a rewritten +expression by adding an alias. + +Here is a simple example of such a rewrite. The expression `1 + 2` can be internally simplified to 3 but must still be +displayed the same as `1 + 2`: + +```text +❯ select 1 + 2; ++---------------------+ +| Int64(1) + Int64(2) | ++---------------------+ +| 3 | ++---------------------+ +``` + +Looking at the `EXPLAIN` output we can see that the optimizer has effectively rewritten `1 + 2` into effectively +`3 as "1 + 2"`: + +```text +❯ explain select 1 + 2; ++---------------+-------------------------------------------------+ +| plan_type | plan | ++---------------+-------------------------------------------------+ +| logical_plan | Projection: Int64(3) AS Int64(1) + Int64(2) | +| | EmptyRelation | +| physical_plan | ProjectionExec: expr=[3 as Int64(1) + Int64(2)] | +| | EmptyExec: produce_one_row=true | +| | | ++---------------+-------------------------------------------------+ +``` + +If the expression name is not preserved, bugs such as [#3704](https://github.com/apache/arrow-datafusion/issues/3704) +and [#3555](https://github.com/apache/arrow-datafusion/issues/3555) occur where the expected columns can not be found. + +### Building Expression Names + +There are currently two ways to create a name for an expression in the logical plan. + +```rust +impl Expr { + /// Returns the name of this expression as it should appear in a schema. This name + /// will not include any CAST expressions. + pub fn display_name(&self) -> Result { + create_name(self) + } + + /// Returns a full and complete string representation of this expression. + pub fn canonical_name(&self) -> String { + format!("{}", self) + } +} +``` + +When comparing expressions to determine if they are equivalent, `canonical_name` should be used, and when creating a +name to be used in a schema, `display_name` should be used. + +### Utilities + +There are a number of utility methods provided that take care of some common tasks. + +### ExprVisitor + +The `ExprVisitor` and `ExprVisitable` traits provide a mechanism for applying a visitor pattern to an expression tree. + +Here is an example that demonstrates this. + +```rust +fn extract_subquery_filters(expression: &Expr, extracted: &mut Vec) -> Result<()> { + struct InSubqueryVisitor<'a> { + accum: &'a mut Vec, + } + + impl ExpressionVisitor for InSubqueryVisitor<'_> { + fn pre_visit(self, expr: &Expr) -> Result> { + if let Expr::InSubquery { .. } = expr { + self.accum.push(expr.to_owned()); + } + Ok(Recursion::Continue(self)) + } + } + + expression.accept(InSubqueryVisitor { accum: extracted })?; + Ok(()) +} +``` + +### Rewriting Expressions + +The `MyExprRewriter` trait can be implemented to provide a way to rewrite expressions. This rule can then be applied +to an expression by calling `Expr::rewrite` (from the `ExprRewritable` trait). + +The `rewrite` method will perform a depth first walk of the expression and its children to rewrite an expression, +consuming `self` producing a new expression. + +```rust +let mut expr_rewriter = MyExprRewriter {}; +let expr = expr.rewrite(&mut expr_rewriter)?; +``` + +Here is an example implementation which will rewrite `expr BETWEEN a AND b` as `expr >= a AND expr <= b`. Note that the +implementation does not need to perform any recursion since this is handled by the `rewrite` method. + +```rust +struct MyExprRewriter {} + +impl ExprRewriter for MyExprRewriter { + fn mutate(&mut self, expr: Expr) -> Result { + match expr { + Expr::Between { + negated, + expr, + low, + high, + } => { + let expr: Expr = expr.as_ref().clone(); + let low: Expr = low.as_ref().clone(); + let high: Expr = high.as_ref().clone(); + if negated { + Ok(expr.clone().lt(low).or(expr.clone().gt(high))) + } else { + Ok(expr.clone().gt_eq(low).and(expr.clone().lt_eq(high))) + } + } + _ => Ok(expr.clone()), + } + } +} +``` + +### optimize_children + +Typically a rule is applied recursively to all operators within a query plan. Rather than duplicate +that logic in each rule, an `optimize_children` method is provided. This recursively invokes the `optimize` method on +the plan's children and then returns a node of the same type. + +```rust +fn optimize( + &self, + plan: &LogicalPlan, + _config: &mut OptimizerConfig, +) -> Result { + // recurse down and optimize children first + let plan = utils::optimize_children(self, plan, _config)?; + + ... +} +``` + +### Writing Tests + +There should be unit tests in the same file as the new rule that test the effect of the rule being applied to a plan +in isolation (without any other rule being applied). + +There should also be a test in `integration-tests.rs` that tests the rule as part of the overall optimization process. + +### Debugging + +The `EXPLAIN VERBOSE` command can be used to show the effect of each optimization rule on a query. + +In the following example, the `type_coercion` and `simplify_expressions` passes have simplified the plan so that it returns the constant `"3.2"` rather than doing a computation at execution time. + +```text +❯ explain verbose select cast(1 + 2.2 as string) as foo; ++------------------------------------------------------------+---------------------------------------------------------------------------+ +| plan_type | plan | ++------------------------------------------------------------+---------------------------------------------------------------------------+ +| initial_logical_plan | Projection: CAST(Int64(1) + Float64(2.2) AS Utf8) AS foo | +| | EmptyRelation | +| logical_plan after type_coercion | Projection: CAST(CAST(Int64(1) AS Float64) + Float64(2.2) AS Utf8) AS foo | +| | EmptyRelation | +| logical_plan after simplify_expressions | Projection: Utf8("3.2") AS foo | +| | EmptyRelation | +| logical_plan after unwrap_cast_in_comparison | SAME TEXT AS ABOVE | +| logical_plan after decorrelate_where_exists | SAME TEXT AS ABOVE | +| logical_plan after decorrelate_where_in | SAME TEXT AS ABOVE | +| logical_plan after scalar_subquery_to_join | SAME TEXT AS ABOVE | +| logical_plan after subquery_filter_to_join | SAME TEXT AS ABOVE | +| logical_plan after simplify_expressions | SAME TEXT AS ABOVE | +| logical_plan after eliminate_filter | SAME TEXT AS ABOVE | +| logical_plan after reduce_cross_join | SAME TEXT AS ABOVE | +| logical_plan after common_sub_expression_eliminate | SAME TEXT AS ABOVE | +| logical_plan after eliminate_limit | SAME TEXT AS ABOVE | +| logical_plan after projection_push_down | SAME TEXT AS ABOVE | +| logical_plan after rewrite_disjunctive_predicate | SAME TEXT AS ABOVE | +| logical_plan after reduce_outer_join | SAME TEXT AS ABOVE | +| logical_plan after filter_push_down | SAME TEXT AS ABOVE | +| logical_plan after limit_push_down | SAME TEXT AS ABOVE | +| logical_plan after single_distinct_aggregation_to_group_by | SAME TEXT AS ABOVE | +| logical_plan | Projection: Utf8("3.2") AS foo | +| | EmptyRelation | +| initial_physical_plan | ProjectionExec: expr=[3.2 as foo] | +| | EmptyExec: produce_one_row=true | +| | | +| physical_plan after aggregate_statistics | SAME TEXT AS ABOVE | +| physical_plan after hash_build_probe_order | SAME TEXT AS ABOVE | +| physical_plan after coalesce_batches | SAME TEXT AS ABOVE | +| physical_plan after repartition | SAME TEXT AS ABOVE | +| physical_plan after add_merge_exec | SAME TEXT AS ABOVE | +| physical_plan | ProjectionExec: expr=[3.2 as foo] | +| | EmptyExec: produce_one_row=true | +| | | ++------------------------------------------------------------+---------------------------------------------------------------------------+ +``` [df]: https://crates.io/crates/datafusion diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index d33fe07c04b6..c0edba96fe0f 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -546,7 +546,7 @@ impl ExprRewriter for CommonSubexprRewriter<'_> { self.curr_index += 1; } - let expr_name = expr.name()?; + let expr_name = expr.display_name()?; // Alias this `Column` expr to it original "expr name", // `projection_push_down` optimizer use "expr name" to eliminate useless // projections. diff --git a/datafusion/optimizer/src/filter_push_down.rs b/datafusion/optimizer/src/filter_push_down.rs index d1f696621373..22b6aa5a1ba3 100644 --- a/datafusion/optimizer/src/filter_push_down.rs +++ b/datafusion/optimizer/src/filter_push_down.rs @@ -406,7 +406,7 @@ fn optimize(plan: &LogicalPlan, mut state: State) -> Result { let agg_columns = aggr_expr .iter() - .map(|x| Ok(Column::from_name(x.name()?))) + .map(|x| Ok(Column::from_name(x.display_name()?))) .collect::>>()?; used_columns.extend(agg_columns); diff --git a/datafusion/optimizer/src/optimizer.rs b/datafusion/optimizer/src/optimizer.rs index 87e4d1ffcd13..5e61ccc5a4ef 100644 --- a/datafusion/optimizer/src/optimizer.rs +++ b/datafusion/optimizer/src/optimizer.rs @@ -43,7 +43,8 @@ use std::sync::Arc; /// `OptimizerRule` transforms one ['LogicalPlan'] into another which /// computes the same results, but in a potentially more efficient -/// way. +/// way. If there are no suitable transformations for the input plan, +/// the optimizer can simply return it as is. pub trait OptimizerRule { /// Rewrite `plan` to an optimized form fn optimize( diff --git a/datafusion/optimizer/src/projection_push_down.rs b/datafusion/optimizer/src/projection_push_down.rs index 5a048aac70f0..9e61ccd6e229 100644 --- a/datafusion/optimizer/src/projection_push_down.rs +++ b/datafusion/optimizer/src/projection_push_down.rs @@ -259,7 +259,7 @@ fn optimize_plan( let mut new_window_expr = Vec::new(); { window_expr.iter().try_for_each(|expr| { - let name = &expr.name()?; + let name = &expr.display_name()?; let column = Column::from_name(name); if required_columns.contains(&column) { new_window_expr.push(expr.clone()); @@ -317,7 +317,7 @@ fn optimize_plan( // Gather all columns needed for expressions in this Aggregate let mut new_aggr_expr = Vec::new(); aggr_expr.iter().try_for_each(|expr| { - let name = &expr.name()?; + let name = &expr.display_name()?; let column = Column::from_name(name); if required_columns.contains(&column) { new_aggr_expr.push(expr.clone()); diff --git a/datafusion/optimizer/src/simplify_expressions.rs b/datafusion/optimizer/src/simplify_expressions.rs index 23d3edf91d37..e9720cc71583 100644 --- a/datafusion/optimizer/src/simplify_expressions.rs +++ b/datafusion/optimizer/src/simplify_expressions.rs @@ -265,12 +265,12 @@ impl SimplifyExpressions { .map(|e| { // We need to keep original expression name, if any. // Constant folding should not change expression name. - let name = &e.name(); + let name = &e.display_name(); // Apply the actual simplification logic let new_e = simplifier.simplify(e)?; - let new_name = &new_e.name(); + let new_name = &new_e.display_name(); if let (Ok(expr_name), Ok(new_expr_name)) = (name, new_name) { if expr_name != new_expr_name { diff --git a/datafusion/optimizer/src/single_distinct_to_groupby.rs b/datafusion/optimizer/src/single_distinct_to_groupby.rs index 6d3343763796..e6ec0e72febf 100644 --- a/datafusion/optimizer/src/single_distinct_to_groupby.rs +++ b/datafusion/optimizer/src/single_distinct_to_groupby.rs @@ -91,7 +91,7 @@ fn optimize(plan: &LogicalPlan) -> Result { fun, args, filter, .. } => { // is_single_distinct_agg ensure args.len=1 - if group_fields_set.insert(args[0].name()?) { + if group_fields_set.insert(args[0].display_name()?) { inner_group_exprs .push(args[0].clone().alias(SINGLE_DISTINCT_ALIAS)); } @@ -189,7 +189,7 @@ fn is_single_distinct_agg(plan: &LogicalPlan) -> Result { distinct_count += 1; } for e in args { - fields_set.insert(e.name()?); + fields_set.insert(e.display_name()?); } } } diff --git a/datafusion/optimizer/src/utils.rs b/datafusion/optimizer/src/utils.rs index d9b5cd9a87c5..c1323dfbba80 100644 --- a/datafusion/optimizer/src/utils.rs +++ b/datafusion/optimizer/src/utils.rs @@ -335,7 +335,7 @@ where fn name_for_alias(expr: &Expr) -> Result { match expr { Expr::Sort { expr, .. } => name_for_alias(expr), - expr => expr.name(), + expr => expr.display_name(), } } @@ -448,14 +448,14 @@ mod tests { let expr = rewrite_preserving_name(expr_from.clone(), &mut rewriter).unwrap(); let original_name = match &expr_from { - Expr::Sort { expr, .. } => expr.name(), - expr => expr.name(), + Expr::Sort { expr, .. } => expr.display_name(), + expr => expr.display_name(), } .unwrap(); let new_name = match &expr { - Expr::Sort { expr, .. } => expr.name(), - expr => expr.name(), + Expr::Sort { expr, .. } => expr.display_name(), + expr => expr.display_name(), } .unwrap();