diff --git a/datafusion/optimizer/src/eliminate_one_union.rs b/datafusion/optimizer/src/eliminate_one_union.rs deleted file mode 100644 index 3e027811420c..000000000000 --- a/datafusion/optimizer/src/eliminate_one_union.rs +++ /dev/null @@ -1,121 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! [`EliminateOneUnion`] eliminates single element `Union` - -use crate::{OptimizerConfig, OptimizerRule}; -use datafusion_common::{tree_node::Transformed, Result}; -use datafusion_expr::logical_plan::{LogicalPlan, Union}; -use std::sync::Arc; - -use crate::optimizer::ApplyOrder; - -#[derive(Default, Debug)] -/// An optimization rule that eliminates union with one element. -pub struct EliminateOneUnion; - -impl EliminateOneUnion { - #[allow(missing_docs)] - pub fn new() -> Self { - Self {} - } -} - -impl OptimizerRule for EliminateOneUnion { - fn name(&self) -> &str { - "eliminate_one_union" - } - - fn supports_rewrite(&self) -> bool { - true - } - - fn rewrite( - &self, - plan: LogicalPlan, - _config: &dyn OptimizerConfig, - ) -> Result> { - match plan { - LogicalPlan::Union(Union { mut inputs, .. }) if inputs.len() == 1 => Ok( - Transformed::yes(Arc::unwrap_or_clone(inputs.pop().unwrap())), - ), - _ => Ok(Transformed::no(plan)), - } - } - - fn apply_order(&self) -> Option { - Some(ApplyOrder::TopDown) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::test::*; - use arrow::datatypes::{DataType, Field, Schema}; - use datafusion_common::ToDFSchema; - use datafusion_expr::{ - expr_rewriter::coerce_plan_expr_for_schema, logical_plan::table_scan, - }; - use std::sync::Arc; - - fn schema() -> Schema { - Schema::new(vec![ - Field::new("id", DataType::Int32, false), - Field::new("key", DataType::Utf8, false), - Field::new("value", DataType::Int32, false), - ]) - } - - fn assert_optimized_plan_equal(plan: LogicalPlan, expected: &str) -> Result<()> { - assert_optimized_plan_with_rules( - vec![Arc::new(EliminateOneUnion::new())], - plan, - expected, - true, - ) - } - - #[test] - fn eliminate_nothing() -> Result<()> { - let plan_builder = table_scan(Some("table"), &schema(), None)?; - - let plan = plan_builder.clone().union(plan_builder.build()?)?.build()?; - - let expected = "\ - Union\ - \n TableScan: table\ - \n TableScan: table"; - assert_optimized_plan_equal(plan, expected) - } - - #[test] - fn eliminate_one_union() -> Result<()> { - let table_plan = coerce_plan_expr_for_schema( - table_scan(Some("table"), &schema(), None)?.build()?, - &schema().to_dfschema()?, - )?; - let schema = Arc::clone(table_plan.schema()); - let single_union_plan = LogicalPlan::Union(Union { - inputs: vec![Arc::new(table_plan)], - schema, - }); - - let expected = "TableScan: table"; - assert_optimized_plan_equal(single_union_plan, expected) - } -} diff --git a/datafusion/optimizer/src/lib.rs b/datafusion/optimizer/src/lib.rs index 07ef2a46cba9..7632ff858df6 100644 --- a/datafusion/optimizer/src/lib.rs +++ b/datafusion/optimizer/src/lib.rs @@ -51,12 +51,17 @@ pub mod eliminate_filter; pub mod eliminate_group_by_constant; pub mod eliminate_join; pub mod eliminate_limit; -pub mod eliminate_nested_union; -pub mod eliminate_one_union; +#[deprecated(since = "52.0.0", note = "Please use OptimizeUnions instead")] +pub mod eliminate_nested_union { + use crate::optimize_unions::OptimizeUnions; + #[deprecated(since = "52.0.0", note = "Please use OptimizeUnions instead")] + pub type EliminateNestedUnion = OptimizeUnions; +} pub mod eliminate_outer_join; pub mod extract_equijoin_predicate; pub mod filter_null_join_keys; pub mod optimize_projections; +pub mod optimize_unions; pub mod optimizer; pub mod propagate_empty_relation; pub mod push_down_filter; diff --git a/datafusion/optimizer/src/eliminate_nested_union.rs b/datafusion/optimizer/src/optimize_unions.rs similarity index 90% rename from datafusion/optimizer/src/eliminate_nested_union.rs rename to datafusion/optimizer/src/optimize_unions.rs index f8f93727cd9b..cfabd512b427 100644 --- a/datafusion/optimizer/src/eliminate_nested_union.rs +++ b/datafusion/optimizer/src/optimize_unions.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -//! [`EliminateNestedUnion`]: flattens nested `Union` to a single `Union` +//! [`OptimizeUnions`]: removes `Union` nodes in the logical plan. use crate::optimizer::ApplyOrder; use crate::{OptimizerConfig, OptimizerRule}; use datafusion_common::tree_node::Transformed; @@ -26,19 +26,21 @@ use itertools::Itertools; use std::sync::Arc; #[derive(Default, Debug)] -/// An optimization rule that replaces nested unions with a single union. -pub struct EliminateNestedUnion; +/// An optimization rule that +/// 1. replaces nested unions with a single union. +/// 2. removes unions with a single input. +pub struct OptimizeUnions; -impl EliminateNestedUnion { +impl OptimizeUnions { #[allow(missing_docs)] pub fn new() -> Self { Self {} } } -impl OptimizerRule for EliminateNestedUnion { +impl OptimizerRule for OptimizeUnions { fn name(&self) -> &str { - "eliminate_nested_union" + "optimize_unions" } fn apply_order(&self) -> Option { @@ -55,6 +57,9 @@ impl OptimizerRule for EliminateNestedUnion { _config: &dyn OptimizerConfig, ) -> Result> { match plan { + LogicalPlan::Union(Union { mut inputs, .. }) if inputs.len() == 1 => Ok( + Transformed::yes(Arc::unwrap_or_clone(inputs.pop().unwrap())), + ), LogicalPlan::Union(Union { inputs, schema }) => { let inputs = inputs .into_iter() @@ -139,7 +144,7 @@ mod tests { let analyzed_plan = Analyzer::with_rules(vec![Arc::new(TypeCoercion::new())]) .execute_and_check($plan, &options, |_, _| {})?; let optimizer_ctx = OptimizerContext::new().with_max_passes(1); - let rules: Vec> = vec![Arc::new(EliminateNestedUnion::new())]; + let rules: Vec> = vec![Arc::new(OptimizeUnions::new())]; assert_optimized_plan_eq_snapshot!( optimizer_ctx, rules, @@ -420,4 +425,28 @@ mod tests { TableScan: table_1 ") } + + #[test] + fn eliminate_one_union() -> Result<()> { + let plan = table_scan(Some("table"), &schema(), None)?.build()?; + let schema = Arc::clone(plan.schema()); + // note it is not possible to create a single input union via + // LogicalPlanBuilder so create it manually here + let plan = LogicalPlan::Union(Union { + inputs: vec![Arc::new(plan)], + schema, + }); + + // Note we can't use the same assert_optimized_plan_equal as creating a + // single input union is not possible via LogicalPlanBuilder and other passes + // throw errors / don't handle the schema correctly. + assert_optimized_plan_eq_snapshot!( + OptimizerContext::new().with_max_passes(1), + vec![Arc::new(OptimizeUnions::new())], + plan, + @r" + TableScan: table + " + ) + } } diff --git a/datafusion/optimizer/src/optimizer.rs b/datafusion/optimizer/src/optimizer.rs index 084152d40e92..421563d5e7e8 100644 --- a/datafusion/optimizer/src/optimizer.rs +++ b/datafusion/optimizer/src/optimizer.rs @@ -41,12 +41,11 @@ use crate::eliminate_filter::EliminateFilter; use crate::eliminate_group_by_constant::EliminateGroupByConstant; use crate::eliminate_join::EliminateJoin; use crate::eliminate_limit::EliminateLimit; -use crate::eliminate_nested_union::EliminateNestedUnion; -use crate::eliminate_one_union::EliminateOneUnion; use crate::eliminate_outer_join::EliminateOuterJoin; use crate::extract_equijoin_predicate::ExtractEquijoinPredicate; use crate::filter_null_join_keys::FilterNullJoinKeys; use crate::optimize_projections::OptimizeProjections; +use crate::optimize_unions::OptimizeUnions; use crate::plan_signature::LogicalPlanSignature; use crate::propagate_empty_relation::PropagateEmptyRelation; use crate::push_down_filter::PushDownFilter; @@ -228,7 +227,7 @@ impl Optimizer { /// Create a new optimizer using the recommended list of rules pub fn new() -> Self { let rules: Vec> = vec![ - Arc::new(EliminateNestedUnion::new()), + Arc::new(OptimizeUnions::new()), Arc::new(SimplifyExpressions::new()), Arc::new(ReplaceDistinctWithAggregate::new()), Arc::new(EliminateJoin::new()), @@ -241,8 +240,6 @@ impl Optimizer { Arc::new(EliminateCrossJoin::new()), Arc::new(EliminateLimit::new()), Arc::new(PropagateEmptyRelation::new()), - // Must be after PropagateEmptyRelation - Arc::new(EliminateOneUnion::new()), Arc::new(FilterNullJoinKeys::default()), Arc::new(EliminateOuterJoin::new()), // Filters can't be pushed down past Limits, we should do PushDownFilter after PushDownLimit diff --git a/datafusion/optimizer/src/propagate_empty_relation.rs b/datafusion/optimizer/src/propagate_empty_relation.rs index 4db3215dfb76..629b13e4001d 100644 --- a/datafusion/optimizer/src/propagate_empty_relation.rs +++ b/datafusion/optimizer/src/propagate_empty_relation.rs @@ -244,7 +244,7 @@ mod tests { use crate::assert_optimized_plan_eq_snapshot; use crate::eliminate_filter::EliminateFilter; - use crate::eliminate_nested_union::EliminateNestedUnion; + use crate::optimize_unions::OptimizeUnions; use crate::test::{ assert_optimized_plan_with_rules, test_table_scan, test_table_scan_fields, test_table_scan_with_name, @@ -277,7 +277,7 @@ mod tests { assert_optimized_plan_with_rules( vec![ Arc::new(EliminateFilter::new()), - Arc::new(EliminateNestedUnion::new()), + Arc::new(OptimizeUnions::new()), Arc::new(PropagateEmptyRelation::new()), ], plan, diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index bfd693e6a0f8..989589dfb8b2 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -29,7 +29,7 @@ use datafusion::datasource::listing::{ ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl, }; use datafusion::execution::options::ArrowReadOptions; -use datafusion::optimizer::eliminate_nested_union::EliminateNestedUnion; +use datafusion::optimizer::optimize_unions::OptimizeUnions; use datafusion::optimizer::Optimizer; use datafusion_common::parsers::CompressionTypeVariant; use datafusion_functions_aggregate::sum::sum_distinct; @@ -2744,7 +2744,7 @@ async fn roundtrip_union_query() -> Result<()> { let logical_round_trip = logical_plan_from_bytes(&bytes, &ctx.task_ctx())?; // proto deserialization only supports 2-way union, hence this plan has nested unions // apply the flatten unions optimizer rule to be able to compare - let optimizer = Optimizer::with_rules(vec![Arc::new(EliminateNestedUnion::new())]); + let optimizer = Optimizer::with_rules(vec![Arc::new(OptimizeUnions::new())]); let unnested = optimizer.optimize(logical_round_trip, &(ctx.state()), |_x, _y| {})?; assert_eq!( format!("{}", plan.display_indent_schema()), diff --git a/datafusion/sqllogictest/test_files/explain.slt b/datafusion/sqllogictest/test_files/explain.slt index d7af5ff4b9c2..918c01b5613a 100644 --- a/datafusion/sqllogictest/test_files/explain.slt +++ b/datafusion/sqllogictest/test_files/explain.slt @@ -176,7 +176,7 @@ initial_logical_plan logical_plan after resolve_grouping_function SAME TEXT AS ABOVE logical_plan after type_coercion SAME TEXT AS ABOVE analyzed_logical_plan SAME TEXT AS ABOVE -logical_plan after eliminate_nested_union SAME TEXT AS ABOVE +logical_plan after optimize_unions SAME TEXT AS ABOVE logical_plan after simplify_expressions SAME TEXT AS ABOVE logical_plan after replace_distinct_aggregate SAME TEXT AS ABOVE logical_plan after eliminate_join SAME TEXT AS ABOVE @@ -189,7 +189,6 @@ logical_plan after eliminate_filter SAME TEXT AS ABOVE logical_plan after eliminate_cross_join SAME TEXT AS ABOVE logical_plan after eliminate_limit SAME TEXT AS ABOVE logical_plan after propagate_empty_relation SAME TEXT AS ABOVE -logical_plan after eliminate_one_union SAME TEXT AS ABOVE logical_plan after filter_null_join_keys SAME TEXT AS ABOVE logical_plan after eliminate_outer_join SAME TEXT AS ABOVE logical_plan after push_down_limit SAME TEXT AS ABOVE @@ -198,7 +197,7 @@ logical_plan after single_distinct_aggregation_to_group_by SAME TEXT AS ABOVE logical_plan after eliminate_group_by_constant SAME TEXT AS ABOVE logical_plan after common_sub_expression_eliminate SAME TEXT AS ABOVE logical_plan after optimize_projections TableScan: simple_explain_test projection=[a, b, c] -logical_plan after eliminate_nested_union SAME TEXT AS ABOVE +logical_plan after optimize_unions SAME TEXT AS ABOVE logical_plan after simplify_expressions SAME TEXT AS ABOVE logical_plan after replace_distinct_aggregate SAME TEXT AS ABOVE logical_plan after eliminate_join SAME TEXT AS ABOVE @@ -211,7 +210,6 @@ logical_plan after eliminate_filter SAME TEXT AS ABOVE logical_plan after eliminate_cross_join SAME TEXT AS ABOVE logical_plan after eliminate_limit SAME TEXT AS ABOVE logical_plan after propagate_empty_relation SAME TEXT AS ABOVE -logical_plan after eliminate_one_union SAME TEXT AS ABOVE logical_plan after filter_null_join_keys SAME TEXT AS ABOVE logical_plan after eliminate_outer_join SAME TEXT AS ABOVE logical_plan after push_down_limit SAME TEXT AS ABOVE @@ -537,7 +535,7 @@ initial_logical_plan logical_plan after resolve_grouping_function SAME TEXT AS ABOVE logical_plan after type_coercion SAME TEXT AS ABOVE analyzed_logical_plan SAME TEXT AS ABOVE -logical_plan after eliminate_nested_union SAME TEXT AS ABOVE +logical_plan after optimize_unions SAME TEXT AS ABOVE logical_plan after simplify_expressions SAME TEXT AS ABOVE logical_plan after replace_distinct_aggregate SAME TEXT AS ABOVE logical_plan after eliminate_join SAME TEXT AS ABOVE @@ -550,7 +548,6 @@ logical_plan after eliminate_filter SAME TEXT AS ABOVE logical_plan after eliminate_cross_join SAME TEXT AS ABOVE logical_plan after eliminate_limit SAME TEXT AS ABOVE logical_plan after propagate_empty_relation SAME TEXT AS ABOVE -logical_plan after eliminate_one_union SAME TEXT AS ABOVE logical_plan after filter_null_join_keys SAME TEXT AS ABOVE logical_plan after eliminate_outer_join SAME TEXT AS ABOVE logical_plan after push_down_limit SAME TEXT AS ABOVE @@ -559,7 +556,7 @@ logical_plan after single_distinct_aggregation_to_group_by SAME TEXT AS ABOVE logical_plan after eliminate_group_by_constant SAME TEXT AS ABOVE logical_plan after common_sub_expression_eliminate SAME TEXT AS ABOVE logical_plan after optimize_projections TableScan: simple_explain_test projection=[a, b, c] -logical_plan after eliminate_nested_union SAME TEXT AS ABOVE +logical_plan after optimize_unions SAME TEXT AS ABOVE logical_plan after simplify_expressions SAME TEXT AS ABOVE logical_plan after replace_distinct_aggregate SAME TEXT AS ABOVE logical_plan after eliminate_join SAME TEXT AS ABOVE @@ -572,7 +569,6 @@ logical_plan after eliminate_filter SAME TEXT AS ABOVE logical_plan after eliminate_cross_join SAME TEXT AS ABOVE logical_plan after eliminate_limit SAME TEXT AS ABOVE logical_plan after propagate_empty_relation SAME TEXT AS ABOVE -logical_plan after eliminate_one_union SAME TEXT AS ABOVE logical_plan after filter_null_join_keys SAME TEXT AS ABOVE logical_plan after eliminate_outer_join SAME TEXT AS ABOVE logical_plan after push_down_limit SAME TEXT AS ABOVE