diff --git a/datafusion/src/execution/context.rs b/datafusion/src/execution/context.rs
index c394d3895622..cca3cd75b26b 100644
--- a/datafusion/src/execution/context.rs
+++ b/datafusion/src/execution/context.rs
@@ -21,7 +21,9 @@ use crate::{
         catalog::{CatalogList, MemoryCatalogList},
         information_schema::CatalogWithInformationSchema,
     },
-    optimizer::hash_build_probe_order::HashBuildProbeOrder,
+    optimizer::{
+        eliminate_limit::EliminateLimit, hash_build_probe_order::HashBuildProbeOrder,
+    },
     physical_optimizer::optimizer::PhysicalOptimizerRule,
 };
 use log::debug;
@@ -636,6 +638,7 @@ impl ExecutionConfig {
             batch_size: 8192,
             optimizers: vec![
                 Arc::new(ConstantFolding::new()),
+                Arc::new(EliminateLimit::new()),
                 Arc::new(ProjectionPushDown::new()),
                 Arc::new(FilterPushDown::new()),
                 Arc::new(HashBuildProbeOrder::new()),
diff --git a/datafusion/src/optimizer/eliminate_limit.rs b/datafusion/src/optimizer/eliminate_limit.rs
new file mode 100644
index 000000000000..87b33d6f5d5b
--- /dev/null
+++ b/datafusion/src/optimizer/eliminate_limit.rs
@@ -0,0 +1,132 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Optimizer rule to replace `LIMIT 0` on a plan with an empty relation.
+//! This saves time in planning and executing the query.
+use crate::error::Result;
+use crate::logical_plan::LogicalPlan;
+use crate::optimizer::optimizer::OptimizerRule;
+
+use super::utils;
+
+/// Optimization rule that replaces `LIMIT 0` with a [`LogicalPlan::EmptyRelation`].
+#[derive(Default)]
+pub struct EliminateLimit;
+
+impl EliminateLimit {
+    /// Creates a new `EliminateLimit` rule.
+    pub fn new() -> Self {
+        Self
+    }
+}
+
+impl OptimizerRule for EliminateLimit {
+    /// Returns a copy of `plan` in which every `Limit` node with `n == 0` is
+    /// replaced by an `EmptyRelation` that keeps the schema of the limit's input.
+    ///
+    /// Any other node is rebuilt through `utils::from_plan` from its own expressions
+    /// and its recursively optimized inputs; the first error from an input is returned.
+    fn optimize(&self, plan: &LogicalPlan) -> Result<LogicalPlan> {
+        match plan {
+            // The replacement does not reference `input`, so the whole subtree is dropped.
+            LogicalPlan::Limit { n, input } if *n == 0 => {
+                Ok(LogicalPlan::EmptyRelation {
+                    produce_one_row: false,
+                    schema: input.schema().clone(),
+                })
+            }
+            // Rest: recurse and find possible LIMIT 0 nodes
+            _ => {
+                let expr = plan.expressions();
+
+                // apply the optimization to all inputs of the plan
+                let inputs = plan.inputs();
+                let new_inputs = inputs
+                    .iter()
+                    .map(|child| self.optimize(child))
+                    .collect::<Result<Vec<_>>>()?;
+
+                utils::from_plan(plan, &expr, &new_inputs)
+            }
+        }
+    }
+
+    /// Returns the rule's name, `eliminate_limit`.
+    fn name(&self) -> &str {
+        "eliminate_limit"
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::logical_plan::LogicalPlanBuilder;
+    use crate::logical_plan::{col, sum};
+    use crate::test::*;
+
+    /// Runs the rule on `plan` and checks both the debug rendering of the
+    /// result and that the schema is unchanged.
+    fn assert_optimized_plan_eq(plan: &LogicalPlan, expected: &str) {
+        let rule = EliminateLimit::new();
+        let optimized_plan = rule.optimize(plan).expect("failed to optimize plan");
+        let formatted_plan = format!("{:?}", optimized_plan);
+        assert_eq!(formatted_plan, expected);
+        assert_eq!(plan.schema(), optimized_plan.schema());
+    }
+
+    #[test]
+    fn limit_0_root() {
+        let table_scan = test_table_scan().unwrap();
+        let plan = LogicalPlanBuilder::from(&table_scan)
+            .aggregate(vec![col("a")], vec![sum(col("b"))])
+            .unwrap()
+            .limit(0)
+            .unwrap()
+            .build()
+            .unwrap();
+
+        // No aggregate / scan / limit
+        let expected = "EmptyRelation";
+        assert_optimized_plan_eq(&plan, expected);
+    }
+
+    #[test]
+    fn limit_0_nested() {
+        let table_scan = test_table_scan().unwrap();
+        let plan1 = LogicalPlanBuilder::from(&table_scan)
+            .aggregate(vec![col("a")], vec![sum(col("b"))])
+            .unwrap()
+            .build()
+            .unwrap();
+        let plan = LogicalPlanBuilder::from(&table_scan)
+            .aggregate(vec![col("a")], vec![sum(col("b"))])
+            .unwrap()
+            .limit(0)
+            .unwrap()
+            .union(plan1)
+            .unwrap()
+            .build()
+            .unwrap();
+
+        // Left side is removed
+        let expected = "Union\
+            \n  EmptyRelation\
+            \n  Aggregate: groupBy=[[#a]], aggr=[[SUM(#b)]]\
+            \n    TableScan: test projection=None";
+        assert_optimized_plan_eq(&plan, expected);
+    }
+}
diff --git a/datafusion/src/optimizer/mod.rs b/datafusion/src/optimizer/mod.rs
index dc59b64ff460..2fb8a3d62950 100644
--- a/datafusion/src/optimizer/mod.rs
+++ b/datafusion/src/optimizer/mod.rs
@@ -19,6 +19,7 @@
 //! some simple rules to a logical plan, such as "Projection Push Down" and "Type Coercion".
 
 pub mod constant_folding;
+pub mod eliminate_limit;
 pub mod filter_push_down;
 pub mod hash_build_probe_order;
 pub mod limit_push_down;