From e0f083befd9fdf4b4c803023edf3778b209dc6c7 Mon Sep 17 00:00:00 2001 From: "Heres, Daniel" Date: Tue, 27 Apr 2021 18:01:54 +0200 Subject: [PATCH 1/3] Add rule to eliminate limit 0 --- datafusion/src/optimizer/eliminate_limit.rs | 122 ++++++++++++++++++++ datafusion/src/optimizer/mod.rs | 1 + 2 files changed, 123 insertions(+) create mode 100644 datafusion/src/optimizer/eliminate_limit.rs diff --git a/datafusion/src/optimizer/eliminate_limit.rs b/datafusion/src/optimizer/eliminate_limit.rs new file mode 100644 index 000000000000..897b44f2289e --- /dev/null +++ b/datafusion/src/optimizer/eliminate_limit.rs @@ -0,0 +1,122 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Optimizer rule to push down LIMIT in the query plan +//! 
It will push down through projection, limits (taking the smaller limit) +use crate::error::Result; +use crate::logical_plan::LogicalPlan; +use crate::optimizer::optimizer::OptimizerRule; + +use super::utils; + +/// Optimization rule that replaces Limit 0 with an empty relation +pub struct EliminateLimit {} + +impl EliminateLimit { + #[allow(missing_docs)] + pub fn new() -> Self { + Self {} + } +} + +impl OptimizerRule for EliminateLimit { + fn optimize(&self, plan: &LogicalPlan) -> Result<LogicalPlan> { + match plan { + LogicalPlan::Limit { n, input } if *n == 0 => { + Ok(LogicalPlan::EmptyRelation { + produce_one_row: false, + schema: input.schema().clone(), + }) + } + // Rest: recurse and find possible Limit 0 nodes + _ => { + let expr = plan.expressions(); + + // apply the optimization to all inputs of the plan + let inputs = plan.inputs(); + let new_inputs = inputs + .iter() + .map(|plan| self.optimize(plan)) + .collect::<Result<Vec<_>>>()?; + + utils::from_plan(plan, &expr, &new_inputs) + } + } + } + + fn name(&self) -> &str { + "eliminate_limit" + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::logical_plan::LogicalPlanBuilder; + use crate::logical_plan::{col, sum}; + use crate::test::*; + + fn assert_optimized_plan_eq(plan: &LogicalPlan, expected: &str) { + let rule = EliminateLimit::new(); + let optimized_plan = rule.optimize(plan).expect("failed to optimize plan"); + let formatted_plan = format!("{:?}", optimized_plan); + assert_eq!(formatted_plan, expected); + assert_eq!(plan.schema(), optimized_plan.schema()); + } + + #[test] + fn limit_0_root() { + let table_scan = test_table_scan().unwrap(); + let plan = LogicalPlanBuilder::from(&table_scan) + .aggregate(vec![col("a")], vec![sum(col("b"))]) + .unwrap() + .limit(0) + .unwrap() + .build() + .unwrap(); + + // No aggregate / scan / limit + let expected = "EmptyRelation"; + assert_optimized_plan_eq(&plan, expected); + } + + #[test] + fn limit_0_nested() { + let table_scan = test_table_scan().unwrap(); + let plan1 = 
LogicalPlanBuilder::from(&table_scan) + .aggregate(vec![col("a")], vec![sum(col("b"))]) + .unwrap() + .build() + .unwrap(); + let plan = LogicalPlanBuilder::from(&table_scan) + .aggregate(vec![col("a")], vec![sum(col("b"))]) + .unwrap() + .limit(0) + .unwrap() + .union(plan1) + .unwrap() + .build() + .unwrap(); + + // Left side is removed + let expected = "Union\ + \n EmptyRelation\ + \n Aggregate: groupBy=[[#a]], aggr=[[SUM(#b)]]\ + \n TableScan: test projection=None"; + assert_optimized_plan_eq(&plan, expected); + } +} diff --git a/datafusion/src/optimizer/mod.rs b/datafusion/src/optimizer/mod.rs index dc59b64ff460..2fb8a3d62950 100644 --- a/datafusion/src/optimizer/mod.rs +++ b/datafusion/src/optimizer/mod.rs @@ -19,6 +19,7 @@ //! some simple rules to a logical plan, such as "Projection Push Down" and "Type Coercion". pub mod constant_folding; +pub mod eliminate_limit; pub mod filter_push_down; pub mod hash_build_probe_order; pub mod limit_push_down; From 2e4f878ca797599c8852655866795d3a31ddfdd9 Mon Sep 17 00:00:00 2001 From: "Heres, Daniel" Date: Tue, 27 Apr 2021 18:12:04 +0200 Subject: [PATCH 2/3] Add rule, document --- datafusion/src/execution/context.rs | 5 ++++- datafusion/src/optimizer/eliminate_limit.rs | 8 ++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/datafusion/src/execution/context.rs b/datafusion/src/execution/context.rs index c394d3895622..cca3cd75b26b 100644 --- a/datafusion/src/execution/context.rs +++ b/datafusion/src/execution/context.rs @@ -21,7 +21,9 @@ use crate::{ catalog::{CatalogList, MemoryCatalogList}, information_schema::CatalogWithInformationSchema, }, - optimizer::hash_build_probe_order::HashBuildProbeOrder, + optimizer::{ + eliminate_limit::EliminateLimit, hash_build_probe_order::HashBuildProbeOrder, + }, physical_optimizer::optimizer::PhysicalOptimizerRule, }; use log::debug; @@ -636,6 +638,7 @@ impl ExecutionConfig { batch_size: 8192, optimizers: vec![ Arc::new(ConstantFolding::new()), + 
Arc::new(EliminateLimit::new()), Arc::new(ProjectionPushDown::new()), Arc::new(FilterPushDown::new()), Arc::new(HashBuildProbeOrder::new()), diff --git a/datafusion/src/optimizer/eliminate_limit.rs b/datafusion/src/optimizer/eliminate_limit.rs index 897b44f2289e..02d2d35d52a4 100644 --- a/datafusion/src/optimizer/eliminate_limit.rs +++ b/datafusion/src/optimizer/eliminate_limit.rs @@ -15,15 +15,15 @@ // specific language governing permissions and limitations // under the License. -//! Optimizer rule to push down LIMIT in the query plan -//! It will push down through projection, limits (taking the smaller limit) +//! Optimizer rule to replace `LIMIT 0` on a plan with an empty relation. +//! This saves time in planning and executing the query. use crate::error::Result; use crate::logical_plan::LogicalPlan; use crate::optimizer::optimizer::OptimizerRule; use super::utils; -/// Optimization rule that replaces Limit 0 with an empty relation +/// Optimization rule that replaces LIMIT 0 with an [LogicalPlan::EmptyRelation] pub struct EliminateLimit {} impl EliminateLimit { @@ -42,7 +42,7 @@ impl OptimizerRule for EliminateLimit { schema: input.schema().clone(), }) } - // Rest: recurse and find possible Limit 0 nodes + // Rest: recurse and find possible LIMIT 0 nodes _ => { let expr = plan.expressions(); From c165e4220e59f80196c84c720bfb6b1a0a51cbe0 Mon Sep 17 00:00:00 2001 From: "Heres, Daniel" Date: Wed, 28 Apr 2021 00:07:18 +0200 Subject: [PATCH 3/3] Use unit struct --- datafusion/src/optimizer/eliminate_limit.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/src/optimizer/eliminate_limit.rs b/datafusion/src/optimizer/eliminate_limit.rs index 02d2d35d52a4..87b33d6f5d5b 100644 --- a/datafusion/src/optimizer/eliminate_limit.rs +++ b/datafusion/src/optimizer/eliminate_limit.rs @@ -24,7 +24,7 @@ use crate::optimizer::optimizer::OptimizerRule; use super::utils; /// Optimization rule that replaces LIMIT 0 with an [LogicalPlan::EmptyRelation] -pub 
struct EliminateLimit {} +pub struct EliminateLimit; impl EliminateLimit { #[allow(missing_docs)]