Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ members = [
"datafusion/data-access",
"datafusion/expr",
"datafusion/jit",
"datafusion/optimizer",
"datafusion/physical-expr",
"datafusion/proto",
"datafusion/row",
Expand Down
1 change: 1 addition & 0 deletions datafusion/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ datafusion-common = { path = "../common", version = "8.0.0", features = ["parque
datafusion-data-access = { path = "../data-access", version = "8.0.0" }
datafusion-expr = { path = "../expr", version = "8.0.0" }
datafusion-jit = { path = "../jit", version = "8.0.0", optional = true }
datafusion-optimizer = { path = "../optimizer", version = "8.0.0" }
datafusion-physical-expr = { path = "../physical-expr", version = "8.0.0" }
datafusion-row = { path = "../row", version = "8.0.0" }
datafusion-sql = { path = "../sql", version = "8.0.0" }
Expand Down
16 changes: 6 additions & 10 deletions datafusion/core/src/optimizer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,10 @@
//! some simple rules to a logical plan, such as "Projection Push Down" and "Type Coercion".

#![allow(clippy::module_inception)]
pub mod common_subexpr_eliminate;
pub mod eliminate_filter;
pub mod eliminate_limit;
pub mod filter_push_down;
pub mod limit_push_down;
pub mod optimizer;
pub mod projection_push_down;
pub mod simplify_expressions;
pub mod single_distinct_to_groupby;
pub mod subquery_filter_to_join;
pub mod utils;

pub use datafusion_optimizer::{
common_subexpr_eliminate, eliminate_filter, eliminate_limit, filter_push_down,
limit_push_down, optimizer, projection_push_down, single_distinct_to_groupby,
subquery_filter_to_join, utils,
};
18 changes: 1 addition & 17 deletions datafusion/core/src/test/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ use crate::error::Result;
use crate::from_slice::FromSlice;
use crate::logical_plan::LogicalPlan;
use crate::physical_plan::file_format::{CsvExec, FileScanConfig};
use crate::test_util::{aggr_test_schema, scan_empty};
use crate::test_util::aggr_test_schema;
use array::{Array, ArrayRef};
use arrow::array::{self, DecimalBuilder, Int32Array};
use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
Expand Down Expand Up @@ -128,21 +128,6 @@ pub fn partitioned_csv_config(
})
}

/// some tests share a common table with different names
pub fn test_table_scan_with_name(name: &str) -> Result<LogicalPlan> {
let schema = Schema::new(vec![
Field::new("a", DataType::UInt32, false),
Field::new("b", DataType::UInt32, false),
Field::new("c", DataType::UInt32, false),
]);
scan_empty(Some(name), &schema, None)?.build()
}

/// Builds the shared test table scan using the default table name `test`.
pub fn test_table_scan() -> Result<LogicalPlan> {
    let default_name = "test";
    test_table_scan_with_name(default_name)
}

pub fn assert_fields_eq(plan: &LogicalPlan, expected: Vec<&str>) {
let actual: Vec<String> = plan
.schema()
Expand Down Expand Up @@ -259,5 +244,4 @@ fn create_batch(schema: &Schema) -> RecordBatch {

pub mod exec;
pub mod object_store;
pub mod user_defined;
pub mod variable;
46 changes: 46 additions & 0 deletions datafusion/optimizer/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

[package]
name = "datafusion-optimizer"
description = "DataFusion Query Optimizer"
version = "8.0.0"
homepage = "https://github.com/apache/arrow-datafusion"
repository = "https://github.com/apache/arrow-datafusion"
readme = "README.md"
authors = ["Apache Arrow <dev@arrow.apache.org>"]
license = "Apache-2.0"
keywords = [ "datafusion", "query", "optimizer" ]
edition = "2021"
rust-version = "1.59"

[lib]
name = "datafusion_optimizer"
path = "src/lib.rs"

[features]
default = ["unicode_expressions"]
unicode_expressions = []

[dependencies]
arrow = { version = "15.0.0", features = ["prettyprint"] }
async-trait = "0.1.41"
chrono = { version = "0.4", default-features = false }
datafusion-common = { path = "../common", version = "8.0.0" }
datafusion-expr = { path = "../expr", version = "8.0.0" }
hashbrown = { version = "0.12", features = ["raw"] }
log = "^0.4"
26 changes: 26 additions & 0 deletions datafusion/optimizer/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
<!---
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->

# DataFusion Query Optimizer Rules

[DataFusion][df] is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format.

This crate is a submodule of DataFusion that provides query optimizer rules.

[df]: https://crates.io/crates/datafusion
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

//! Eliminate common sub-expression.

use crate::optimizer::optimizer::{OptimizerConfig, OptimizerRule};
use crate::{OptimizerConfig, OptimizerRule};
use arrow::datatypes::DataType;
use datafusion_common::{DFField, DFSchema, Result};
use datafusion_expr::{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,14 @@
//! Optimizer rule to replace `where false` on a plan with an empty relation.
//! This saves time in planning and executing the query.
//! Note that this rule should be applied after simplify expressions optimizer rule.
use crate::{OptimizerConfig, OptimizerRule};
use datafusion_common::{Result, ScalarValue};
use datafusion_expr::{
logical_plan::{EmptyRelation, Filter, LogicalPlan},
utils::from_plan,
Expr,
};

use crate::optimizer::optimizer::{OptimizerConfig, OptimizerRule};

/// Optimization rule that eliminates the scalar value (true/false) filter with a [LogicalPlan::EmptyRelation]
#[derive(Default)]
pub struct EliminateFilter;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

//! Optimizer rule to replace `LIMIT 0` on a plan with an empty relation.
//! This saves time in planning and executing the query.
use crate::optimizer::optimizer::{OptimizerConfig, OptimizerRule};
use crate::{OptimizerConfig, OptimizerRule};
use datafusion_common::Result;
use datafusion_expr::{
logical_plan::{EmptyRelation, Limit, LogicalPlan},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,7 @@

//! Filter Push Down optimizer rule ensures that filters are applied as early as possible in the plan

use crate::optimizer::{
optimizer::{OptimizerConfig, OptimizerRule},
utils,
};
use crate::{utils, OptimizerConfig, OptimizerRule};
use datafusion_common::{Column, DFSchema, Result};
use datafusion_expr::{
col,
Expand Down
32 changes: 32 additions & 0 deletions datafusion/optimizer/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

//! DataFusion query optimizer rules.
//!
//! The optimizer rules and their shared utilities live in the public modules
//! declared below.

pub mod common_subexpr_eliminate;
pub mod eliminate_filter;
pub mod eliminate_limit;
pub mod filter_push_down;
pub mod limit_push_down;
pub mod optimizer;
pub mod projection_push_down;
pub mod single_distinct_to_groupby;
pub mod subquery_filter_to_join;
pub mod utils;

// Shared test helpers; only compiled when building this crate's tests.
#[cfg(test)]
pub mod test;

// Re-exported at the crate root so the rule modules can write
// `use crate::{OptimizerConfig, OptimizerRule};`.
pub use optimizer::{OptimizerConfig, OptimizerRule};
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

//! Optimizer rule to push down LIMIT in the query plan
//! It will push down through projection, limits (taking the smaller limit)
use crate::optimizer::optimizer::{OptimizerConfig, OptimizerRule};
use crate::{OptimizerConfig, OptimizerRule};
use datafusion_common::{DataFusionError, Result};
use datafusion_expr::{
logical_plan::{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
//! Projection Push Down optimizer rule ensures that only referenced columns are
//! loaded into memory

use crate::optimizer::optimizer::{OptimizerConfig, OptimizerRule};
use crate::{OptimizerConfig, OptimizerRule};
use arrow::datatypes::{Field, Schema};
use arrow::error::Result as ArrowResult;
use datafusion_common::{
Expand Down Expand Up @@ -530,7 +530,6 @@ mod tests {

use super::*;
use crate::test::*;
use crate::test_util::scan_empty;
use arrow::datatypes::DataType;
use datafusion_expr::{
col, lit,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

//! single distinct to group by optimizer rule

use crate::optimizer::optimizer::{OptimizerConfig, OptimizerRule};
use crate::{OptimizerConfig, OptimizerRule};
use datafusion_common::{DFSchema, Result};
use datafusion_expr::{
col,
Expand Down Expand Up @@ -201,10 +201,10 @@ impl OptimizerRule for SingleDistinctToGroupBy {
#[cfg(test)]
mod tests {
use super::*;
use crate::physical_plan::aggregates;
use crate::test::*;
use datafusion_expr::{
col, count, count_distinct, lit, logical_plan::builder::LogicalPlanBuilder, max,
AggregateFunction,
};

fn assert_optimized_plan_eq(plan: &LogicalPlan, expected: &str) {
Expand Down Expand Up @@ -314,7 +314,7 @@ mod tests {
vec![
count_distinct(col("b")),
Expr::AggregateFunction {
fun: aggregates::AggregateFunction::Max,
fun: AggregateFunction::Max,
distinct: true,
args: vec![col("b")],
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,7 @@
//! WHERE t1.f IN (SELECT f FROM t2) OR t2.f = 'x'
//! ```
//! won't
use crate::optimizer::{
optimizer::{OptimizerConfig, OptimizerRule},
utils,
};
use crate::{utils, OptimizerConfig, OptimizerRule};
use datafusion_common::{DataFusionError, Result};
use datafusion_expr::{
logical_plan::{
Expand Down
56 changes: 56 additions & 0 deletions datafusion/optimizer/src/test/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use arrow::datatypes::{DataType, Field, Schema};
use datafusion_common::Result;
use datafusion_expr::{logical_plan::table_scan, LogicalPlan, LogicalPlanBuilder};

pub mod user_defined;

/// some tests share a common table with different names
pub fn test_table_scan_with_name(name: &str) -> Result<LogicalPlan> {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These functions don't appear to be being used by any tests that haven't also been moved, so perhaps they could be removed from core also?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have removed this from core now

let schema = Schema::new(vec![
Field::new("a", DataType::UInt32, false),
Field::new("b", DataType::UInt32, false),
Field::new("c", DataType::UInt32, false),
]);
table_scan(Some(name), &schema, None)?.build()
}

/// Builds the shared test table scan under its default table name, `test`.
pub fn test_table_scan() -> Result<LogicalPlan> {
    const DEFAULT_TABLE: &str = "test";
    test_table_scan_with_name(DEFAULT_TABLE)
}

/// Starts a [`LogicalPlanBuilder`] that scans an empty data source; mainly
/// used in tests.
///
/// All three arguments are forwarded unchanged to [`table_scan`], and any
/// error it reports is returned to the caller.
pub fn scan_empty(
    name: Option<&str>,
    table_schema: &Schema,
    projection: Option<Vec<usize>>,
) -> Result<LogicalPlanBuilder> {
    let builder = table_scan(name, table_schema, projection)?;
    Ok(builder)
}

/// Asserts that the field names of `plan`'s schema equal `expected`, in order.
///
/// Panics through `assert_eq!` when the names or their order differ.
pub fn assert_fields_eq(plan: &LogicalPlan, expected: Vec<&str>) {
    let mut actual: Vec<String> = Vec::new();
    for field in plan.schema().fields() {
        actual.push(field.name().clone());
    }
    assert_eq!(actual, expected);
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,17 @@

//! Simple user defined logical plan node for testing

use datafusion_common::DFSchemaRef;
use datafusion_expr::{
logical_plan::{Extension, UserDefinedLogicalNode},
Expr, LogicalPlan,
};
use std::{
any::Any,
fmt::{self, Debug},
sync::Arc,
};

use crate::logical_plan::plan::Extension;
use crate::logical_plan::{DFSchemaRef, Expr, LogicalPlan, UserDefinedLogicalNode};

/// Create a new user defined plan node, for testing
pub fn new(input: LogicalPlan) -> LogicalPlan {
let node = Arc::new(TestUserDefinedPlanNode { input });
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

//! Collection of utility functions that are leveraged by the query optimizer rules

use crate::optimizer::optimizer::{OptimizerConfig, OptimizerRule};
use crate::{OptimizerConfig, OptimizerRule};
use datafusion_common::{DataFusionError, Result, ScalarValue};
use datafusion_expr::{
and,
Expand Down