Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion datafusion/core/src/logical_plan/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,4 +55,4 @@ pub use datafusion_expr::{
to_timestamp_micros, to_timestamp_millis, to_timestamp_seconds, translate, trim,
trunc, unalias, upper, when, Expr, ExprSchemable, Literal, Operator,
};
pub use datafusion_optimizer::expr_simplifier::{ExprSimplifiable, SimplifyInfo};
pub use datafusion_optimizer::expr_simplifier::SimplifyInfo;
8 changes: 5 additions & 3 deletions datafusion/core/tests/simplification.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,13 @@

use arrow::datatypes::{DataType, Field, Schema};
use datafusion::logical_plan::ExprSchemable;
use datafusion::logical_plan::ExprSimplifiable;
use datafusion::{
error::Result,
execution::context::ExecutionProps,
logical_plan::{DFSchema, Expr, SimplifyInfo},
prelude::*,
};
use datafusion_optimizer::expr_simplifier::ExprSimplifier;

/// In order to simplify expressions, DataFusion must have information
/// about the expressions.
Expand Down Expand Up @@ -90,7 +90,8 @@ fn basic() {
// optimize form `a < 5` automatically
let expr = col("a").lt(lit(2i32) + lit(3i32));

let simplified = expr.simplify(&info).unwrap();
let simplifier = ExprSimplifier::new(info);
let simplified = simplifier.simplify(expr).unwrap();
assert_eq!(simplified, col("a").lt(lit(5i32)));
}

Expand All @@ -103,6 +104,7 @@ fn fold_and_simplify() {

// Since datafusion applies both simplification *and* rewriting
// some expressions can be entirely simplified
let simplified = expr.simplify(&info).unwrap();
let simplifier = ExprSimplifier::new(info);
let simplified = simplifier.simplify(expr).unwrap();
assert_eq!(simplified, lit(true))
}
2 changes: 1 addition & 1 deletion datafusion/expr/src/logical_plan/plan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1357,7 +1357,7 @@ impl Aggregate {
/// building the schema again when the schema is already known.
///
/// This method should only be called when you are absolutely sure that the schema being
/// provided is correct for the aggregate. If in doubt, call [try_new] instead.
/// provided is correct for the aggregate. If in doubt, call [try_new](Self::try_new) instead.
pub fn try_new_with_schema(
input: Arc<LogicalPlan>,
group_expr: Vec<Expr>,
Expand Down
2 changes: 1 addition & 1 deletion datafusion/expr/src/type_coercion/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ use datafusion_common::{DataFusionError, Result};
/// each argument must be coerced to match `signature`.
///
/// For more details on coercion in general, please see the
/// [`type_coercion`](datafusion::expr::type_coercion) module.
/// [`type_coercion`](datafusion_expr::type_coercion) module.
pub fn data_types(
current_types: &[DataType],
signature: &Signature,
Expand Down
213 changes: 195 additions & 18 deletions datafusion/optimizer/src/expr_simplifier.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,24 @@
// specific language governing permissions and limitations
// under the License.

//! Expression simplifier
//! Expression simplification API
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This PR looks bigger than it really is -- it moves a bunch of code around and there are a significant number of docstrings


use crate::simplify_expressions::{ConstEvaluator, Simplifier};
use datafusion_common::Result;
use datafusion_expr::{expr_rewriter::ExprRewritable, Expr};
use crate::{
simplify_expressions::{ConstEvaluator, Simplifier},
type_coercion::TypeCoercionRewriter,
};
use arrow::datatypes::DataType;
use datafusion_common::{DFSchemaRef, DataFusionError, Result};
use datafusion_expr::{expr_rewriter::ExprRewritable, Expr, ExprSchemable};
use datafusion_physical_expr::execution_props::ExecutionProps;

#[allow(rustdoc::private_intra_doc_links)]
/// The information necessary to apply algebraic simplification to an
/// [Expr]. See [SimplifyContext](crate::optimizer::simplify_expressions::SimplifyContext)
/// for one implementation
/// [Expr]. See [SimplifyContext] for one concrete implementation.
///
/// This trait exists so that other systems can plug schema
/// information in without having to create `DFSchema` objects. If you
/// have a [`DFSchemaRef`] you can use [`SimplifyContext`]
pub trait SimplifyInfo {
/// returns true if this Expr has boolean type
fn is_boolean_type(&self, expr: &Expr) -> Result<bool>;
Expand All @@ -37,28 +44,41 @@ pub trait SimplifyInfo {
fn execution_props(&self) -> &ExecutionProps;
}

/// trait for types that can be simplified
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed in favor of calling ExprSimplifier::simplify directly

pub trait ExprSimplifiable: Sized {
/// simplify this trait object using the given SimplifyInfo
fn simplify<S: SimplifyInfo>(self, info: &S) -> Result<Self>;
/// This structure handles API for expression simplification
pub struct ExprSimplifier<S> {
info: S,
}

impl ExprSimplifiable for Expr {
impl<S: SimplifyInfo> ExprSimplifier<S> {
/// Create a new `ExprSimplifier` with the given `info`. See
/// [`simplify`](Self::simplify) for an example.
pub fn new(info: S) -> Self {
Self { info }
}

/// Simplifies this [`Expr`]`s as much as possible, evaluating
/// constants and applying algebraic simplifications
/// constants and applying algebraic simplifications.
///
/// The types of the expression must match what operators expect,
/// or else an error may occur trying to evaluate. See
/// [`coerce`](Self::coerce) for a function to help.
///
/// # Example:
///
/// `b > 2 AND b > 2`
///
/// can be written to
///
/// `b > 2`
///
/// ```
/// use datafusion_expr::{col, lit, Expr};
/// use datafusion_common::Result;
/// use datafusion_physical_expr::execution_props::ExecutionProps;
/// use datafusion_optimizer::expr_simplifier::{SimplifyInfo, ExprSimplifiable};
/// use datafusion_optimizer::expr_simplifier::{ExprSimplifier, SimplifyInfo};
///
/// /// Simple implementation that provides `Simplifier` the information it needs
/// /// See SimplifyContext for a structure that does this.
/// #[derive(Default)]
/// struct Info {
/// execution_props: ExecutionProps,
Expand All @@ -76,24 +96,181 @@ impl ExprSimplifiable for Expr {
/// }
/// }
///
/// // Create the simplifier
/// let simplifier = ExprSimplifier::new(Info::default());
///
/// // b < 2
/// let b_lt_2 = col("b").gt(lit(2));
///
/// // (b < 2) OR (b < 2)
/// let expr = b_lt_2.clone().or(b_lt_2.clone());
///
/// // (b < 2) OR (b < 2) --> (b < 2)
/// let expr = expr.simplify(&Info::default()).unwrap();
/// let expr = simplifier.simplify(expr).unwrap();
/// assert_eq!(expr, b_lt_2);
/// ```
fn simplify<S: SimplifyInfo>(self, info: &S) -> Result<Self> {
let mut rewriter = Simplifier::new(info);
let mut const_evaluator = ConstEvaluator::try_new(info.execution_props())?;
pub fn simplify(&self, expr: Expr) -> Result<Expr> {
let mut rewriter = Simplifier::new(&self.info);
let mut const_evaluator = ConstEvaluator::try_new(self.info.execution_props())?;

// TODO iterate until no changes are made during rewrite
// (evaluating constants can enable new simplifications and
// simplifications can enable new constant evaluation)
// https://github.com/apache/arrow-datafusion/issues/1160
self.rewrite(&mut const_evaluator)?.rewrite(&mut rewriter)
expr.rewrite(&mut const_evaluator)?.rewrite(&mut rewriter)
}

/// Apply type coercion to an [`Expr`] so that it can be
/// evaluated as a [`PhysicalExpr`](datafusion_physical_expr::PhysicalExpr).
///
/// See the [type coercion module](datafusion_expr::type_coercion)
/// documentation for more details on type coercion
///
// Would be nice if this API could use the SimplifyInfo
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this PR is approved, I will file a ticket to remove the schema argument in this function call.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

// rather than creating an DFSchemaRef coerces rather than doing
// it manually.
// https://github.com/apache/arrow-datafusion/issues/3793
pub fn coerce(&self, expr: Expr, schema: DFSchemaRef) -> Result<Expr> {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here is the new API for coercion

Copy link
Contributor

@ygf11 ygf11 Oct 8, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ticekt, typo?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good call -- will file ticket.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let mut expr_rewrite = TypeCoercionRewriter { schema };

expr.rewrite(&mut expr_rewrite)
}
}

/// Provides simplification information based on DFSchema and
/// [`ExecutionProps`]. This is the default implementation used by DataFusion
///
/// For example:
/// ```
/// use arrow::datatypes::{Schema, Field, DataType};
/// use datafusion_expr::{col, lit};
/// use datafusion_common::{DataFusionError, ToDFSchema};
/// use datafusion_physical_expr::execution_props::ExecutionProps;
/// use datafusion_optimizer::expr_simplifier::{SimplifyContext, ExprSimplifier};
///
/// // Create the schema
/// let schema = Schema::new(vec![
/// Field::new("i", DataType::Int64, false),
/// ])
/// .to_dfschema_ref().unwrap();
///
/// // Create the simplifier
/// let props = ExecutionProps::new();
/// let context = SimplifyContext::new(&props)
/// .with_schema(schema);
/// let simplifier = ExprSimplifier::new(context);
///
/// // Use the simplifier
///
/// // b < 2 or (1 > 3)
/// let expr = col("b").lt(lit(2)).or(lit(1).gt(lit(3)));
///
/// // b < 2
/// let simplified = simplifier.simplify(expr).unwrap();
/// assert_eq!(simplified, col("b").lt(lit(2)));
/// ```
pub struct SimplifyContext<'a> {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was moved into the public API and I added some docs and a builder interface with_schema

schemas: Vec<DFSchemaRef>,
props: &'a ExecutionProps,
}

impl<'a> SimplifyContext<'a> {
/// Create a new SimplifyContext
pub fn new(props: &'a ExecutionProps) -> Self {
Self {
schemas: vec![],
props,
}
}

/// Register a [`DFSchemaRef`] with this context
pub fn with_schema(mut self, schema: DFSchemaRef) -> Self {
self.schemas.push(schema);
self
}
}

impl<'a> SimplifyInfo for SimplifyContext<'a> {
/// returns true if this Expr has boolean type
fn is_boolean_type(&self, expr: &Expr) -> Result<bool> {
for schema in &self.schemas {
if let Ok(DataType::Boolean) = expr.get_type(schema) {
return Ok(true);
}
}

Ok(false)
}
/// Returns true if expr is nullable
fn nullable(&self, expr: &Expr) -> Result<bool> {
self.schemas
.iter()
.find_map(|schema| {
// expr may be from another input, so ignore errors
// by converting to None to keep trying
expr.nullable(schema.as_ref()).ok()
})
.ok_or_else(|| {
// This means we weren't able to compute `Expr::nullable` with
// *any* input schemas, signalling a problem
DataFusionError::Internal(format!(
"Could not find columns in '{}' during simplify",
expr
))
})
}

fn execution_props(&self) -> &ExecutionProps {
self.props
}
}

#[cfg(test)]
mod tests {
use super::*;
use arrow::datatypes::{Field, Schema};
use datafusion_common::ToDFSchema;
use datafusion_expr::{col, lit};

#[test]
fn api_basic() {
let props = ExecutionProps::new();
let simplifier =
ExprSimplifier::new(SimplifyContext::new(&props).with_schema(test_schema()));

let expr = lit(1) + lit(2);
let expected = lit(3);
assert_eq!(expected, simplifier.simplify(expr).unwrap());
}

#[test]
fn basic_coercion() {
let schema = test_schema();
let props = ExecutionProps::new();
let simplifier =
ExprSimplifier::new(SimplifyContext::new(&props).with_schema(schema.clone()));

// Note expr type is int32 (not int64)
// (1i64 + 2i32) < i
let expr = (lit(1i64) + lit(2i32)).lt(col("i"));
// should fully simplify to 3 < i (though i has been coerced to i64)
let expected = lit(3i64).lt(col("i"));

// Would be nice if this API could use the SimplifyInfo
// rather than creating an DFSchemaRef coerces rather than doing
// it manually.
// https://github.com/apache/arrow-datafusion/issues/3793
let expr = simplifier.coerce(expr, schema).unwrap();

assert_eq!(expected, simplifier.simplify(expr).unwrap());
}

fn test_schema() -> DFSchemaRef {
Schema::new(vec![
Field::new("i", DataType::Int64, false),
Field::new("b", DataType::Boolean, true),
])
.to_dfschema_ref()
.unwrap()
}
}
Loading