-
Notifications
You must be signed in to change notification settings - Fork 1.8k
Factorize common AND factors out of OR predicates to support filterPu… #3859
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
3e5ea52
9b89398
bd101a3
3e076a0
fce97fe
e8cda82
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -14,6 +14,7 @@ | |
|
|
||
| //! Filter Push Down optimizer rule ensures that filters are applied as early as possible in the plan | ||
|
|
||
| use crate::utils::{split_conjunction, CnfHelper}; | ||
| use crate::{utils, OptimizerConfig, OptimizerRule}; | ||
| use datafusion_common::{Column, DFSchema, DataFusionError, Result}; | ||
| use datafusion_expr::{ | ||
|
|
@@ -28,6 +29,7 @@ use datafusion_expr::{ | |
| utils::{expr_to_columns, exprlist_to_columns, from_plan}, | ||
| Expr, Operator, TableProviderFilterPushDown, | ||
| }; | ||
| use log::error; | ||
| use std::collections::{HashMap, HashSet}; | ||
| use std::iter::once; | ||
|
|
||
|
|
@@ -530,7 +532,14 @@ fn optimize(plan: &LogicalPlan, mut state: State) -> Result<LogicalPlan> { | |
| } | ||
| LogicalPlan::Analyze { .. } => push_down(&state, plan), | ||
| LogicalPlan::Filter(filter) => { | ||
| let predicates = utils::split_conjunction(filter.predicate()); | ||
| let filter_cnf = filter.predicate().clone().rewrite(&mut CnfHelper::new()); | ||
| let predicates = match filter_cnf { | ||
| Ok(ref expr) => split_conjunction(expr), | ||
| Err(e) => { | ||
| error!("Fail at CnfHelper rewrite: {}.", e); | ||
| split_conjunction(filter.predicate()) | ||
| } | ||
| }; | ||
|
|
||
| predicates | ||
| .into_iter() | ||
|
|
@@ -953,6 +962,30 @@ mod tests { | |
| Ok(()) | ||
| } | ||
|
|
||
| #[test] | ||
| fn filter_keep_partial_agg() -> Result<()> { | ||
| let table_scan = test_table_scan()?; | ||
| let f1 = col("c").eq(lit(1i64)).and(col("b").gt(lit(2i64))); | ||
| let f2 = col("c").eq(lit(1i64)).and(col("b").gt(lit(3i64))); | ||
| let filter = f1.or(f2); | ||
| let plan = LogicalPlanBuilder::from(table_scan) | ||
| .aggregate(vec![col("a")], vec![sum(col("b")).alias("b")])? | ||
| .filter(filter)? | ||
| .build()?; | ||
| // filter of aggregate is after aggregation since they are non-commutative | ||
| // (c =1 AND b > 2) OR (c = 1 AND b > 3) | ||
| // rewrite to CNF | ||
| // (c = 1 OR c = 1) [can pushDown] AND (c = 1 OR b > 3) AND (b > 2 OR C = 1) AND (b > 2 OR b > 3) | ||
|
|
||
| let expected = "\ | ||
| Filter: test.c = Int64(1) OR b > Int64(3) AND b > Int64(2) OR test.c = Int64(1) AND b > Int64(2) OR b > Int64(3)\ | ||
| \n Aggregate: groupBy=[[test.a]], aggr=[[SUM(test.b) AS b]]\ | ||
| \n Filter: test.c = Int64(1) OR test.c = Int64(1)\ | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Somewhere need simplify this
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this should be a rule in https://github.com/apache/arrow-datafusion/blob/37fe938261636bb7710b26cf26e2bbd010e1dbf0/datafusion/optimizer/src/simplify_expressions.rs#L264 That pass gets run several times I recommend we file a follow on ticket for that particular rewrite (the same thing applies to
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Filed #3895 |
||
| \n TableScan: test"; | ||
| assert_optimized_plan_eq(&plan, expected); | ||
| Ok(()) | ||
| } | ||
|
|
||
| /// verifies that a filter is pushed to before a projection, the filter expression is correctly re-written | ||
| #[test] | ||
| fn alias() -> Result<()> { | ||
|
|
@@ -2344,7 +2377,7 @@ mod tests { | |
| .filter(filter)? | ||
| .build()?; | ||
|
|
||
| let expected = "Filter: test.a = d AND test.b > UInt32(1) OR test.b = e AND test.c < UInt32(10)\ | ||
| let expected = "Filter: test.a = d OR test.b = e AND test.a = d OR test.c < UInt32(10) AND test.b > UInt32(1) OR test.b = e\ | ||
| \n CrossJoin:\ | ||
| \n Projection: test.a, test.b, test.c\ | ||
| \n Filter: test.b > UInt32(1) OR test.c < UInt32(10)\ | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It used to get not pushed:
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@alamb @andygrove @Dandandan Do you think this is a reasonable improvement 🤔?
If this ok, i will modify the another test😂
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it is a good improvement for sure -- thank you