Skip to content

Commit

Permalink
Skip filter push down on semi/anti joins (#3723)
Browse files Browse the repository at this point in the history
  • Loading branch information
andygrove committed Oct 5, 2022
1 parent beeb631 commit 965133c
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 5 deletions.
9 changes: 4 additions & 5 deletions datafusion/optimizer/src/filter_push_down.rs
Expand Up @@ -193,11 +193,10 @@ fn on_lr_is_preserved(plan: &LogicalPlan) -> Result<(bool, bool)> {
JoinType::Left => Ok((false, true)),
JoinType::Right => Ok((true, false)),
JoinType::Full => Ok((false, false)),
// Semi/Anti joins can not have join filter.
JoinType::Semi | JoinType::Anti => Err(DataFusionError::Internal(
"on_lr_is_preserved cannot be appplied to SEMI/ANTI-JOIN nodes"
.to_string(),
)),
JoinType::Semi | JoinType::Anti => {
// filter_push_down does not yet support SEMI/ANTI joins with join conditions
Ok((false, false))
}
},
LogicalPlan::CrossJoin(_) => Err(DataFusionError::Internal(
"on_lr_is_preserved cannot be applied to CROSSJOIN nodes".to_string(),
Expand Down
32 changes: 32 additions & 0 deletions datafusion/optimizer/tests/integration-test.rs
Expand Up @@ -69,6 +69,38 @@ fn distribute_by() -> Result<()> {
Ok(())
}

/// Regression test: an `EXISTS` subquery that carries both an equality and a
/// non-equality correlation must come out of `test_sql` as a Semi Join whose
/// extra predicate is still attached to the join as its `Filter`.
///
/// `test_sql` is defined elsewhere in this file; presumably it plans and
/// optimizes the query — confirm there.
#[test]
fn semi_join_with_join_filter() -> Result<()> {
    // regression test for https://github.com/apache/arrow-datafusion/issues/2888
    let sql = "SELECT * FROM test WHERE EXISTS (\
               SELECT * FROM test t2 WHERE test.col_int32 = t2.col_int32 \
               AND test.col_uint32 != t2.col_uint32)";
    let plan = test_sql(sql)?;
    // The plan is compared as text, so whitespace inside the raw string is
    // significant: each child level of the plan tree is nested by two spaces.
    let expected = r#"Projection: test.col_int32, test.col_uint32, test.col_utf8, test.col_date32, test.col_date64
  Semi Join: test.col_int32 = t2.col_int32 Filter: test.col_uint32 != t2.col_uint32
    TableScan: test projection=[col_int32, col_uint32, col_utf8, col_date32, col_date64]
    SubqueryAlias: t2
      TableScan: test projection=[col_int32, col_uint32, col_utf8, col_date32, col_date64]"#;
    assert_eq!(expected, format!("{:?}", plan));
    Ok(())
}

/// Regression test: a `NOT EXISTS` subquery that carries both an equality and
/// a non-equality correlation must come out of `test_sql` as an Anti Join
/// whose extra predicate is still attached to the join as its `Filter`.
///
/// `test_sql` is defined elsewhere in this file; presumably it plans and
/// optimizes the query — confirm there.
#[test]
fn anti_join_with_join_filter() -> Result<()> {
    // regression test for https://github.com/apache/arrow-datafusion/issues/2888
    let sql = "SELECT * FROM test WHERE NOT EXISTS (\
               SELECT * FROM test t2 WHERE test.col_int32 = t2.col_int32 \
               AND test.col_uint32 != t2.col_uint32)";
    let plan = test_sql(sql)?;
    // The plan is compared as text, so whitespace inside the raw string is
    // significant: each child level of the plan tree is nested by two spaces.
    let expected = r#"Projection: test.col_int32, test.col_uint32, test.col_utf8, test.col_date32, test.col_date64
  Anti Join: test.col_int32 = t2.col_int32 Filter: test.col_uint32 != t2.col_uint32
    TableScan: test projection=[col_int32, col_uint32, col_utf8, col_date32, col_date64]
    SubqueryAlias: t2
      TableScan: test projection=[col_int32, col_uint32, col_utf8, col_date32, col_date64]"#;
    assert_eq!(expected, format!("{:?}", plan));
    Ok(())
}

#[test]
fn intersect() -> Result<()> {
let sql = "SELECT col_int32, col_utf8 FROM test \
Expand Down

0 comments on commit 965133c

Please sign in to comment.