diff --git a/datafusion/src/optimizer/projection_push_down.rs b/datafusion/src/optimizer/projection_push_down.rs index 0272b9f7872c..089dca2318c9 100644 --- a/datafusion/src/optimizer/projection_push_down.rs +++ b/datafusion/src/optimizer/projection_push_down.rs @@ -173,7 +173,17 @@ fn optimize_plan( true, execution_props, )?; - if new_fields.is_empty() { + + let new_required_columns_optimized = new_input + .schema() + .fields() + .iter() + .map(|f| f.qualified_column()) + .collect::>(); + + if new_fields.is_empty() + || (has_projection && &new_required_columns_optimized == required_columns) + { // no need for an expression at all Ok(new_input) } else { @@ -496,6 +506,60 @@ mod tests { Ok(()) } + #[test] + fn redundunt_project() -> Result<()> { + let table_scan = test_table_scan()?; + + let plan = LogicalPlanBuilder::from(table_scan) + .project(vec![col("a"), col("b"), col("c")])? + .project(vec![col("a"), col("c"), col("b")])? + .build()?; + let expected = "Projection: #test.a, #test.c, #test.b\ + \n TableScan: test projection=Some([0, 1, 2])"; + + assert_optimized_plan_eq(&plan, expected); + + Ok(()) + } + + #[test] + fn reorder_projection() -> Result<()> { + let table_scan = test_table_scan()?; + + let plan = LogicalPlanBuilder::from(table_scan) + .project(vec![col("c"), col("b"), col("a")])? + .build()?; + let expected = "Projection: #test.c, #test.b, #test.a\ + \n TableScan: test projection=Some([0, 1, 2])"; + + assert_optimized_plan_eq(&plan, expected); + + Ok(()) + } + + #[test] + fn noncontiguous_redundunt_projection() -> Result<()> { + let table_scan = test_table_scan()?; + + let plan = LogicalPlanBuilder::from(table_scan) + .project(vec![col("c"), col("b"), col("a")])? + .filter(col("c").gt(lit(1)))? + .project(vec![col("c"), col("a"), col("b")])? + .filter(col("b").gt(lit(1)))? + .filter(col("a").gt(lit(1)))? + .project(vec![col("a"), col("c"), col("b")])? + .build()?; + let expected = "Projection: #test.a, #test.c, #test.b\ + \n Filter: #test.a Gt Int32(1)\ + \n Filter: #test.b Gt Int32(1)\ + \n Filter: #test.c Gt Int32(1)\ + \n TableScan: test projection=Some([0, 1, 2])"; + + assert_optimized_plan_eq(&plan, expected); + + Ok(()) + } + #[test] fn join_schema_trim_full_join_column_projection() -> Result<()> { let table_scan = test_table_scan()?; @@ -812,8 +876,7 @@ mod tests { assert_fields_eq(&plan, vec!["c", "a", "MAX(test.b)"]); - let expected = "\ - Projection: #test.c, #test.a, #MAX(test.b)\ + let expected = "Projection: #test.c, #test.a, #MAX(test.b)\ \n Filter: #test.c Gt Int32(1)\ \n Aggregate: groupBy=[[#test.a, #test.c]], aggr=[[MAX(#test.b)]]\ \n TableScan: test projection=Some([0, 1, 2])";