diff --git a/datafusion/physical-expr/src/projection.rs b/datafusion/physical-expr/src/projection.rs index 8320983c10ab..cee95685e844 100644 --- a/datafusion/physical-expr/src/projection.rs +++ b/datafusion/physical-expr/src/projection.rs @@ -661,7 +661,7 @@ impl ProjectionExprs { for proj_expr in self.exprs.iter() { let expr = &proj_expr.expr; let col_stats = if let Some(col) = expr.downcast_ref::() { - std::mem::take(&mut stats.column_statistics[col.index()]) + stats.column_statistics[col.index()].clone() } else if let Some(literal) = expr.downcast_ref::() { // Handle literal expressions (constants) by calculating proper statistics let data_type = expr.data_type(output_schema)?; @@ -2866,6 +2866,56 @@ pub(crate) mod tests { Ok(()) } + #[test] + fn test_project_statistics_duplicate_column() -> Result<()> { + let input_stats = get_stats(); + let col0 = input_stats.column_statistics[0].clone(); + let projection = ProjectionExprs::new([ + ProjectionExpr::new(Arc::new(Column::new("col0", 0)), "a"), + ProjectionExpr::new(Arc::new(Column::new("col0", 0)), "b"), + ]); + + let output_schema = projection.project_schema(&get_schema())?; + let output_stats = projection.project_statistics(input_stats, &output_schema)?; + + assert_eq!(output_stats.column_statistics, vec![col0.clone(), col0]); + Ok(()) + } + + #[test] + fn test_project_statistics_column_and_cast() -> Result<()> { + let input_stats = get_stats(); + let col0 = input_stats.column_statistics[0].clone(); + let projection = ProjectionExprs::new([ + ProjectionExpr::new(Arc::new(Column::new("col0", 0)), "num"), + ProjectionExpr::new( + Arc::new(CastExpr::new( + Arc::new(Column::new("col0", 0)), + DataType::Int32, + None, + )), + "casted", + ), + ]); + + let output_schema = projection.project_schema(&get_schema())?; + let output_stats = projection.project_statistics(input_stats, &output_schema)?; + + assert_eq!(output_stats.column_statistics[0], col0); + assert_eq!( + output_stats.column_statistics[1], + ColumnStatistics { + min_value: Precision::Exact(ScalarValue::Int32(Some(-4))), + max_value: Precision::Exact(ScalarValue::Int32(Some(21))), + distinct_count: Precision::Exact(5), + null_count: Precision::Exact(0), + sum_value: Precision::Absent, + byte_size: Precision::Absent, + } + ); + Ok(()) + } + #[test] fn test_project_statistics_primitive_width_only() -> Result<()> { let input_stats = get_stats();