From 7c374c774fcc5c04c933525a3dfac2effbd23e9f Mon Sep 17 00:00:00 2001 From: Neil Conway Date: Sun, 31 May 2026 18:16:26 -0400 Subject: [PATCH 1/2] Clone input column statistics in project_statistics instead of taking them --- datafusion/physical-expr/src/projection.rs | 55 +++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/datafusion/physical-expr/src/projection.rs b/datafusion/physical-expr/src/projection.rs index 8320983c10ab7..bdf8b4d5882d2 100644 --- a/datafusion/physical-expr/src/projection.rs +++ b/datafusion/physical-expr/src/projection.rs @@ -661,7 +661,7 @@ impl ProjectionExprs { for proj_expr in self.exprs.iter() { let expr = &proj_expr.expr; let col_stats = if let Some(col) = expr.downcast_ref::() { - std::mem::take(&mut stats.column_statistics[col.index()]) + stats.column_statistics[col.index()].clone() } else if let Some(literal) = expr.downcast_ref::() { // Handle literal expressions (constants) by calculating proper statistics let data_type = expr.data_type(output_schema)?; @@ -2866,6 +2866,59 @@ pub(crate) mod tests { Ok(()) } + #[test] + fn test_project_statistics_duplicate_column() -> Result<()> { + // SELECT col0 AS a, col0 AS b: both outputs carry col0's statistics. + let input_stats = get_stats(); + let col0 = input_stats.column_statistics[0].clone(); + let projection = ProjectionExprs::new([ + ProjectionExpr::new(Arc::new(Column::new("col0", 0)), "a"), + ProjectionExpr::new(Arc::new(Column::new("col0", 0)), "b"), + ]); + + let output_schema = projection.project_schema(&get_schema())?; + let output_stats = projection.project_statistics(input_stats, &output_schema)?; + + assert_eq!(output_stats.column_statistics, vec![col0.clone(), col0]); + Ok(()) + } + + #[test] + fn test_project_statistics_column_and_cast() -> Result<()> { + // SELECT col0 AS num, CAST(col0 AS Int32) AS casted: the passthrough + // copies col0's stats; the cast keeps them with min/max cast to Int32. 
+ let input_stats = get_stats(); + let col0 = input_stats.column_statistics[0].clone(); + let projection = ProjectionExprs::new([ + ProjectionExpr::new(Arc::new(Column::new("col0", 0)), "num"), + ProjectionExpr::new( + Arc::new(CastExpr::new( + Arc::new(Column::new("col0", 0)), + DataType::Int32, + None, + )), + "casted", + ), + ]); + + let output_schema = projection.project_schema(&get_schema())?; + let output_stats = projection.project_statistics(input_stats, &output_schema)?; + + assert_eq!(output_stats.column_statistics[0], col0); + assert_eq!( + output_stats.column_statistics[1], + ColumnStatistics { + min_value: Precision::Exact(ScalarValue::Int32(Some(-4))), + max_value: Precision::Exact(ScalarValue::Int32(Some(21))), + distinct_count: Precision::Exact(5), + null_count: Precision::Exact(0), + sum_value: Precision::Absent, + byte_size: Precision::Absent, + } + ); + Ok(()) + } + #[test] fn test_project_statistics_primitive_width_only() -> Result<()> { let input_stats = get_stats(); From 07b4a043223c504b1fa43fa8a602be1579515550 Mon Sep 17 00:00:00 2001 From: Neil Conway Date: Mon, 1 Jun 2026 07:03:03 -0400 Subject: [PATCH 2/2] Trim comments in tests --- datafusion/physical-expr/src/projection.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/datafusion/physical-expr/src/projection.rs b/datafusion/physical-expr/src/projection.rs index bdf8b4d5882d2..cee95685e8440 100644 --- a/datafusion/physical-expr/src/projection.rs +++ b/datafusion/physical-expr/src/projection.rs @@ -2868,7 +2868,6 @@ pub(crate) mod tests { #[test] fn test_project_statistics_duplicate_column() -> Result<()> { - // SELECT col0 AS a, col0 AS b: both outputs carry col0's statistics. 
let input_stats = get_stats(); let col0 = input_stats.column_statistics[0].clone(); let projection = ProjectionExprs::new([ @@ -2885,8 +2884,6 @@ pub(crate) mod tests { #[test] fn test_project_statistics_column_and_cast() -> Result<()> { - // SELECT col0 AS num, CAST(col0 AS Int32) AS casted: the passthrough - // copies col0's stats; the cast keeps them with min/max cast to Int32. let input_stats = get_stats(); let col0 = input_stats.column_statistics[0].clone(); let projection = ProjectionExprs::new([