diff --git a/datafusion-examples/examples/relation_planner/table_sample.rs b/datafusion-examples/examples/relation_planner/table_sample.rs index 42342e5f1a641..df2bc7c160883 100644 --- a/datafusion-examples/examples/relation_planner/table_sample.rs +++ b/datafusion-examples/examples/relation_planner/table_sample.rs @@ -108,7 +108,7 @@ use datafusion::{ }, physical_expr::EquivalenceProperties, physical_plan::{ - DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, + DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, StatisticsContext, metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet, RecordOutput}, }, physical_planner::{DefaultPhysicalPlanner, ExtensionPlanner, PhysicalPlanner}, @@ -722,8 +722,12 @@ impl ExecutionPlan for SampleExec { Some(self.metrics.clone_inner()) } - fn partition_statistics(&self, partition: Option) -> Result> { - let mut stats = Arc::unwrap_or_clone(self.input.partition_statistics(partition)?); + fn partition_statistics( + &self, + _partition: Option, + ctx: &StatisticsContext, + ) -> Result> { + let mut stats = Arc::unwrap_or_clone(Arc::clone(&ctx.child_stats()[0])); let ratio = self.upper_bound - self.lower_bound; // Scale statistics by sampling ratio (inexact due to randomness) diff --git a/datafusion/core/src/datasource/file_format/csv.rs b/datafusion/core/src/datasource/file_format/csv.rs index a068b4f5c0413..e36044602acba 100644 --- a/datafusion/core/src/datasource/file_format/csv.rs +++ b/datafusion/core/src/datasource/file_format/csv.rs @@ -45,7 +45,7 @@ mod tests { use datafusion_datasource::file_format::FileFormat; use datafusion_datasource::write::BatchSerializer; use datafusion_expr::{col, lit}; - use datafusion_physical_plan::{ExecutionPlan, collect}; + use datafusion_physical_plan::{ExecutionPlan, collect, compute_statistics}; use arrow::array::{ Array, BooleanArray, Float64Array, Int32Array, RecordBatch, StringArray, @@ -215,9 +215,12 @@ mod tests { assert_eq!(tt_batches, 50 /* 100/2 */); // test 
metadata - assert_eq!(exec.partition_statistics(None)?.num_rows, Precision::Absent); assert_eq!( - exec.partition_statistics(None)?.total_byte_size, + compute_statistics(exec.as_ref(), None)?.num_rows, + Precision::Absent + ); + assert_eq!( + compute_statistics(exec.as_ref(), None)?.total_byte_size, Precision::Absent ); diff --git a/datafusion/core/src/datasource/file_format/json.rs b/datafusion/core/src/datasource/file_format/json.rs index 5b3e22705620e..7d19b7f0f48d2 100644 --- a/datafusion/core/src/datasource/file_format/json.rs +++ b/datafusion/core/src/datasource/file_format/json.rs @@ -36,7 +36,7 @@ mod tests { BatchDeserializer, DecoderDeserializer, DeserializerOutput, }; use datafusion_datasource::file_format::FileFormat; - use datafusion_physical_plan::{ExecutionPlan, collect}; + use datafusion_physical_plan::{ExecutionPlan, collect, compute_statistics}; use arrow::compute::concat_batches; use arrow::datatypes::{DataType, Field}; @@ -117,9 +117,12 @@ mod tests { assert_eq!(tt_batches, 6 /* 12/2 */); // test metadata - assert_eq!(exec.partition_statistics(None)?.num_rows, Precision::Absent); assert_eq!( - exec.partition_statistics(None)?.total_byte_size, + compute_statistics(exec.as_ref(), None)?.num_rows, + Precision::Absent + ); + assert_eq!( + compute_statistics(exec.as_ref(), None)?.total_byte_size, Precision::Absent ); diff --git a/datafusion/core/src/datasource/file_format/parquet.rs b/datafusion/core/src/datasource/file_format/parquet.rs index 6a8f7ab999757..207b990ff85b8 100644 --- a/datafusion/core/src/datasource/file_format/parquet.rs +++ b/datafusion/core/src/datasource/file_format/parquet.rs @@ -142,7 +142,7 @@ mod tests { use datafusion_execution::runtime_env::RuntimeEnv; use datafusion_expr::dml::InsertOp; use datafusion_physical_plan::stream::RecordBatchStreamAdapter; - use datafusion_physical_plan::{ExecutionPlan, collect}; + use datafusion_physical_plan::{ExecutionPlan, collect, compute_statistics}; use crate::test_util::bounded_stream; use 
arrow::array::{ @@ -715,12 +715,12 @@ mod tests { // test metadata assert_eq!( - exec.partition_statistics(None)?.num_rows, + compute_statistics(exec.as_ref(), None)?.num_rows, Precision::Exact(8) ); // TODO correct byte size: https://github.com/apache/datafusion/issues/14936 assert_eq!( - exec.partition_statistics(None)?.total_byte_size, + compute_statistics(exec.as_ref(), None)?.total_byte_size, Precision::Absent, ); @@ -764,11 +764,11 @@ mod tests { // note: even if the limit is set, the executor rounds up to the batch size assert_eq!( - exec.partition_statistics(None)?.num_rows, + compute_statistics(exec.as_ref(), None)?.num_rows, Precision::Exact(8) ); assert_eq!( - exec.partition_statistics(None)?.total_byte_size, + compute_statistics(exec.as_ref(), None)?.total_byte_size, Precision::Absent, ); let batches = collect(exec, task_ctx).await?; diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs index d14ec1f56dce2..206cc9775c8f1 100644 --- a/datafusion/core/src/datasource/listing/table.rs +++ b/datafusion/core/src/datasource/listing/table.rs @@ -144,7 +144,9 @@ mod tests { use datafusion_physical_expr::expressions::binary; use datafusion_physical_expr_common::sort_expr::LexOrdering; use datafusion_physical_plan::empty::EmptyExec; - use datafusion_physical_plan::{ExecutionPlanProperties, collect}; + use datafusion_physical_plan::{ + ExecutionPlanProperties, collect, compute_statistics, + }; use std::collections::HashMap; use std::io::Write; use std::sync::Arc; @@ -247,11 +249,11 @@ mod tests { // test metadata assert_eq!( - exec.partition_statistics(None)?.num_rows, + compute_statistics(exec.as_ref(), None)?.num_rows, Precision::Exact(8) ); assert_eq!( - exec.partition_statistics(None)?.total_byte_size, + compute_statistics(exec.as_ref(), None)?.total_byte_size, Precision::Absent, ); @@ -1355,13 +1357,13 @@ mod tests { let exec_default = table_default.scan(&state, None, &[], None).await?; assert_eq!( - 
exec_default.partition_statistics(None)?.num_rows, + compute_statistics(exec_default.as_ref(), None)?.num_rows, Precision::Absent ); // TODO correct byte size: https://github.com/apache/datafusion/issues/14936 assert_eq!( - exec_default.partition_statistics(None)?.total_byte_size, + compute_statistics(exec_default.as_ref(), None)?.total_byte_size, Precision::Absent ); @@ -1376,11 +1378,11 @@ mod tests { let exec_disabled = table_disabled.scan(&state, None, &[], None).await?; assert_eq!( - exec_disabled.partition_statistics(None)?.num_rows, + compute_statistics(exec_disabled.as_ref(), None)?.num_rows, Precision::Absent ); assert_eq!( - exec_disabled.partition_statistics(None)?.total_byte_size, + compute_statistics(exec_disabled.as_ref(), None)?.total_byte_size, Precision::Absent ); @@ -1395,12 +1397,12 @@ mod tests { let exec_enabled = table_enabled.scan(&state, None, &[], None).await?; assert_eq!( - exec_enabled.partition_statistics(None)?.num_rows, + compute_statistics(exec_enabled.as_ref(), None)?.num_rows, Precision::Exact(8) ); // TODO correct byte size: https://github.com/apache/datafusion/issues/14936 assert_eq!( - exec_enabled.partition_statistics(None)?.total_byte_size, + compute_statistics(exec_enabled.as_ref(), None)?.total_byte_size, Precision::Absent, ); diff --git a/datafusion/core/tests/custom_sources_cases/mod.rs b/datafusion/core/tests/custom_sources_cases/mod.rs index cef75b444f6fe..a272f0cc2ed6e 100644 --- a/datafusion/core/tests/custom_sources_cases/mod.rs +++ b/datafusion/core/tests/custom_sources_cases/mod.rs @@ -41,6 +41,7 @@ use datafusion_common::stats::Precision; use datafusion_common::tree_node::TreeNodeRecursion; use datafusion_physical_expr::EquivalenceProperties; use datafusion_physical_plan::PlanProperties; +use datafusion_physical_plan::StatisticsContext; use datafusion_physical_plan::execution_plan::{Boundedness, EmissionType}; use datafusion_physical_plan::placeholder_row::PlaceholderRowExec; @@ -179,7 +180,11 @@ impl ExecutionPlan 
for CustomExecutionPlan { Ok(Box::pin(TestCustomRecordBatchStream { nb_batch: 1 })) } - fn partition_statistics(&self, partition: Option) -> Result> { + fn partition_statistics( + &self, + partition: Option, + _ctx: &StatisticsContext, + ) -> Result> { if partition.is_some() { return Ok(Arc::new(Statistics::new_unknown(&self.schema()))); } diff --git a/datafusion/core/tests/custom_sources_cases/statistics.rs b/datafusion/core/tests/custom_sources_cases/statistics.rs index 01c4deac5ccd3..37495e557ed2e 100644 --- a/datafusion/core/tests/custom_sources_cases/statistics.rs +++ b/datafusion/core/tests/custom_sources_cases/statistics.rs @@ -36,6 +36,8 @@ use datafusion_catalog::Session; use datafusion_common::tree_node::TreeNodeRecursion; use datafusion_common::{project_schema, stats::Precision}; use datafusion_physical_expr::EquivalenceProperties; +use datafusion_physical_plan::StatisticsContext; +use datafusion_physical_plan::compute_statistics; use datafusion_physical_plan::execution_plan::{Boundedness, EmissionType}; use async_trait::async_trait; @@ -174,7 +176,11 @@ impl ExecutionPlan for StatisticsValidation { unimplemented!("This plan only serves for testing statistics") } - fn partition_statistics(&self, partition: Option) -> Result> { + fn partition_statistics( + &self, + partition: Option, + _ctx: &StatisticsContext, + ) -> Result> { if partition.is_some() { Ok(Arc::new(Statistics::new_unknown(&self.schema))) } else { @@ -247,7 +253,7 @@ async fn sql_basic() -> Result<()> { let physical_plan = df.create_physical_plan().await.unwrap(); // the statistics should be those of the source - assert_eq!(stats, *physical_plan.partition_statistics(None)?); + assert_eq!(stats, *compute_statistics(physical_plan.as_ref(), None)?); Ok(()) } @@ -263,7 +269,7 @@ async fn sql_filter() -> Result<()> { .unwrap(); let physical_plan = df.create_physical_plan().await.unwrap(); - let stats = physical_plan.partition_statistics(None)?; + let stats = 
compute_statistics(physical_plan.as_ref(), None)?; assert_eq!(stats.num_rows, Precision::Inexact(7)); Ok(()) @@ -278,7 +284,7 @@ async fn sql_limit() -> Result<()> { let physical_plan = df.create_physical_plan().await.unwrap(); // when the limit is smaller than the original number of lines we mark the statistics as inexact // and cap NDV at the new row count - let limit_stats = physical_plan.partition_statistics(None)?; + let limit_stats = compute_statistics(physical_plan.as_ref(), None)?; assert_eq!(limit_stats.num_rows, Precision::Exact(5)); // c1: NDV=2 stays at 2 (already below limit of 5) assert_eq!( @@ -297,7 +303,7 @@ async fn sql_limit() -> Result<()> { .unwrap(); let physical_plan = df.create_physical_plan().await.unwrap(); // when the limit is larger than the original number of lines, statistics remain unchanged - assert_eq!(stats, *physical_plan.partition_statistics(None)?); + assert_eq!(stats, *compute_statistics(physical_plan.as_ref(), None)?); Ok(()) } @@ -314,7 +320,7 @@ async fn sql_window() -> Result<()> { let physical_plan = df.create_physical_plan().await.unwrap(); - let result = physical_plan.partition_statistics(None)?; + let result = compute_statistics(physical_plan.as_ref(), None)?; assert_eq!(stats.num_rows, result.num_rows); let col_stats = &result.column_statistics; diff --git a/datafusion/core/tests/parquet/file_statistics.rs b/datafusion/core/tests/parquet/file_statistics.rs index 84396be8a6a67..f2a9aa4cf3388 100644 --- a/datafusion/core/tests/parquet/file_statistics.rs +++ b/datafusion/core/tests/parquet/file_statistics.rs @@ -41,6 +41,7 @@ use datafusion_common::config::ConfigOptions; use datafusion_physical_optimizer::PhysicalOptimizerRule; use datafusion_physical_optimizer::filter_pushdown::FilterPushdown; use datafusion_physical_plan::ExecutionPlan; +use datafusion_physical_plan::compute_statistics; use datafusion_physical_plan::filter::FilterExec; use tempfile::tempdir; @@ -61,7 +62,7 @@ async fn 
check_stats_precision_with_filter_pushdown() { // Scan without filter, stats are exact let exec = table.scan(&state, None, &[], None).await.unwrap(); assert_eq!( - exec.partition_statistics(None).unwrap().num_rows, + compute_statistics(exec.as_ref(), None).unwrap().num_rows, Precision::Exact(8), "Stats without filter should be exact" ); @@ -93,7 +94,9 @@ async fn check_stats_precision_with_filter_pushdown() { ); // Scan with filter pushdown, stats are inexact assert_eq!( - optimized_exec.partition_statistics(None).unwrap().num_rows, + compute_statistics(optimized_exec.as_ref(), None) + .unwrap() + .num_rows, Precision::Inexact(8), "Stats after filter pushdown should be inexact" ); @@ -121,11 +124,13 @@ async fn load_table_stats_with_session_level_cache() { let exec1 = table1.scan(&state1, None, &[], None).await.unwrap(); assert_eq!( - exec1.partition_statistics(None).unwrap().num_rows, + compute_statistics(exec1.as_ref(), None).unwrap().num_rows, Precision::Exact(8) ); assert_eq!( - exec1.partition_statistics(None).unwrap().total_byte_size, + compute_statistics(exec1.as_ref(), None) + .unwrap() + .total_byte_size, // Byte size is absent because we cannot estimate the output size // of the Arrow data since there are variable length columns. 
Precision::Absent, @@ -137,11 +142,13 @@ async fn load_table_stats_with_session_level_cache() { assert_eq!(get_static_cache_size(&state2), 0); let exec2 = table2.scan(&state2, None, &[], None).await.unwrap(); assert_eq!( - exec2.partition_statistics(None).unwrap().num_rows, + compute_statistics(exec2.as_ref(), None).unwrap().num_rows, Precision::Exact(8) ); assert_eq!( - exec2.partition_statistics(None).unwrap().total_byte_size, + compute_statistics(exec2.as_ref(), None) + .unwrap() + .total_byte_size, // Absent because the data contains variable length columns Precision::Absent, ); @@ -152,11 +159,13 @@ async fn load_table_stats_with_session_level_cache() { assert_eq!(get_static_cache_size(&state1), 1); let exec3 = table1.scan(&state1, None, &[], None).await.unwrap(); assert_eq!( - exec3.partition_statistics(None).unwrap().num_rows, + compute_statistics(exec3.as_ref(), None).unwrap().num_rows, Precision::Exact(8) ); assert_eq!( - exec3.partition_statistics(None).unwrap().total_byte_size, + compute_statistics(exec3.as_ref(), None) + .unwrap() + .total_byte_size, // Absent because the data contains variable length columns Precision::Absent, ); diff --git a/datafusion/core/tests/physical_optimizer/join_selection.rs b/datafusion/core/tests/physical_optimizer/join_selection.rs index 050baa9e792e9..2282e1c1eff84 100644 --- a/datafusion/core/tests/physical_optimizer/join_selection.rs +++ b/datafusion/core/tests/physical_optimizer/join_selection.rs @@ -45,7 +45,8 @@ use datafusion_physical_plan::joins::utils::JoinFilter; use datafusion_physical_plan::joins::{HashJoinExec, NestedLoopJoinExec, PartitionMode}; use datafusion_physical_plan::projection::ProjectionExec; use datafusion_physical_plan::{ - DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, + DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, StatisticsContext, + compute_statistics, execution_plan::{Boundedness, EmissionType}, }; @@ -249,17 +250,13 @@ async fn test_join_with_swap() { 
.expect("The type of the plan should not be changed"); assert_eq!( - swapped_join - .left() - .partition_statistics(None) + compute_statistics(swapped_join.left().as_ref(), None) .unwrap() .total_byte_size, Precision::Inexact(8192) ); assert_eq!( - swapped_join - .right() - .partition_statistics(None) + compute_statistics(swapped_join.right().as_ref(), None) .unwrap() .total_byte_size, Precision::Inexact(2097152) @@ -297,17 +294,13 @@ async fn test_left_join_no_swap() { .expect("The type of the plan should not be changed"); assert_eq!( - swapped_join - .left() - .partition_statistics(None) + compute_statistics(swapped_join.left().as_ref(), None) .unwrap() .total_byte_size, Precision::Inexact(8192) ); assert_eq!( - swapped_join - .right() - .partition_statistics(None) + compute_statistics(swapped_join.right().as_ref(), None) .unwrap() .total_byte_size, Precision::Inexact(2097152) @@ -348,17 +341,13 @@ async fn test_join_with_swap_semi() { assert_eq!(swapped_join.schema().fields().len(), 1); assert_eq!( - swapped_join - .left() - .partition_statistics(None) + compute_statistics(swapped_join.left().as_ref(), None) .unwrap() .total_byte_size, Precision::Inexact(8192) ); assert_eq!( - swapped_join - .right() - .partition_statistics(None) + compute_statistics(swapped_join.right().as_ref(), None) .unwrap() .total_byte_size, Precision::Inexact(2097152) @@ -401,17 +390,13 @@ async fn test_join_with_swap_mark() { assert_eq!(swapped_join.schema().fields().len(), 2); assert_eq!( - swapped_join - .left() - .partition_statistics(None) + compute_statistics(swapped_join.left().as_ref(), None) .unwrap() .total_byte_size, Precision::Inexact(8192) ); assert_eq!( - swapped_join - .right() - .partition_statistics(None) + compute_statistics(swapped_join.right().as_ref(), None) .unwrap() .total_byte_size, Precision::Inexact(2097152) @@ -529,17 +514,13 @@ async fn test_join_no_swap() { .expect("The type of the plan should not be changed"); assert_eq!( - swapped_join - .left() - 
.partition_statistics(None) + compute_statistics(swapped_join.left().as_ref(), None) .unwrap() .total_byte_size, Precision::Inexact(8192) ); assert_eq!( - swapped_join - .right() - .partition_statistics(None) + compute_statistics(swapped_join.right().as_ref(), None) .unwrap() .total_byte_size, Precision::Inexact(2097152) @@ -604,17 +585,13 @@ async fn test_nl_join_with_swap(join_type: JoinType) { ); assert_eq!( - swapped_join - .left() - .partition_statistics(None) + compute_statistics(swapped_join.left().as_ref(), None) .unwrap() .total_byte_size, Precision::Inexact(8192) ); assert_eq!( - swapped_join - .right() - .partition_statistics(None) + compute_statistics(swapped_join.right().as_ref(), None) .unwrap() .total_byte_size, Precision::Inexact(2097152) @@ -677,17 +654,13 @@ async fn test_nl_join_with_swap_no_proj(join_type: JoinType) { ); assert_eq!( - swapped_join - .left() - .partition_statistics(None) + compute_statistics(swapped_join.left().as_ref(), None) .unwrap() .total_byte_size, Precision::Inexact(8192) ); assert_eq!( - swapped_join - .right() - .partition_statistics(None) + compute_statistics(swapped_join.right().as_ref(), None) .unwrap() .total_byte_size, Precision::Inexact(2097152) @@ -1167,7 +1140,11 @@ impl ExecutionPlan for StatisticsExec { unimplemented!("This plan only serves for testing statistics") } - fn partition_statistics(&self, partition: Option) -> Result> { + fn partition_statistics( + &self, + partition: Option, + _ctx: &StatisticsContext, + ) -> Result> { Ok(Arc::new(if partition.is_some() { Statistics::new_unknown(&self.schema) } else { diff --git a/datafusion/core/tests/physical_optimizer/partition_statistics.rs b/datafusion/core/tests/physical_optimizer/partition_statistics.rs index f84d79146b24d..2f35edb74f07c 100644 --- a/datafusion/core/tests/physical_optimizer/partition_statistics.rs +++ b/datafusion/core/tests/physical_optimizer/partition_statistics.rs @@ -45,6 +45,7 @@ mod test { }; use 
datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec; use datafusion_physical_plan::common::compute_record_batch_statistics; + use datafusion_physical_plan::compute_statistics; use datafusion_physical_plan::empty::EmptyExec; use datafusion_physical_plan::filter::FilterExec; use datafusion_physical_plan::joins::{ @@ -238,7 +239,7 @@ mod test { async fn test_statistics_by_partition_of_data_source() -> Result<()> { let scan = create_scan_exec_with_statistics(None, Some(2)).await; let statistics = (0..scan.output_partitioning().partition_count()) - .map(|idx| scan.partition_statistics(Some(idx))) + .map(|idx| compute_statistics(scan.as_ref(), Some(idx))) .collect::>>()?; // Partition 1: ids [3,4], dates [2025-03-01, 2025-03-02] let expected_statistic_partition_1 = create_partition_statistics( @@ -282,7 +283,7 @@ mod test { let projection: Arc = Arc::new(ProjectionExec::try_new(exprs, scan)?); let statistics = (0..projection.output_partitioning().partition_count()) - .map(|idx| projection.partition_statistics(Some(idx))) + .map(|idx| compute_statistics(projection.as_ref(), Some(idx))) .collect::>>()?; // Projection only includes id column, not the date partition column let expected_statistic_partition_1 = @@ -314,7 +315,7 @@ mod test { let sort = SortExec::new(ordering.clone().into(), scan_1); let sort_exec: Arc = Arc::new(sort); let statistics = (0..sort_exec.output_partitioning().partition_count()) - .map(|idx| sort_exec.partition_statistics(Some(idx))) + .map(|idx| compute_statistics(sort_exec.as_ref(), Some(idx))) .collect::>>()?; // All 4 files merged: ids [1-4], dates [2025-03-01, 2025-03-04] let expected_statistic_partition = create_partition_statistics( @@ -353,7 +354,7 @@ mod test { Some((DATE_2025_03_03, DATE_2025_03_04)), ); let statistics = (0..sort_exec.output_partitioning().partition_count()) - .map(|idx| sort_exec.partition_statistics(Some(idx))) + .map(|idx| compute_statistics(sort_exec.as_ref(), Some(idx))) .collect::>>()?; 
assert_eq!(statistics.len(), 2); assert_eq!(*statistics[0], expected_statistic_partition_1); @@ -380,7 +381,7 @@ mod test { )?; let filter: Arc = Arc::new(FilterExec::try_new(predicate, scan)?); - let full_statistics = filter.partition_statistics(None)?; + let full_statistics = compute_statistics(filter.as_ref(), None)?; // Filter preserves original total_rows and byte_size from input // (4 total rows = 2 partitions * 2 rows each, byte_size = 4 * 4 = 16 bytes for int32) let expected_full_statistic = Statistics { @@ -408,7 +409,7 @@ mod test { assert_eq!(*full_statistics, expected_full_statistic); let statistics = (0..filter.output_partitioning().partition_count()) - .map(|idx| filter.partition_statistics(Some(idx))) + .map(|idx| compute_statistics(filter.as_ref(), Some(idx))) .collect::>>()?; assert_eq!(statistics.len(), 2); // Per-partition stats: each partition has 2 rows, byte_size = 2 * 4 = 8 @@ -445,7 +446,7 @@ mod test { let union_exec: Arc = UnionExec::try_new(vec![scan.clone(), scan])?; let statistics = (0..union_exec.output_partitioning().partition_count()) - .map(|idx| union_exec.partition_statistics(Some(idx))) + .map(|idx| compute_statistics(union_exec.as_ref(), Some(idx))) .collect::>>()?; // Check that we have 4 partitions (2 from each scan) assert_eq!(statistics.len(), 4); @@ -508,7 +509,7 @@ mod test { // Verify the result of partition statistics let stats = (0..interleave.output_partitioning().partition_count()) - .map(|idx| interleave.partition_statistics(Some(idx))) + .map(|idx| compute_statistics(interleave.as_ref(), Some(idx))) .collect::>>()?; assert_eq!(stats.len(), 2); @@ -554,7 +555,7 @@ mod test { let cross_join: Arc = Arc::new(CrossJoinExec::new(left_scan, right_scan)); let statistics = (0..cross_join.output_partitioning().partition_count()) - .map(|idx| cross_join.partition_statistics(Some(idx))) + .map(|idx| compute_statistics(cross_join.as_ref(), Some(idx))) .collect::>>()?; // Check that we have 2 partitions 
assert_eq!(statistics.len(), 2); @@ -661,7 +662,7 @@ mod test { // Test partition_statistics(None) - returns overall statistics // For RightSemi join, output columns come from right side only - let full_statistics = nested_loop_join.partition_statistics(None)?; + let full_statistics = compute_statistics(nested_loop_join.as_ref(), None)?; // With empty join columns, estimate_join_statistics returns Inexact row count // based on the outer side (right side for RightSemi) let mut expected_full_statistics = create_partition_statistics( @@ -699,7 +700,7 @@ mod test { expected_statistic_partition_2.total_byte_size = Precision::Absent; let statistics = (0..nested_loop_join.output_partitioning().partition_count()) - .map(|idx| nested_loop_join.partition_statistics(Some(idx))) + .map(|idx| compute_statistics(nested_loop_join.as_ref(), Some(idx))) .collect::>>()?; assert_eq!(statistics.len(), 2); assert_eq!(*statistics[0], expected_statistic_partition_1); @@ -729,7 +730,7 @@ mod test { Some((DATE_2025_03_01, DATE_2025_03_04)), ); let statistics = (0..coalesce_partitions.output_partitioning().partition_count()) - .map(|idx| coalesce_partitions.partition_statistics(Some(idx))) + .map(|idx| compute_statistics(coalesce_partitions.as_ref(), Some(idx))) .collect::>>()?; assert_eq!(statistics.len(), 1); assert_eq!(*statistics[0], expected_statistic_partition); @@ -746,7 +747,7 @@ mod test { let local_limit: Arc = Arc::new(LocalLimitExec::new(scan.clone(), 1)); let statistics = (0..local_limit.output_partitioning().partition_count()) - .map(|idx| local_limit.partition_statistics(Some(idx))) + .map(|idx| compute_statistics(local_limit.as_ref(), Some(idx))) .collect::>>()?; assert_eq!(statistics.len(), 2); let mut expected_0 = Statistics::clone(&statistics[0]); @@ -773,7 +774,7 @@ mod test { let global_limit: Arc = Arc::new(GlobalLimitExec::new(scan.clone(), 0, Some(2))); let statistics = (0..global_limit.output_partitioning().partition_count()) - .map(|idx| 
global_limit.partition_statistics(Some(idx))) + .map(|idx| compute_statistics(global_limit.as_ref(), Some(idx))) .collect::>>()?; assert_eq!(statistics.len(), 1); // GlobalLimit takes from first partition: ids [3,4], dates [2025-03-01, 2025-03-02] @@ -832,7 +833,7 @@ mod test { @"AggregateExec: mode=Partial, gby=[id@0 as id, 1 + id@0 as expr], aggr=[COUNT(c)]" ); - let p0_statistics = aggregate_exec_partial.partition_statistics(Some(0))?; + let p0_statistics = compute_statistics(aggregate_exec_partial.as_ref(), Some(0))?; // Aggregate doesn't propagate num_rows and ColumnStatistics byte_size from input let expected_p0_statistics = Statistics { @@ -871,7 +872,7 @@ mod test { ], }; - let p1_statistics = aggregate_exec_partial.partition_statistics(Some(1))?; + let p1_statistics = compute_statistics(aggregate_exec_partial.as_ref(), Some(1))?; assert_eq!(*p1_statistics, expected_p1_statistics); validate_statistics_with_data( @@ -893,10 +894,10 @@ mod test { aggregate_exec_partial.schema(), )?); - let p0_statistics = agg_final.partition_statistics(Some(0))?; + let p0_statistics = compute_statistics(agg_final.as_ref(), Some(0))?; assert_eq!(*p0_statistics, expected_p0_statistics); - let p1_statistics = agg_final.partition_statistics(Some(1))?; + let p1_statistics = compute_statistics(agg_final.as_ref(), Some(1))?; assert_eq!(*p1_statistics, expected_p1_statistics); validate_statistics_with_data( @@ -941,8 +942,14 @@ mod test { ], }; - assert_eq!(empty_stat, *agg_partial.partition_statistics(Some(0))?); - assert_eq!(empty_stat, *agg_partial.partition_statistics(Some(1))?); + assert_eq!( + empty_stat, + *compute_statistics(agg_partial.as_ref(), Some(0))? + ); + assert_eq!( + empty_stat, + *compute_statistics(agg_partial.as_ref(), Some(1))? 
+ ); validate_statistics_with_data( agg_partial.clone(), vec![ExpectedStatistics::Empty, ExpectedStatistics::Empty], @@ -968,8 +975,14 @@ mod test { agg_partial.schema(), )?); - assert_eq!(empty_stat, *agg_final.partition_statistics(Some(0))?); - assert_eq!(empty_stat, *agg_final.partition_statistics(Some(1))?); + assert_eq!( + empty_stat, + *compute_statistics(agg_final.as_ref(), Some(0))? + ); + assert_eq!( + empty_stat, + *compute_statistics(agg_final.as_ref(), Some(1))? + ); validate_statistics_with_data( agg_final, @@ -1005,7 +1018,10 @@ mod test { column_statistics: vec![ColumnStatistics::new_unknown()], }; - assert_eq!(expect_stat, *agg_final.partition_statistics(Some(0))?); + assert_eq!( + expect_stat, + *compute_statistics(agg_final.as_ref(), Some(0))? + ); // Verify that the aggregate final result has exactly one partition with one row let mut partitions = execute_stream_partitioned( @@ -1033,7 +1049,7 @@ mod test { let mut all_batches = vec![]; for (i, partition_stream) in partitions.into_iter().enumerate() { let batches: Vec = partition_stream.try_collect().await?; - let actual = plan.partition_statistics(Some(i))?; + let actual = compute_statistics(plan.as_ref(), Some(i))?; let expected = compute_record_batch_statistics( std::slice::from_ref(&batches), &schema, @@ -1043,7 +1059,7 @@ mod test { all_batches.push(batches); } - let actual = plan.partition_statistics(None)?; + let actual = compute_statistics(plan.as_ref(), None)?; let expected = compute_record_batch_statistics(&all_batches, &schema, None); assert_eq!(*actual, expected); @@ -1060,7 +1076,7 @@ mod test { )?); let statistics = (0..repartition.partitioning().partition_count()) - .map(|idx| repartition.partition_statistics(Some(idx))) + .map(|idx| compute_statistics(repartition.as_ref(), Some(idx))) .collect::>>()?; assert_eq!(statistics.len(), 3); @@ -1111,7 +1127,7 @@ mod test { Partitioning::RoundRobinBatch(2), )?); - let result = repartition.partition_statistics(Some(2)); + let result = 
compute_statistics(repartition.as_ref(), Some(2)); assert!(result.is_err()); let error = result.unwrap_err(); assert!( @@ -1140,7 +1156,7 @@ mod test { Partitioning::RoundRobinBatch(0), )?); - let result = repartition.partition_statistics(Some(0))?; + let result = compute_statistics(repartition.as_ref(), Some(0))?; assert_eq!(*result, Statistics::new_unknown(&scan_schema)); // Verify that the result has exactly 0 partitions @@ -1167,7 +1183,7 @@ mod test { // Verify the result of partition statistics of repartition let stats = (0..repartition.partitioning().partition_count()) - .map(|idx| repartition.partition_statistics(Some(idx))) + .map(|idx| compute_statistics(repartition.as_ref(), Some(idx))) .collect::>>()?; assert_eq!(stats.len(), 2); @@ -1225,7 +1241,7 @@ mod test { // Verify partition statistics are properly propagated (not unknown) let statistics = (0..window_agg.output_partitioning().partition_count()) - .map(|idx| window_agg.partition_statistics(Some(idx))) + .map(|idx| compute_statistics(window_agg.as_ref(), Some(idx))) .collect::>>()?; assert_eq!(statistics.len(), 2); @@ -1311,7 +1327,7 @@ mod test { // Try to test with single partition let empty_single = Arc::new(EmptyExec::new(Arc::clone(&schema))); - let stats = empty_single.partition_statistics(Some(0))?; + let stats = compute_statistics(empty_single.as_ref(), Some(0))?; assert_eq!(stats.num_rows, Precision::Exact(0)); assert_eq!(stats.total_byte_size, Precision::Exact(0)); assert_eq!(stats.column_statistics.len(), 2); @@ -1326,7 +1342,7 @@ mod test { assert_eq!(col_stat.byte_size, Precision::Exact(0)); } - let overall_stats = empty_single.partition_statistics(None)?; + let overall_stats = compute_statistics(empty_single.as_ref(), None)?; assert_eq!(stats, overall_stats); validate_statistics_with_data(empty_single, vec![ExpectedStatistics::Empty], 0) @@ -1337,7 +1353,7 @@ mod test { Arc::new(EmptyExec::new(Arc::clone(&schema)).with_partitions(3)); let statistics = 
(0..empty_multi.output_partitioning().partition_count()) - .map(|idx| empty_multi.partition_statistics(Some(idx))) + .map(|idx| compute_statistics(empty_multi.as_ref(), Some(idx))) .collect::>>()?; assert_eq!(statistics.len(), 3); @@ -1397,7 +1413,7 @@ mod test { // Test partition statistics for CollectLeft mode let statistics = (0..collect_left_join.output_partitioning().partition_count()) - .map(|idx| collect_left_join.partition_statistics(Some(idx))) + .map(|idx| compute_statistics(collect_left_join.as_ref(), Some(idx))) .collect::>>()?; // Check that we have the expected number of partitions @@ -1473,7 +1489,7 @@ mod test { // Test partition statistics for Partitioned mode let statistics = (0..partitioned_join.output_partitioning().partition_count()) - .map(|idx| partitioned_join.partition_statistics(Some(idx))) + .map(|idx| compute_statistics(partitioned_join.as_ref(), Some(idx))) .collect::>>()?; // Check that we have the expected number of partitions @@ -1547,7 +1563,7 @@ mod test { // Test partition statistics for Auto mode let statistics = (0..auto_join.output_partitioning().partition_count()) - .map(|idx| auto_join.partition_statistics(Some(idx))) + .map(|idx| compute_statistics(auto_join.as_ref(), Some(idx))) .collect::>>()?; // Check that we have the expected number of partitions diff --git a/datafusion/core/tests/physical_optimizer/test_utils.rs b/datafusion/core/tests/physical_optimizer/test_utils.rs index 4b6db1abc45d8..2e516917f8d63 100644 --- a/datafusion/core/tests/physical_optimizer/test_utils.rs +++ b/datafusion/core/tests/physical_optimizer/test_utils.rs @@ -70,7 +70,7 @@ use datafusion_physical_plan::union::UnionExec; use datafusion_physical_plan::windows::{BoundedWindowAggExec, create_window_expr}; use datafusion_physical_plan::{ DisplayAs, DisplayFormatType, ExecutionPlan, InputOrderMode, Partitioning, - PlanProperties, SortOrderPushdownResult, displayable, + PlanProperties, SortOrderPushdownResult, StatisticsContext, displayable, }; /// 
Create a non sorted parquet exec @@ -949,7 +949,11 @@ impl ExecutionPlan for TestScan { internal_err!("TestScan is for testing optimizer only, not for execution") } - fn partition_statistics(&self, _partition: Option) -> Result> { + fn partition_statistics( + &self, + _partition: Option, + _ctx: &StatisticsContext, + ) -> Result> { Ok(Arc::new(Statistics::new_unknown(&self.schema))) } diff --git a/datafusion/core/tests/sql/path_partition.rs b/datafusion/core/tests/sql/path_partition.rs index 2eff1c262f855..8b7b9265f20fc 100644 --- a/datafusion/core/tests/sql/path_partition.rs +++ b/datafusion/core/tests/sql/path_partition.rs @@ -38,6 +38,7 @@ use datafusion_common::ScalarValue; use datafusion_common::stats::Precision; use datafusion_common::test_util::batches_to_sort_string; use datafusion_execution::config::SessionConfig; +use datafusion_physical_plan::compute_statistics; use async_trait::async_trait; use bytes::Bytes; @@ -461,8 +462,7 @@ async fn parquet_statistics() -> Result<()> { let schema = physical_plan.schema(); assert_eq!(schema.fields().len(), 4); - let stat_cols = physical_plan - .partition_statistics(None)? + let stat_cols = compute_statistics(physical_plan.as_ref(), None)? .column_statistics .clone(); assert_eq!(stat_cols.len(), 4); @@ -488,8 +488,7 @@ async fn parquet_statistics() -> Result<()> { let schema = physical_plan.schema(); assert_eq!(schema.fields().len(), 2); - let stat_cols = physical_plan - .partition_statistics(None)? + let stat_cols = compute_statistics(physical_plan.as_ref(), None)? .column_statistics .clone(); assert_eq!(stat_cols.len(), 2); diff --git a/datafusion/datasource/src/file_scan_config/mod.rs b/datafusion/datasource/src/file_scan_config/mod.rs index 04b74528d5ac1..3f9d1105cffa8 100644 --- a/datafusion/datasource/src/file_scan_config/mod.rs +++ b/datafusion/datasource/src/file_scan_config/mod.rs @@ -2246,7 +2246,6 @@ mod tests { // of just the projected ones. 
use crate::source::DataSourceExec; - use datafusion_physical_plan::ExecutionPlan; // Create a schema with 4 columns let schema = Arc::new(Schema::new(vec![ @@ -2300,7 +2299,8 @@ mod tests { let exec = DataSourceExec::from_data_source(config); // Get statistics for partition 0 - let partition_stats = exec.partition_statistics(Some(0)).unwrap(); + let partition_stats = + datafusion_physical_plan::compute_statistics(exec.as_ref(), Some(0)).unwrap(); // Verify that only 2 columns are in the statistics (the projected ones) assert_eq!( diff --git a/datafusion/datasource/src/memory.rs b/datafusion/datasource/src/memory.rs index 9f4f8aa0f3635..bee4735777ab9 100644 --- a/datafusion/datasource/src/memory.rs +++ b/datafusion/datasource/src/memory.rs @@ -1000,7 +1000,7 @@ mod tests { let values = MemorySourceConfig::try_new_as_values(schema, data)?; assert_eq!( - *values.partition_statistics(None)?, + *datafusion_physical_plan::compute_statistics(values.as_ref(), None)?, Statistics { num_rows: Precision::Exact(rows), total_byte_size: Precision::Exact(8), // not important diff --git a/datafusion/datasource/src/source.rs b/datafusion/datasource/src/source.rs index 420c6b508ce4f..d30c6bd7d3542 100644 --- a/datafusion/datasource/src/source.rs +++ b/datafusion/datasource/src/source.rs @@ -46,6 +46,7 @@ use datafusion_execution::{SendableRecordBatchStream, TaskContext}; use datafusion_physical_expr::{EquivalenceProperties, Partitioning, PhysicalExpr}; use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; use datafusion_physical_plan::SortOrderPushdownResult; +use datafusion_physical_plan::StatisticsContext; use datafusion_physical_plan::filter_pushdown::{ ChildPushdownResult, FilterPushdownPhase, FilterPushdownPropagation, PushedDown, }; @@ -451,7 +452,11 @@ impl ExecutionPlan for DataSourceExec { Some(metrics) } - fn partition_statistics(&self, partition: Option) -> Result> { + fn partition_statistics( + &self, + partition: Option, + _ctx: 
&StatisticsContext, + ) -> Result> { self.data_source.partition_statistics(partition) } diff --git a/datafusion/physical-optimizer/src/aggregate_statistics.rs b/datafusion/physical-optimizer/src/aggregate_statistics.rs index 75da1873263d8..da7f0a5e48c60 100644 --- a/datafusion/physical-optimizer/src/aggregate_statistics.rs +++ b/datafusion/physical-optimizer/src/aggregate_statistics.rs @@ -24,7 +24,7 @@ use datafusion_physical_plan::aggregates::{AggregateExec, AggregateInputMode}; use datafusion_physical_plan::placeholder_row::PlaceholderRowExec; use datafusion_physical_plan::projection::{ProjectionExec, ProjectionExpr}; use datafusion_physical_plan::udaf::{AggregateFunctionExpr, StatisticsArgs}; -use datafusion_physical_plan::{ExecutionPlan, expressions}; +use datafusion_physical_plan::{ExecutionPlan, compute_statistics, expressions}; use std::sync::Arc; use crate::PhysicalOptimizerRule; @@ -53,7 +53,7 @@ impl PhysicalOptimizerRule for AggregateStatistics { let partial_agg_exec = partial_agg_exec .downcast_ref::() .expect("take_optimizable() ensures that this is a AggregateExec"); - let stats = partial_agg_exec.input().partition_statistics(None)?; + let stats = compute_statistics(partial_agg_exec.input().as_ref(), None)?; let mut projections = vec![]; for expr in partial_agg_exec.aggr_expr() { let field = expr.field(); diff --git a/datafusion/physical-optimizer/src/enforce_distribution.rs b/datafusion/physical-optimizer/src/enforce_distribution.rs index 87d0943610971..25113b39cb9d4 100644 --- a/datafusion/physical-optimizer/src/enforce_distribution.rs +++ b/datafusion/physical-optimizer/src/enforce_distribution.rs @@ -59,7 +59,9 @@ use datafusion_physical_plan::tree_node::PlanContext; use datafusion_physical_plan::union::{InterleaveExec, UnionExec, can_interleave}; use datafusion_physical_plan::windows::WindowAggExec; use datafusion_physical_plan::windows::{BoundedWindowAggExec, get_best_fitting_window}; -use datafusion_physical_plan::{Distribution, ExecutionPlan, 
Partitioning}; +use datafusion_physical_plan::{ + Distribution, ExecutionPlan, Partitioning, compute_statistics, +}; use itertools::izip; @@ -1137,7 +1139,8 @@ fn get_repartition_requirement_status( { // Decide whether adding a round robin is beneficial depending on // the statistical information we have on the number of rows: - let roundrobin_beneficial_stats = match child.partition_statistics(None)?.num_rows + let roundrobin_beneficial_stats = match compute_statistics(child.as_ref(), None)? + .num_rows { Precision::Exact(n_rows) => n_rows > batch_size, Precision::Inexact(n_rows) => !should_use_estimates || (n_rows > batch_size), diff --git a/datafusion/physical-optimizer/src/join_selection.rs b/datafusion/physical-optimizer/src/join_selection.rs index 74c6cbb19aea9..c658f83c2b82c 100644 --- a/datafusion/physical-optimizer/src/join_selection.rs +++ b/datafusion/physical-optimizer/src/join_selection.rs @@ -40,7 +40,9 @@ use datafusion_physical_plan::joins::{ StreamJoinPartitionMode, SymmetricHashJoinExec, }; use datafusion_physical_plan::operator_statistics::StatisticsRegistry; -use datafusion_physical_plan::{ExecutionPlan, ExecutionPlanProperties}; +use datafusion_physical_plan::{ + ExecutionPlan, ExecutionPlanProperties, compute_statistics, +}; use std::sync::Arc; /// The [`JoinSelection`] rule tries to modify a given plan so that it can @@ -65,7 +67,7 @@ fn get_stats( reg.compute(plan) .map(|s| Arc::::clone(s.base_arc())) } else { - plan.partition_statistics(None) + compute_statistics(plan, None) } } diff --git a/datafusion/physical-optimizer/src/output_requirements.rs b/datafusion/physical-optimizer/src/output_requirements.rs index 81df6f943c15e..0fd9047780fc3 100644 --- a/datafusion/physical-optimizer/src/output_requirements.rs +++ b/datafusion/physical-optimizer/src/output_requirements.rs @@ -34,6 +34,7 @@ use datafusion_common::{Result, Statistics}; use datafusion_execution::TaskContext; use datafusion_physical_expr::Distribution; use 
datafusion_physical_expr_common::sort_expr::OrderingRequirements; +use datafusion_physical_plan::StatisticsContext; use datafusion_physical_plan::execution_plan::Boundedness; use datafusion_physical_plan::projection::{ ProjectionExec, make_with_child, update_expr, update_ordering_requirement, @@ -242,8 +243,12 @@ impl ExecutionPlan for OutputRequirementExec { unreachable!(); } - fn partition_statistics(&self, partition: Option) -> Result> { - self.input.partition_statistics(partition) + fn partition_statistics( + &self, + _partition: Option, + ctx: &StatisticsContext, + ) -> Result> { + Ok(Arc::clone(&ctx.child_stats()[0])) } fn try_swapping_with_projection( diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index 76ecb3f1485a4..23fadc250c805 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -30,6 +30,7 @@ use crate::filter_pushdown::{ FilterPushdownPropagation, PushedDownPredicate, }; use crate::metrics::{ExecutionPlanMetricsSet, MetricsSet}; +use crate::statistics_context::StatisticsContext; use crate::{ DisplayFormatType, Distribution, ExecutionPlan, InputOrderMode, SendableRecordBatchStream, Statistics, check_if_same_properties, @@ -1555,8 +1556,12 @@ impl ExecutionPlan for AggregateExec { Some(self.metrics.clone_inner()) } - fn partition_statistics(&self, partition: Option) -> Result> { - let child_statistics = self.input().partition_statistics(partition)?; + fn partition_statistics( + &self, + _partition: Option, + ctx: &StatisticsContext, + ) -> Result> { + let child_statistics = Arc::clone(&ctx.child_stats()[0]); Ok(Arc::new(self.statistics_inner(&child_statistics)?)) } @@ -2180,6 +2185,7 @@ mod tests { use crate::execution_plan::Boundedness; use crate::expressions::col; use crate::metrics::MetricValue; + use crate::statistics_context::compute_statistics; use crate::test::TestMemoryExec; use crate::test::assert_is_pending; use 
crate::test::exec::{ @@ -2561,7 +2567,7 @@ mod tests { )?); // Verify statistics are preserved proportionally through aggregation - let final_stats = merged_aggregate.partition_statistics(None)?; + let final_stats = compute_statistics(merged_aggregate.as_ref(), None)?; assert!(final_stats.total_byte_size.get_value().is_some()); let task_ctx = if spill { @@ -2706,6 +2712,7 @@ mod tests { fn partition_statistics( &self, partition: Option, + _ctx: &StatisticsContext, ) -> Result> { if partition.is_some() { return Ok(Arc::new(Statistics::new_unknown(self.schema().as_ref()))); @@ -3910,7 +3917,7 @@ mod tests { PhysicalGroupBy::default(), None, )?; - let stats = agg.partition_statistics(None)?; + let stats = compute_statistics(&agg, None)?; assert_eq!(stats.total_byte_size, Precision::Absent); let zero_row_stats = Statistics { @@ -3927,7 +3934,7 @@ mod tests { PhysicalGroupBy::default(), None, )?; - let stats_zero = agg_zero.partition_statistics(None)?; + let stats_zero = compute_statistics(&agg_zero, None)?; assert_eq!(stats_zero.total_byte_size, Precision::Absent); Ok(()) @@ -4280,7 +4287,7 @@ mod tests { let agg = build_test_aggregate(&schema, input_stats, group_by, case.limit_options)?; - let stats = agg.partition_statistics(None)?; + let stats = compute_statistics(&agg, None)?; assert_eq!( stats.num_rows, case.expected_num_rows, "FAILED: '{}' — expected {:?}, got {:?}", @@ -4319,7 +4326,7 @@ mod tests { None, )?; - let stats = agg.partition_statistics(None)?; + let stats = compute_statistics(&agg, None)?; assert_eq!( stats.column_statistics[0].distinct_count, Precision::Exact(100), @@ -4373,7 +4380,7 @@ mod tests { let agg = build_test_aggregate(&schema, input_stats, grouping_set, None)?; - let stats = agg.partition_statistics(None)?; + let stats = compute_statistics(&agg, None)?; // Per-set NDV: (a,NULL)=100, (NULL,b)=50, (a,b)=100*50=5000 // Total = 100 + 50 + 5000 = 5150 assert_eq!( @@ -4422,7 +4429,7 @@ mod tests { 
PhysicalGroupBy::new_single(vec![(expr_a_plus_b, "a+b".to_string())]); let agg = build_test_aggregate(&schema, input_stats, group_by, None)?; - let stats = agg.partition_statistics(None)?; + let stats = compute_statistics(&agg, None)?; assert_eq!( stats.num_rows, Precision::Inexact(1_000_000), diff --git a/datafusion/physical-plan/src/buffer.rs b/datafusion/physical-plan/src/buffer.rs index 0cc4a1d71814e..f7d3ca80cb091 100644 --- a/datafusion/physical-plan/src/buffer.rs +++ b/datafusion/physical-plan/src/buffer.rs @@ -24,6 +24,7 @@ use crate::filter_pushdown::{ FilterPushdownPropagation, }; use crate::projection::ProjectionExec; +use crate::statistics_context::StatisticsContext; use crate::stream::RecordBatchStreamAdapter; use crate::{ DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, SortOrderPushdownResult, @@ -244,8 +245,12 @@ impl ExecutionPlan for BufferExec { Some(self.metrics.clone_inner()) } - fn partition_statistics(&self, partition: Option) -> Result> { - self.input.partition_statistics(partition) + fn partition_statistics( + &self, + _partition: Option, + ctx: &StatisticsContext, + ) -> Result> { + Ok(Arc::clone(&ctx.child_stats()[0])) } fn supports_limit_pushdown(&self) -> bool { diff --git a/datafusion/physical-plan/src/coalesce_batches.rs b/datafusion/physical-plan/src/coalesce_batches.rs index 2bf046f03b6cf..c0d155073a0d4 100644 --- a/datafusion/physical-plan/src/coalesce_batches.rs +++ b/datafusion/physical-plan/src/coalesce_batches.rs @@ -24,6 +24,7 @@ use std::task::{Context, Poll}; use super::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use super::{DisplayAs, ExecutionPlanProperties, PlanProperties, Statistics}; use crate::projection::ProjectionExec; +use crate::statistics_context::StatisticsContext; use crate::{ DisplayFormatType, ExecutionPlan, RecordBatchStream, SendableRecordBatchStream, check_if_same_properties, @@ -222,8 +223,12 @@ impl ExecutionPlan for CoalesceBatchesExec { Some(self.metrics.clone_inner()) } 
- fn partition_statistics(&self, partition: Option) -> Result> { - let stats = Arc::unwrap_or_clone(self.input.partition_statistics(partition)?); + fn partition_statistics( + &self, + _partition: Option, + ctx: &StatisticsContext, + ) -> Result> { + let stats = Arc::unwrap_or_clone(Arc::clone(&ctx.child_stats()[0])); Ok(Arc::new(stats.with_fetch(self.fetch, 0, 1)?)) } diff --git a/datafusion/physical-plan/src/coalesce_partitions.rs b/datafusion/physical-plan/src/coalesce_partitions.rs index 9290d725165e9..275054a40fde8 100644 --- a/datafusion/physical-plan/src/coalesce_partitions.rs +++ b/datafusion/physical-plan/src/coalesce_partitions.rs @@ -30,6 +30,7 @@ use crate::execution_plan::{CardinalityEffect, EvaluationType, SchedulingType}; use crate::filter_pushdown::{FilterDescription, FilterPushdownPhase}; use crate::projection::{ProjectionExec, make_with_child}; use crate::sort_pushdown::SortOrderPushdownResult; +use crate::statistics_context::{StatisticsContext, compute_statistics}; use crate::{DisplayFormatType, ExecutionPlan, Partitioning, check_if_same_properties}; use datafusion_physical_expr_common::sort_expr::PhysicalSortExpr; @@ -239,8 +240,14 @@ impl ExecutionPlan for CoalescePartitionsExec { Some(self.metrics.clone_inner()) } - fn partition_statistics(&self, _partition: Option) -> Result> { - let stats = Arc::unwrap_or_clone(self.input.partition_statistics(None)?); + fn partition_statistics( + &self, + _partition: Option, + _ctx: &StatisticsContext, + ) -> Result> { + // CoalescePartitions merges all input partitions into one, + // so it always needs overall (None) input stats + let stats = Arc::unwrap_or_clone(compute_statistics(self.input.as_ref(), None)?); Ok(Arc::new(stats.with_fetch(self.fetch, 0, 1)?)) } diff --git a/datafusion/physical-plan/src/coop.rs b/datafusion/physical-plan/src/coop.rs index fe6a3bc3d5678..f1e20e0c59d48 100644 --- a/datafusion/physical-plan/src/coop.rs +++ b/datafusion/physical-plan/src/coop.rs @@ -85,6 +85,7 @@ use 
crate::filter_pushdown::{ FilterPushdownPropagation, }; use crate::projection::ProjectionExec; +use crate::statistics_context::StatisticsContext; use crate::{ DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, RecordBatchStream, SendableRecordBatchStream, SortOrderPushdownResult, check_if_same_properties, @@ -306,8 +307,12 @@ impl ExecutionPlan for CooperativeExec { Ok(make_cooperative(child_stream)) } - fn partition_statistics(&self, partition: Option) -> Result> { - self.input.partition_statistics(partition) + fn partition_statistics( + &self, + _partition: Option, + ctx: &StatisticsContext, + ) -> Result> { + Ok(Arc::clone(&ctx.child_stats()[0])) } fn supports_limit_pushdown(&self) -> bool { diff --git a/datafusion/physical-plan/src/display.rs b/datafusion/physical-plan/src/display.rs index 756a68b1a958d..40eebc1a835e0 100644 --- a/datafusion/physical-plan/src/display.rs +++ b/datafusion/physical-plan/src/display.rs @@ -31,6 +31,8 @@ use datafusion_physical_expr::LexOrdering; use crate::metrics::{MetricCategory, MetricType}; use crate::render_tree::RenderTree; +use crate::statistics_context::compute_statistics; + use super::{ExecutionPlan, ExecutionPlanVisitor, accept}; /// Options for controlling how each [`ExecutionPlan`] should format itself @@ -480,7 +482,7 @@ impl ExecutionPlanVisitor for IndentVisitor<'_, '_> { } } if self.show_statistics { - let stats = plan.partition_statistics(None).map_err(|_e| fmt::Error)?; + let stats = compute_statistics(plan, None).map_err(|_e| fmt::Error)?; write!(self.f, ", statistics=[{stats}]")?; } if self.show_schema { @@ -576,7 +578,7 @@ impl ExecutionPlanVisitor for GraphvizVisitor<'_, '_> { }; let statistics = if self.show_statistics { - let stats = plan.partition_statistics(None).map_err(|_e| fmt::Error)?; + let stats = compute_statistics(plan, None).map_err(|_e| fmt::Error)?; format!("statistics=[{stats}]") } else { "".to_string() @@ -1173,6 +1175,7 @@ mod tests { use 
datafusion_execution::{SendableRecordBatchStream, TaskContext}; use datafusion_physical_expr::PhysicalExpr; + use crate::statistics_context::StatisticsContext; use crate::{DisplayAs, ExecutionPlan, PlanProperties}; use super::DisplayableExecutionPlan; @@ -1232,6 +1235,7 @@ mod tests { fn partition_statistics( &self, partition: Option, + _ctx: &StatisticsContext, ) -> Result> { if partition.is_some() { return Ok(Arc::new(Statistics::new_unknown(self.schema().as_ref()))); diff --git a/datafusion/physical-plan/src/empty.rs b/datafusion/physical-plan/src/empty.rs index 8103695ad08fa..9608615a3380a 100644 --- a/datafusion/physical-plan/src/empty.rs +++ b/datafusion/physical-plan/src/empty.rs @@ -35,6 +35,7 @@ use datafusion_execution::TaskContext; use datafusion_physical_expr::{EquivalenceProperties, PhysicalExpr}; use crate::execution_plan::SchedulingType; +use crate::statistics_context::StatisticsContext; use log::trace; /// Execution plan for empty relation with produce_one_row=false @@ -159,7 +160,11 @@ impl ExecutionPlan for EmptyExec { )?)) } - fn partition_statistics(&self, partition: Option) -> Result> { + fn partition_statistics( + &self, + partition: Option, + _ctx: &StatisticsContext, + ) -> Result> { if let Some(partition) = partition { assert_or_internal_err!( partition < self.partitions, diff --git a/datafusion/physical-plan/src/execution_plan.rs b/datafusion/physical-plan/src/execution_plan.rs index 1a67ea0ded11b..a53d64bd6c958 100644 --- a/datafusion/physical-plan/src/execution_plan.rs +++ b/datafusion/physical-plan/src/execution_plan.rs @@ -47,6 +47,7 @@ use crate::coalesce_partitions::CoalescePartitionsExec; use crate::display::DisplayableExecutionPlan; use crate::metrics::MetricsSet; use crate::projection::ProjectionExec; +use crate::statistics_context::StatisticsContext; use crate::stream::RecordBatchStreamAdapter; use arrow::array::{Array, RecordBatch}; @@ -549,7 +550,19 @@ pub trait ExecutionPlan: Any + Debug + DisplayAs + Send + Sync { /// If 
statistics are not available, should return [`Statistics::new_unknown`] /// (the default), not an error. /// If `partition` is `None`, it returns statistics for the entire plan. - fn partition_statistics(&self, partition: Option) -> Result> { + /// + /// The [`StatisticsContext`] carries pre-computed child statistics and + /// additional context for statistics computation. Use + /// [`compute_statistics`] to compute statistics bottom-up, threading + /// child statistics through the context automatically. + /// + /// [`StatisticsContext`]: crate::statistics_context::StatisticsContext + /// [`compute_statistics`]: crate::statistics_context::compute_statistics + fn partition_statistics( + &self, + partition: Option, + _ctx: &StatisticsContext, + ) -> Result> { if let Some(idx) = partition { // Validate partition index let partition_count = self.properties().partitioning.partition_count(); @@ -1658,6 +1671,7 @@ mod tests { fn partition_statistics( &self, _partition: Option, + _ctx: &StatisticsContext, ) -> Result> { unimplemented!() } @@ -1727,6 +1741,7 @@ mod tests { fn partition_statistics( &self, _partition: Option, + _ctx: &StatisticsContext, ) -> Result> { unimplemented!() } @@ -1791,6 +1806,7 @@ mod tests { fn partition_statistics( &self, _partition: Option, + _ctx: &StatisticsContext, ) -> Result> { unimplemented!() } diff --git a/datafusion/physical-plan/src/filter.rs b/datafusion/physical-plan/src/filter.rs index 50efe8f5092e8..67590df5fef2f 100644 --- a/datafusion/physical-plan/src/filter.rs +++ b/datafusion/physical-plan/src/filter.rs @@ -42,6 +42,7 @@ use crate::projection::{ EmbeddedProjection, ProjectionExec, ProjectionExpr, make_with_child, try_embed_projection, update_expr, }; +use crate::statistics_context::{StatisticsContext, compute_statistics}; use crate::{ DisplayFormatType, ExecutionPlan, metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet, RatioMetrics}, @@ -400,7 +401,7 @@ impl FilterExec { let schema = input.schema(); let stats = 
Self::statistics_helper( &schema, - Arc::unwrap_or_clone(input.partition_statistics(None)?), + Arc::unwrap_or_clone(compute_statistics(input.as_ref(), None)?), predicate, default_selectivity, )?; @@ -576,9 +577,12 @@ impl ExecutionPlan for FilterExec { /// The output statistics of a filtering operation can be estimated if the /// predicate's selectivity value can be determined for the incoming data. - fn partition_statistics(&self, partition: Option) -> Result> { - let input_stats = - Arc::unwrap_or_clone(self.input.partition_statistics(partition)?); + fn partition_statistics( + &self, + _partition: Option, + ctx: &StatisticsContext, + ) -> Result> { + let input_stats = Arc::unwrap_or_clone(Arc::clone(&ctx.child_stats()[0])); let stats = Self::statistics_helper( &self.input.schema(), input_stats, @@ -1152,6 +1156,7 @@ mod tests { use super::*; use crate::empty::EmptyExec; use crate::expressions::*; + use crate::statistics_context::compute_statistics; use crate::test; use crate::test::exec::StatisticsExec; use arrow::datatypes::{Field, Schema, UnionFields, UnionMode}; @@ -1228,7 +1233,7 @@ mod tests { let filter: Arc = Arc::new(FilterExec::try_new(predicate, input)?); - let statistics = filter.partition_statistics(None)?; + let statistics = compute_statistics(filter.as_ref(), None)?; assert_eq!(statistics.num_rows, Precision::Inexact(25)); assert_eq!( statistics.total_byte_size, @@ -1278,7 +1283,7 @@ mod tests { sub_filter, )?); - let statistics = filter.partition_statistics(None)?; + let statistics = compute_statistics(filter.as_ref(), None)?; assert_eq!(statistics.num_rows, Precision::Inexact(16)); assert_eq!( statistics.column_statistics, @@ -1338,7 +1343,7 @@ mod tests { binary(col("a", &schema)?, Operator::GtEq, lit(10i32), &schema)?, b_gt_5, )?); - let statistics = filter.partition_statistics(None)?; + let statistics = compute_statistics(filter.as_ref(), None)?; // On a uniform distribution, only fifteen rows will satisfy the // filter that 'a' proposed (a >= 
10 AND a <= 25) (15/100) and only // 5 rows will satisfy the filter that 'b' proposed (b > 45) (5/50). @@ -1383,7 +1388,7 @@ mod tests { let filter: Arc = Arc::new(FilterExec::try_new(predicate, input)?); - let statistics = filter.partition_statistics(None)?; + let statistics = compute_statistics(filter.as_ref(), None)?; assert_eq!(statistics.num_rows, Precision::Absent); Ok(()) @@ -1456,7 +1461,7 @@ mod tests { )); let filter: Arc = Arc::new(FilterExec::try_new(predicate, input)?); - let statistics = filter.partition_statistics(None)?; + let statistics = compute_statistics(filter.as_ref(), None)?; // 0.5 (from a) * 0.333333... (from b) * 0.798387... (from c) ≈ 0.1330... // num_rows after ceil => 133.0... => 134 // total_byte_size after ceil => 532.0... => 533 @@ -1552,10 +1557,12 @@ mod tests { )), )); // Since filter predicate passes all entries, statistics after filter shouldn't change. - let expected = input.partition_statistics(None)?.column_statistics.clone(); + let expected = compute_statistics(input.as_ref(), None)? 
+ .column_statistics + .clone(); let filter: Arc = Arc::new(FilterExec::try_new(predicate, input)?); - let statistics = filter.partition_statistics(None)?; + let statistics = compute_statistics(filter.as_ref(), None)?; assert_eq!(statistics.num_rows, Precision::Inexact(1000)); assert_eq!(statistics.total_byte_size, Precision::Inexact(4000)); @@ -1608,7 +1615,7 @@ mod tests { )); let filter: Arc = Arc::new(FilterExec::try_new(predicate, input)?); - let statistics = filter.partition_statistics(None)?; + let statistics = compute_statistics(filter.as_ref(), None)?; assert_eq!(statistics.num_rows, Precision::Inexact(0)); assert_eq!(statistics.total_byte_size, Precision::Inexact(0)); @@ -1695,7 +1702,7 @@ mod tests { Arc::new(FilterExec::try_new(outer_predicate, inner_filter)?); // Should succeed without error - let statistics = outer_filter.partition_statistics(None)?; + let statistics = compute_statistics(outer_filter.as_ref(), None)?; assert_eq!(statistics.num_rows, Precision::Inexact(0)); Ok(()) @@ -1734,7 +1741,7 @@ mod tests { )); let filter: Arc = Arc::new(FilterExec::try_new(predicate, input)?); - let statistics = filter.partition_statistics(None)?; + let statistics = compute_statistics(filter.as_ref(), None)?; assert_eq!(statistics.num_rows, Precision::Inexact(490)); assert_eq!(statistics.total_byte_size, Precision::Inexact(1960)); @@ -1784,7 +1791,7 @@ mod tests { )); let filter: Arc = Arc::new(FilterExec::try_new(predicate, input)?); - let filter_statistics = filter.partition_statistics(None)?; + let filter_statistics = compute_statistics(filter.as_ref(), None)?; let expected_filter_statistics = Statistics { num_rows: Precision::Absent, @@ -1819,7 +1826,7 @@ mod tests { )); let filter: Arc = Arc::new(FilterExec::try_new(predicate, input)?); - let filter_statistics = filter.partition_statistics(None)?; + let filter_statistics = compute_statistics(filter.as_ref(), None)?; // First column is "a", and it is a column with only one value after the filter. 
assert!(filter_statistics.column_statistics[0].is_singleton()); @@ -1866,11 +1873,11 @@ mod tests { Arc::new(Literal::new(ScalarValue::Decimal128(Some(10), 10, 10))), )); let filter = FilterExec::try_new(predicate, input)?; - let statistics = filter.partition_statistics(None)?; + let statistics = compute_statistics(&filter, None)?; assert_eq!(statistics.num_rows, Precision::Inexact(200)); assert_eq!(statistics.total_byte_size, Precision::Inexact(800)); let filter = filter.with_default_selectivity(40)?; - let statistics = filter.partition_statistics(None)?; + let statistics = compute_statistics(&filter, None)?; assert_eq!(statistics.num_rows, Precision::Inexact(400)); assert_eq!(statistics.total_byte_size, Precision::Inexact(1600)); Ok(()) @@ -1905,7 +1912,7 @@ mod tests { Arc::new(EmptyExec::new(Arc::clone(&schema))), )?; - exec.partition_statistics(None).unwrap(); + compute_statistics(&exec, None).unwrap(); Ok(()) } @@ -2061,8 +2068,8 @@ mod tests { assert_eq!(filter1.projection(), filter2.projection()); // Verify statistics are the same - let stats1 = filter1.partition_statistics(None)?; - let stats2 = filter2.partition_statistics(None)?; + let stats1 = compute_statistics(&filter1, None)?; + let stats2 = compute_statistics(&filter2, None)?; assert_eq!(stats1.num_rows, stats2.num_rows); assert_eq!(stats1.total_byte_size, stats2.total_byte_size); @@ -2115,7 +2122,7 @@ mod tests { .unwrap() .build()?; - let statistics = filter.partition_statistics(None)?; + let statistics = compute_statistics(&filter, None)?; // Verify statistics reflect both filtering and projection assert!(matches!(statistics.num_rows, Precision::Inexact(_))); @@ -2346,7 +2353,7 @@ mod tests { let filter: Arc = Arc::new(FilterExec::try_new(predicate, input)?); - let statistics = filter.partition_statistics(None)?; + let statistics = compute_statistics(filter.as_ref(), None)?; let col_b_stats = &statistics.column_statistics[1]; assert_eq!(col_b_stats.min_value, Precision::Absent); 
assert_eq!(col_b_stats.max_value, Precision::Absent); @@ -2631,7 +2638,7 @@ mod tests { )); let filter: Arc = Arc::new(FilterExec::try_new(predicate, input)?); - let statistics = filter.partition_statistics(None)?; + let statistics = compute_statistics(filter.as_ref(), None)?; for (i, expected) in expected_ndvs.iter().enumerate() { assert_eq!( @@ -2705,7 +2712,7 @@ mod tests { )); let filter: Arc = Arc::new(FilterExec::try_new(predicate, input)?); - let statistics = filter.partition_statistics(None)?; + let statistics = compute_statistics(filter.as_ref(), None)?; // a = 42 collapses to single value assert_eq!( statistics.column_statistics[0].distinct_count, @@ -2751,7 +2758,7 @@ mod tests { )); let filter: Arc = Arc::new(FilterExec::try_new(predicate, input)?); - let statistics = filter.partition_statistics(None)?; + let statistics = compute_statistics(filter.as_ref(), None)?; assert_eq!( statistics.column_statistics[0].distinct_count, Precision::Exact(1) @@ -2784,7 +2791,7 @@ mod tests { )); let filter: Arc = Arc::new(FilterExec::try_new(predicate, input)?); - let statistics = filter.partition_statistics(None)?; + let statistics = compute_statistics(filter.as_ref(), None)?; assert_eq!( statistics.column_statistics[0].distinct_count, Precision::Exact(1) @@ -2817,7 +2824,7 @@ mod tests { )); let filter: Arc = Arc::new(FilterExec::try_new(predicate, input)?); - let statistics = filter.partition_statistics(None)?; + let statistics = compute_statistics(filter.as_ref(), None)?; assert_eq!( statistics.column_statistics[0].distinct_count, Precision::Exact(1) @@ -2850,7 +2857,7 @@ mod tests { )); let filter: Arc = Arc::new(FilterExec::try_new(predicate, input)?); - let statistics = filter.partition_statistics(None)?; + let statistics = compute_statistics(filter.as_ref(), None)?; assert_eq!( statistics.column_statistics[0].distinct_count, Precision::Exact(1) @@ -2884,7 +2891,7 @@ mod tests { )); let filter: Arc = Arc::new(FilterExec::try_new(predicate, input)?); - let 
statistics = filter.partition_statistics(None)?; + let statistics = compute_statistics(filter.as_ref(), None)?; assert_eq!( statistics.column_statistics[0].distinct_count, Precision::Exact(1) @@ -2930,7 +2937,7 @@ mod tests { )); let filter: Arc = Arc::new(FilterExec::try_new(predicate, input)?); - let statistics = filter.partition_statistics(None)?; + let statistics = compute_statistics(filter.as_ref(), None)?; assert_eq!( statistics.column_statistics[0].distinct_count, Precision::Exact(1) @@ -3231,7 +3238,7 @@ mod tests { let filter: Arc = Arc::new(FilterExec::try_new(predicate, input)?); - let statistics = filter.partition_statistics(None)?; + let statistics = compute_statistics(filter.as_ref(), None)?; // Filter estimates ~10 rows (selectivity = 10/100) assert_eq!(statistics.num_rows, Precision::Inexact(10)); // NDV should be capped at the filtered row count (10), not the original 80 diff --git a/datafusion/physical-plan/src/joins/cross_join.rs b/datafusion/physical-plan/src/joins/cross_join.rs index 3027fb130f087..4781cd6586ce7 100644 --- a/datafusion/physical-plan/src/joins/cross_join.rs +++ b/datafusion/physical-plan/src/joins/cross_join.rs @@ -31,6 +31,7 @@ use crate::projection::{ ProjectionExec, join_allows_pushdown, join_table_borders, new_join_children, physical_to_column_exprs, }; +use crate::statistics_context::{StatisticsContext, compute_statistics}; use crate::{ ColumnStatistics, DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, ExecutionPlanProperties, PlanProperties, RecordBatchStream, @@ -380,11 +381,22 @@ impl ExecutionPlan for CrossJoinExec { } } - fn partition_statistics(&self, partition: Option) -> Result> { - // Get the all partitions statistics of the left - let left_stats = Arc::unwrap_or_clone(self.left.partition_statistics(None)?); - let right_stats = - Arc::unwrap_or_clone(self.right.partition_statistics(partition)?); + fn partition_statistics( + &self, + partition: Option, + ctx: &StatisticsContext, + ) -> Result> { + // Left 
side is always broadcast (collected into a single partition), + // so it needs overall stats. For the Some case, the context has + // partition-specific stats which would be incorrect. + // Right side can have multiple partitions, so the context's + // partition-specific or overall stats are correct as-is. + let left_stats = match partition { + Some(_) => compute_statistics(self.left.as_ref(), None)?, + None => Arc::clone(&ctx.child_stats()[0]), + }; + let left_stats = Arc::unwrap_or_clone(left_stats); + let right_stats = Arc::unwrap_or_clone(Arc::clone(&ctx.child_stats()[1])); Ok(Arc::new(stats_cartesian_product(left_stats, right_stats))) } diff --git a/datafusion/physical-plan/src/joins/hash_join/exec.rs b/datafusion/physical-plan/src/joins/hash_join/exec.rs index 0eca270ebb06f..c38d75319e946 100644 --- a/datafusion/physical-plan/src/joins/hash_join/exec.rs +++ b/datafusion/physical-plan/src/joins/hash_join/exec.rs @@ -53,6 +53,7 @@ use crate::projection::{ }; use crate::repartition::REPARTITION_RANDOM_STATE; use crate::spill::get_record_batch_memory_size; +use crate::statistics_context::{StatisticsContext, compute_statistics}; use crate::{ DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, Partitioning, PlanProperties, SendableRecordBatchStream, Statistics, @@ -1440,14 +1441,19 @@ impl ExecutionPlan for HashJoinExec { Some(self.metrics.clone_inner()) } - fn partition_statistics(&self, partition: Option) -> Result> { + fn partition_statistics( + &self, + partition: Option, + ctx: &StatisticsContext, + ) -> Result> { let stats = match (partition, self.mode) { - // For CollectLeft mode, the left side is collected into a single partition, - // so all left partitions are available to each output partition. - // For the right side, we need the specific partition statistics. 
- (Some(partition), PartitionMode::CollectLeft) => { - let left_stats = self.left.partition_statistics(None)?; - let right_stats = self.right.partition_statistics(Some(partition))?; + // For CollectLeft mode, the left side is broadcast (collected into + // a single partition), so it needs overall stats. The context has + // partition-specific stats which would be incorrect. + // Right side is partitioned, so the context stats are correct. + (Some(_), PartitionMode::CollectLeft) => { + let left_stats = compute_statistics(self.left.as_ref(), None)?; + let right_stats = Arc::clone(&ctx.child_stats()[1]); estimate_join_statistics( (*left_stats).clone(), @@ -1458,12 +1464,25 @@ impl ExecutionPlan for HashJoinExec { )? } - // For Partitioned mode, both sides are partitioned, so each output partition - // only has access to the corresponding partition from both sides. - (Some(partition), PartitionMode::Partitioned) => { - let left_stats = self.left.partition_statistics(Some(partition))?; - let right_stats = self.right.partition_statistics(Some(partition))?; + // For Partitioned mode, both sides are hash-partitioned symmetrically, + // so each output partition uses the matching partition from both sides. + (Some(_), PartitionMode::Partitioned) => { + let left_stats = Arc::clone(&ctx.child_stats()[0]); + let right_stats = Arc::clone(&ctx.child_stats()[1]); + + estimate_join_statistics( + (*left_stats).clone(), + (*right_stats).clone(), + &self.on, + &self.join_type, + &self.join_schema, + )? + } + // Overall stats requested, context has overall child stats. + (None, _) => { + let left_stats = Arc::clone(&ctx.child_stats()[0]); + let right_stats = Arc::clone(&ctx.child_stats()[1]); estimate_join_statistics( (*left_stats).clone(), (*right_stats).clone(), @@ -1473,14 +1492,11 @@ impl ExecutionPlan for HashJoinExec { )? } - // For Auto mode or when no specific partition is requested, fall back to - // the current behavior of getting all partition statistics. 
- (None, _) | (Some(_), PartitionMode::Auto) => { - // TODO stats: it is not possible in general to know the output size of joins - // There are some special cases though, for example: - // - `A LEFT JOIN B ON A.col=B.col` with `COUNT_DISTINCT(B.col)=COUNT(B.col)` - let left_stats = self.left.partition_statistics(None)?; - let right_stats = self.right.partition_statistics(None)?; + // Auto mode hasn't decided partitioning yet, so it needs + // overall stats from both sides. + (Some(_), PartitionMode::Auto) => { + let left_stats = compute_statistics(self.left.as_ref(), None)?; + let right_stats = compute_statistics(self.right.as_ref(), None)?; estimate_join_statistics( (*left_stats).clone(), (*right_stats).clone(), diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs index c686d9d5a05ff..7862045ad8b59 100644 --- a/datafusion/physical-plan/src/joins/nested_loop_join.rs +++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs @@ -42,6 +42,7 @@ use crate::projection::{ EmbeddedProjection, JoinData, ProjectionExec, try_embed_projection, try_pushdown_through_join, }; +use crate::statistics_context::{StatisticsContext, compute_statistics}; use crate::{ DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, ExecutionPlanProperties, PlanProperties, RecordBatchStream, SendableRecordBatchStream, @@ -687,7 +688,11 @@ impl ExecutionPlan for NestedLoopJoinExec { Some(self.metrics.clone_inner()) } - fn partition_statistics(&self, partition: Option) -> Result> { + fn partition_statistics( + &self, + partition: Option, + ctx: &StatisticsContext, + ) -> Result> { // NestedLoopJoinExec is designed for joins without equijoin keys in the // ON clause (e.g., `t1 JOIN t2 ON (t1.v1 + t2.v1) % 2 = 0`). Any join // predicates are stored in `self.filter`, but `estimate_join_statistics` @@ -697,15 +702,17 @@ impl ExecutionPlan for NestedLoopJoinExec { // unknown row counts. 
let join_columns = Vec::new(); - // Left side is always a single partition (Distribution::SinglePartition), - // so we always request overall stats with `None`. Right side can have - // multiple partitions, so we forward the partition parameter to get - // partition-specific statistics when requested. - let left_stats = Arc::unwrap_or_clone(self.left.partition_statistics(None)?); - let right_stats = Arc::unwrap_or_clone(match partition { - Some(partition) => self.right.partition_statistics(Some(partition))?, - None => self.right.partition_statistics(None)?, - }); + // Left side is always broadcast (collected into a single partition), + // so it needs overall stats. For the Some case, the context has + // partition-specific stats which would be incorrect. + // Right side can have multiple partitions, so the context's + // partition-specific or overall stats are correct as-is. + let left_stats = match partition { + Some(_) => compute_statistics(self.left.as_ref(), None)?, + None => Arc::clone(&ctx.child_stats()[0]), + }; + let left_stats = Arc::unwrap_or_clone(left_stats); + let right_stats = Arc::unwrap_or_clone(Arc::clone(&ctx.child_stats()[1])); let stats = estimate_join_statistics( left_stats, @@ -3144,7 +3151,7 @@ pub(crate) mod tests { &JoinType::Left, Some(vec![1, 2]), )?; - let stats = nested_loop_join.partition_statistics(None)?; + let stats = compute_statistics(&nested_loop_join, None)?; assert_eq!( nested_loop_join.schema().fields().len(), stats.column_statistics.len(), diff --git a/datafusion/physical-plan/src/joins/sort_merge_join/exec.rs b/datafusion/physical-plan/src/joins/sort_merge_join/exec.rs index 3f309431614a4..6422259cac4a5 100644 --- a/datafusion/physical-plan/src/joins/sort_merge_join/exec.rs +++ b/datafusion/physical-plan/src/joins/sort_merge_join/exec.rs @@ -38,6 +38,7 @@ use crate::projection::{ physical_to_column_exprs, update_join_on, }; use crate::spill::spill_manager::SpillManager; +use crate::statistics_context::StatisticsContext; use 
crate::{ DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, ExecutionPlanProperties, PlanProperties, SendableRecordBatchStream, Statistics, check_if_same_properties, @@ -581,21 +582,23 @@ impl ExecutionPlan for SortMergeJoinExec { Some(self.metrics.clone_inner()) } - fn partition_statistics(&self, partition: Option) -> Result> { + fn partition_statistics( + &self, + _partition: Option, + ctx: &StatisticsContext, + ) -> Result> { // SortMergeJoinExec uses symmetric hash partitioning where both left and right // inputs are hash-partitioned on the join keys. This means partition `i` of the // left input is joined with partition `i` of the right input. // - // Therefore, partition-specific statistics can be computed by getting the - // partition-specific statistics from both children and combining them via - // `estimate_join_statistics`. + // Both children use the same partition argument, so ctx.child_stats() is correct + // for both the None and Some(partition) cases. // // TODO stats: it is not possible in general to know the output size of joins // There are some special cases though, for example: // - `A LEFT JOIN B ON A.col=B.col` with `COUNT_DISTINCT(B.col)=COUNT(B.col)` - let left_stats = Arc::unwrap_or_clone(self.left.partition_statistics(partition)?); - let right_stats = - Arc::unwrap_or_clone(self.right.partition_statistics(partition)?); + let left_stats = Arc::unwrap_or_clone(Arc::clone(&ctx.child_stats()[0])); + let right_stats = Arc::unwrap_or_clone(Arc::clone(&ctx.child_stats()[1])); Ok(Arc::new(estimate_join_statistics( left_stats, right_stats, diff --git a/datafusion/physical-plan/src/joins/sort_merge_join/tests.rs b/datafusion/physical-plan/src/joins/sort_merge_join/tests.rs index 5d70530528728..e7daca9692288 100644 --- a/datafusion/physical-plan/src/joins/sort_merge_join/tests.rs +++ b/datafusion/physical-plan/src/joins/sort_merge_join/tests.rs @@ -3148,7 +3148,6 @@ async fn test_left_outer_join_filtered_mask() -> Result<()> { #[test] fn 
test_partition_statistics() -> Result<()> { - use crate::ExecutionPlan; use datafusion_common::stats::Precision; let left = build_table( @@ -3185,7 +3184,7 @@ fn test_partition_statistics() -> Result<()> { // Test aggregate statistics (partition = None) // Should return meaningful statistics computed from both inputs - let stats = join_exec.partition_statistics(None)?; + let stats = crate::statistics_context::compute_statistics(&join_exec, None)?; assert_eq!( stats.column_statistics.len(), expected_cols, @@ -3203,7 +3202,8 @@ fn test_partition_statistics() -> Result<()> { // Since the child TestMemoryExec returns unknown stats for specific partitions, // the join output will also have Absent num_rows. This is expected behavior // as the statistics depend on what the children can provide. - let partition_stats = join_exec.partition_statistics(Some(0))?; + let partition_stats = + crate::statistics_context::compute_statistics(&join_exec, Some(0))?; assert_eq!( partition_stats.column_statistics.len(), expected_cols, diff --git a/datafusion/physical-plan/src/lib.rs b/datafusion/physical-plan/src/lib.rs index 54fc97c154206..ed6825b66beec 100644 --- a/datafusion/physical-plan/src/lib.rs +++ b/datafusion/physical-plan/src/lib.rs @@ -49,6 +49,7 @@ pub use crate::execution_plan::{ pub use crate::metrics::Metric; pub use crate::ordering::InputOrderMode; pub use crate::sort_pushdown::SortOrderPushdownResult; +pub use crate::statistics_context::{StatisticsContext, compute_statistics}; pub use crate::stream::EmptyRecordBatchStream; pub use crate::topk::TopK; pub use crate::visitor::{ExecutionPlanVisitor, accept, visit_execution_plan}; @@ -88,6 +89,7 @@ pub mod repartition; pub mod sort_pushdown; pub mod sorts; pub mod spill; +pub mod statistics_context; pub mod stream; pub mod streaming; pub mod tree_node; diff --git a/datafusion/physical-plan/src/limit.rs b/datafusion/physical-plan/src/limit.rs index 51bef5d24bd2d..ee21f42911dc8 100644 --- 
a/datafusion/physical-plan/src/limit.rs +++ b/datafusion/physical-plan/src/limit.rs @@ -27,6 +27,7 @@ use super::{ SendableRecordBatchStream, Statistics, }; use crate::execution_plan::{Boundedness, CardinalityEffect}; +use crate::statistics_context::StatisticsContext; use crate::{ DisplayFormatType, Distribution, ExecutionPlan, Partitioning, check_if_same_properties, @@ -234,8 +235,12 @@ impl ExecutionPlan for GlobalLimitExec { Some(self.metrics.clone_inner()) } - fn partition_statistics(&self, partition: Option) -> Result> { - let stats = Arc::unwrap_or_clone(self.input.partition_statistics(partition)?); + fn partition_statistics( + &self, + _partition: Option, + ctx: &StatisticsContext, + ) -> Result> { + let stats = Arc::unwrap_or_clone(Arc::clone(&ctx.child_stats()[0])); Ok(Arc::new(stats.with_fetch(self.fetch, self.skip, 1)?)) } @@ -411,8 +416,12 @@ impl ExecutionPlan for LocalLimitExec { Some(self.metrics.clone_inner()) } - fn partition_statistics(&self, partition: Option) -> Result> { - let stats = Arc::unwrap_or_clone(self.input.partition_statistics(partition)?); + fn partition_statistics( + &self, + _partition: Option, + ctx: &StatisticsContext, + ) -> Result> { + let stats = Arc::unwrap_or_clone(Arc::clone(&ctx.child_stats()[0])); Ok(Arc::new(stats.with_fetch(Some(self.fetch), 0, 1)?)) } @@ -559,6 +568,7 @@ mod tests { use super::*; use crate::coalesce_partitions::CoalescePartitionsExec; use crate::common::collect; + use crate::statistics_context::compute_statistics; use crate::test; use crate::aggregates::{AggregateExec, AggregateMode, PhysicalGroupBy}; @@ -837,7 +847,7 @@ mod tests { let offset = GlobalLimitExec::new(Arc::new(CoalescePartitionsExec::new(csv)), skip, fetch); - Ok(offset.partition_statistics(None)?.num_rows) + Ok(compute_statistics(&offset, None)?.num_rows) } pub fn build_group_by( @@ -877,7 +887,7 @@ mod tests { fetch, ); - Ok(offset.partition_statistics(None)?.num_rows) + Ok(compute_statistics(&offset, None)?.num_rows) } async fn 
row_number_statistics_for_local_limit( @@ -890,7 +900,7 @@ mod tests { let offset = LocalLimitExec::new(csv, fetch); - Ok(offset.partition_statistics(None)?.num_rows) + Ok(compute_statistics(&offset, None)?.num_rows) } /// Return a RecordBatch with a single array with row_count sz diff --git a/datafusion/physical-plan/src/operator_statistics/mod.rs b/datafusion/physical-plan/src/operator_statistics/mod.rs index 20266e9768ebe..47ed5e998fc83 100644 --- a/datafusion/physical-plan/src/operator_statistics/mod.rs +++ b/datafusion/physical-plan/src/operator_statistics/mod.rs @@ -95,6 +95,7 @@ use datafusion_common::stats::Precision; use datafusion_common::{Result, Statistics}; use crate::ExecutionPlan; +use crate::statistics_context::compute_statistics; // ============================================================================ // ExtendedStatistics: Statistics with type-safe extensions @@ -271,7 +272,7 @@ impl StatisticsProvider for DefaultStatisticsProvider { plan: &dyn ExecutionPlan, _child_stats: &[ExtendedStatistics], ) -> Result { - let base = plan.partition_statistics(None)?; + let base = compute_statistics(plan, None)?; Ok(StatisticsResult::Computed(ExtendedStatistics::new_arc( base, ))) @@ -363,7 +364,7 @@ impl StatisticsRegistry { pub fn compute(&self, plan: &dyn ExecutionPlan) -> Result { // Fast path: no providers registered, skip the walk entirely if self.providers.is_empty() { - let base = plan.partition_statistics(None)?; + let base = compute_statistics(plan, None)?; return Ok(ExtendedStatistics::new_arc(base)); } @@ -387,7 +388,7 @@ impl StatisticsRegistry { } } // Fallback: use plan's built-in stats - let base = plan.partition_statistics(None)?; + let base = compute_statistics(plan, None)?; Ok(ExtendedStatistics::new_arc(base)) } @@ -510,7 +511,7 @@ fn computed_with_row_count( plan: &dyn ExecutionPlan, num_rows: Precision, ) -> Result { - let mut base = Arc::unwrap_or_clone(plan.partition_statistics(None)?); + let mut base = 
Arc::unwrap_or_clone(compute_statistics(plan, None)?); rescale_byte_size(&mut base, num_rows); Ok(StatisticsResult::Computed(ExtendedStatistics::new(base))) } @@ -1031,6 +1032,7 @@ mod tests { use super::*; use crate::filter::FilterExec; use crate::projection::ProjectionExec; + use crate::statistics_context::StatisticsContext; use crate::{DisplayAs, DisplayFormatType, PlanProperties}; use arrow::datatypes::{DataType, Field, Schema}; use datafusion_common::stats::Precision; @@ -1140,6 +1142,7 @@ mod tests { fn partition_statistics( &self, _partition: Option, + _ctx: &StatisticsContext, ) -> Result> { Ok(Arc::new(self.stats.clone())) } diff --git a/datafusion/physical-plan/src/placeholder_row.rs b/datafusion/physical-plan/src/placeholder_row.rs index ae8e73cd74ade..52a70ca811676 100644 --- a/datafusion/physical-plan/src/placeholder_row.rs +++ b/datafusion/physical-plan/src/placeholder_row.rs @@ -35,6 +35,7 @@ use datafusion_execution::TaskContext; use datafusion_physical_expr::EquivalenceProperties; use datafusion_physical_expr::PhysicalExpr; +use crate::statistics_context::StatisticsContext; use log::trace; /// Execution plan for empty relation with produce_one_row=true @@ -173,7 +174,11 @@ impl ExecutionPlan for PlaceholderRowExec { Ok(Box::pin(cooperative(ms))) } - fn partition_statistics(&self, partition: Option) -> Result> { + fn partition_statistics( + &self, + partition: Option, + _ctx: &StatisticsContext, + ) -> Result> { let batches = self .data() .expect("Create single row placeholder RecordBatch should not fail"); diff --git a/datafusion/physical-plan/src/projection.rs b/datafusion/physical-plan/src/projection.rs index e5b91fbb1c5d4..212a357a343cd 100644 --- a/datafusion/physical-plan/src/projection.rs +++ b/datafusion/physical-plan/src/projection.rs @@ -33,6 +33,7 @@ use crate::filter_pushdown::{ FilterPushdownPropagation, FilterRemapper, PushedDownPredicate, }; use crate::joins::utils::{ColumnIndex, JoinFilter, JoinOn, JoinOnRef}; +use 
crate::statistics_context::StatisticsContext; use crate::{DisplayFormatType, ExecutionPlan, PhysicalExpr, check_if_same_properties}; use std::collections::HashMap; use std::pin::Pin; @@ -359,9 +360,12 @@ impl ExecutionPlan for ProjectionExec { Some(self.metrics.clone_inner()) } - fn partition_statistics(&self, partition: Option) -> Result> { - let input_stats = - Arc::unwrap_or_clone(self.input.partition_statistics(partition)?); + fn partition_statistics( + &self, + _partition: Option, + ctx: &StatisticsContext, + ) -> Result> { + let input_stats = Arc::unwrap_or_clone(Arc::clone(&ctx.child_stats()[0])); let output_schema = self.schema(); Ok(Arc::new( self.projector @@ -1184,6 +1188,7 @@ mod tests { use crate::common::collect; use crate::filter_pushdown::PushedDown; + use crate::statistics_context::compute_statistics; use crate::test; use crate::test::exec::StatisticsExec; @@ -1374,7 +1379,7 @@ mod tests { let projection = ProjectionExec::try_new(exprs, input).unwrap(); - let stats = projection.partition_statistics(None).unwrap(); + let stats = compute_statistics(&projection, None).unwrap(); assert_eq!(stats.num_rows, Precision::Exact(10)); assert_eq!( diff --git a/datafusion/physical-plan/src/repartition/mod.rs b/datafusion/physical-plan/src/repartition/mod.rs index d4406360504f9..e0e4f179b806e 100644 --- a/datafusion/physical-plan/src/repartition/mod.rs +++ b/datafusion/physical-plan/src/repartition/mod.rs @@ -38,6 +38,7 @@ use crate::projection::{ProjectionExec, all_columns, make_with_child, update_exp use crate::sorts::streaming_merge::StreamingMergeBuilder; use crate::spill::spill_manager::SpillManager; use crate::spill::spill_pool::{self, SpillPoolWriter}; +use crate::statistics_context::{StatisticsContext, compute_statistics}; use crate::stream::RecordBatchStreamAdapter; use crate::{ DisplayFormatType, ExecutionPlan, Partitioning, PlanProperties, Statistics, @@ -1106,7 +1107,11 @@ impl ExecutionPlan for RepartitionExec { Some(self.metrics.clone_inner()) } - 
fn partition_statistics(&self, partition: Option) -> Result> { + fn partition_statistics( + &self, + partition: Option, + ctx: &StatisticsContext, + ) -> Result> { if let Some(partition) = partition { let partition_count = self.partitioning().partition_count(); if partition_count == 0 { @@ -1120,7 +1125,10 @@ impl ExecutionPlan for RepartitionExec { partition_count ); - let mut stats = Arc::unwrap_or_clone(self.input.partition_statistics(None)?); + // Repartition needs overall input stats to divide across + // output partitions, not partition-specific child stats + let overall = compute_statistics(self.input.as_ref(), None)?; + let mut stats = Arc::unwrap_or_clone(overall); // Distribute statistics across partitions stats.num_rows = stats @@ -1143,7 +1151,7 @@ impl ExecutionPlan for RepartitionExec { Ok(Arc::new(stats)) } else { - self.input.partition_statistics(None) + Ok(Arc::clone(&ctx.child_stats()[0])) } } diff --git a/datafusion/physical-plan/src/sorts/partial_sort.rs b/datafusion/physical-plan/src/sorts/partial_sort.rs index 28b8745235918..2c7045fdd0d39 100644 --- a/datafusion/physical-plan/src/sorts/partial_sort.rs +++ b/datafusion/physical-plan/src/sorts/partial_sort.rs @@ -58,6 +58,7 @@ use std::task::{Context, Poll}; use crate::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use crate::sorts::sort::sort_batch; +use crate::statistics_context::StatisticsContext; use crate::{ DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, ExecutionPlanProperties, Partitioning, PlanProperties, SendableRecordBatchStream, Statistics, @@ -345,8 +346,12 @@ impl ExecutionPlan for PartialSortExec { Some(self.metrics_set.clone_inner()) } - fn partition_statistics(&self, partition: Option) -> Result> { - self.input.partition_statistics(partition) + fn partition_statistics( + &self, + _partition: Option, + ctx: &StatisticsContext, + ) -> Result> { + Ok(Arc::clone(&ctx.child_stats()[0])) } } diff --git a/datafusion/physical-plan/src/sorts/sort.rs 
b/datafusion/physical-plan/src/sorts/sort.rs index 6c02af8dec6d3..912c8e9fb7108 100644 --- a/datafusion/physical-plan/src/sorts/sort.rs +++ b/datafusion/physical-plan/src/sorts/sort.rs @@ -45,6 +45,7 @@ use crate::sorts::streaming_merge::{SortedSpillFile, StreamingMergeBuilder}; use crate::spill::get_record_batch_memory_size; use crate::spill::in_progress_spill_file::InProgressSpillFile; use crate::spill::spill_manager::{GetSlicedSize, SpillManager}; +use crate::statistics_context::{StatisticsContext, compute_statistics}; use crate::stream::RecordBatchStreamAdapter; use crate::stream::ReservationStream; use crate::topk::TopK; @@ -1276,13 +1277,19 @@ impl ExecutionPlan for SortExec { Some(self.metrics_set.clone_inner()) } - fn partition_statistics(&self, partition: Option) -> Result> { - let p = if !self.preserve_partitioning() { - None + fn partition_statistics( + &self, + _partition: Option, + ctx: &StatisticsContext, + ) -> Result> { + // When not preserving partitioning, SortExec merges all input + // partitions into one and needs overall (None) input stats + let child_stats = if self.preserve_partitioning() { + Arc::clone(&ctx.child_stats()[0]) } else { - partition + compute_statistics(self.input.as_ref(), None)? 
}; - let stats = Arc::unwrap_or_clone(self.input.partition_statistics(p)?); + let stats = Arc::unwrap_or_clone(child_stats); Ok(Arc::new(stats.with_fetch(self.fetch, 0, 1)?)) } diff --git a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs index 13c28ccb10991..11dcf679e96de 100644 --- a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs +++ b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs @@ -24,6 +24,7 @@ use crate::limit::LimitStream; use crate::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use crate::projection::{ProjectionExec, make_with_child, update_ordering}; use crate::sorts::streaming_merge::StreamingMergeBuilder; +use crate::statistics_context::{StatisticsContext, compute_statistics}; use crate::{ DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, ExecutionPlanProperties, Partitioning, PlanProperties, SendableRecordBatchStream, Statistics, @@ -393,8 +394,14 @@ impl ExecutionPlan for SortPreservingMergeExec { Some(self.metrics.clone_inner()) } - fn partition_statistics(&self, _partition: Option) -> Result> { - self.input.partition_statistics(None) + fn partition_statistics( + &self, + _partition: Option, + _ctx: &StatisticsContext, + ) -> Result> { + // SortPreservingMergeExec merges all input partitions into one, + // so it always needs overall (None) input stats + compute_statistics(self.input.as_ref(), None) } fn supports_limit_pushdown(&self) -> bool { diff --git a/datafusion/physical-plan/src/statistics_context.rs b/datafusion/physical-plan/src/statistics_context.rs new file mode 100644 index 0000000000000..85bec6a4f97d5 --- /dev/null +++ b/datafusion/physical-plan/src/statistics_context.rs @@ -0,0 +1,102 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Context for computing statistics in physical plans. +//! +//! [`StatisticsContext`] provides external context to +//! [`ExecutionPlan::partition_statistics`], enabling operators +//! to receive pre-computed child statistics and additional context for +//! statistics computation. + +use crate::ExecutionPlan; +use datafusion_common::Result; +use datafusion_common::Statistics; +use std::sync::Arc; + +/// Context passed to [`ExecutionPlan::partition_statistics`] +/// carrying external information that operators can use when computing +/// their statistics. +/// +/// # Example +/// +/// ```ignore +/// use datafusion_physical_plan::statistics_context::StatisticsContext; +/// +/// // Leaf node: no children +/// let leaf_ctx = StatisticsContext::empty(); +/// +/// // Parent node: receives pre-computed child stats +/// let child_stats = vec![child1_stats, child2_stats]; +/// let parent_ctx = StatisticsContext::new(child_stats); +/// ``` +#[derive(Debug, Clone)] +pub struct StatisticsContext { + /// Pre-computed statistics for each child of the current node, + /// in the same order as [`ExecutionPlan::children`]. + child_stats: Vec>, +} + +impl StatisticsContext { + /// Creates a new context with pre-computed child statistics. + pub fn new(child_stats: Vec>) -> Self { + Self { child_stats } + } + + /// Creates an empty context (for leaf nodes or when child stats + /// are not available). 
+ pub fn empty() -> Self { + Self { + child_stats: Vec::new(), + } + } + + /// Returns the pre-computed statistics for each child node. + pub fn child_stats(&self) -> &[Arc] { + &self.child_stats + } +} + +impl Default for StatisticsContext { + fn default() -> Self { + Self::empty() + } +} + +/// Computes statistics for a plan node by first recursively computing +/// statistics for all children, then calling +/// [`ExecutionPlan::partition_statistics`] with the pre-computed child +/// statistics. +/// +/// The `partition` parameter is forwarded to children. This is correct +/// for partition-preserving operators (filter, projection, sort, etc.), +/// but operators that need overall child stats regardless of the +/// requested partition (e.g., repartition, coalesce, asymmetric joins) +/// must handle this internally by calling `compute_statistics` with +/// `None` on the relevant children. +pub fn compute_statistics( + plan: &dyn ExecutionPlan, + partition: Option, +) -> Result> { + let child_stats = plan + .children() + .iter() + .map(|child| compute_statistics(child.as_ref(), partition)) + .collect::>>()?; + + let ctx = StatisticsContext::new(child_stats); + plan.partition_statistics(partition, &ctx) +} diff --git a/datafusion/physical-plan/src/test.rs b/datafusion/physical-plan/src/test.rs index 4c4724e4dcc4f..834dff542d2bc 100644 --- a/datafusion/physical-plan/src/test.rs +++ b/datafusion/physical-plan/src/test.rs @@ -29,6 +29,7 @@ use crate::common; use crate::execution_plan::{Boundedness, EmissionType}; use crate::memory::MemoryStream; use crate::metrics::MetricsSet; +use crate::statistics_context::StatisticsContext; use crate::stream::RecordBatchStreamAdapter; use crate::streaming::PartitionStream; use crate::{DisplayAs, DisplayFormatType, PlanProperties}; @@ -181,7 +182,11 @@ impl ExecutionPlan for TestMemoryExec { unimplemented!() } - fn partition_statistics(&self, partition: Option) -> Result> { + fn partition_statistics( + &self, + partition: Option, + 
_ctx: &StatisticsContext, + ) -> Result> { if partition.is_some() { Ok(Arc::new(Statistics::new_unknown(&self.schema))) } else { diff --git a/datafusion/physical-plan/src/test/exec.rs b/datafusion/physical-plan/src/test/exec.rs index 200223b9b660a..910fcc79150d2 100644 --- a/datafusion/physical-plan/src/test/exec.rs +++ b/datafusion/physical-plan/src/test/exec.rs @@ -20,7 +20,7 @@ use crate::{ DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, PlanProperties, RecordBatchStream, SendableRecordBatchStream, Statistics, common, - execution_plan::Boundedness, + execution_plan::Boundedness, statistics_context::StatisticsContext, }; use crate::{ execution_plan::EmissionType, @@ -257,7 +257,11 @@ impl ExecutionPlan for MockExec { } // Panics if one of the batches is an error - fn partition_statistics(&self, partition: Option) -> Result> { + fn partition_statistics( + &self, + partition: Option, + _ctx: &StatisticsContext, + ) -> Result> { if partition.is_some() { return Ok(Arc::new(Statistics::new_unknown(&self.schema))); } @@ -489,7 +493,11 @@ impl ExecutionPlan for BarrierExec { Ok(builder.build()) } - fn partition_statistics(&self, partition: Option) -> Result> { + fn partition_statistics( + &self, + partition: Option, + _ctx: &StatisticsContext, + ) -> Result> { if partition.is_some() { return Ok(Arc::new(Statistics::new_unknown(&self.schema))); } @@ -683,7 +691,11 @@ impl ExecutionPlan for StatisticsExec { unimplemented!("This plan only serves for testing statistics") } - fn partition_statistics(&self, partition: Option) -> Result> { + fn partition_statistics( + &self, + partition: Option, + _ctx: &StatisticsContext, + ) -> Result> { Ok(Arc::new(if partition.is_some() { Statistics::new_unknown(&self.schema) } else { diff --git a/datafusion/physical-plan/src/union.rs b/datafusion/physical-plan/src/union.rs index 20295b7e6fac9..58042004be087 100644 --- a/datafusion/physical-plan/src/union.rs +++ b/datafusion/physical-plan/src/union.rs @@ -44,6 +44,7 @@ use 
crate::filter_pushdown::{ }; use crate::metrics::BaselineMetrics; use crate::projection::{ProjectionExec, make_with_child}; +use crate::statistics_context::StatisticsContext; use crate::stream::ObservedStream; use arrow::datatypes::{Field, Schema, SchemaRef}; @@ -327,31 +328,36 @@ impl ExecutionPlan for UnionExec { Some(self.metrics.clone_inner()) } - fn partition_statistics(&self, partition: Option) -> Result> { + fn partition_statistics( + &self, + partition: Option, + ctx: &StatisticsContext, + ) -> Result> { if let Some(partition_idx) = partition { // For a specific partition, find which input it belongs to let mut remaining_idx = partition_idx; - for input in &self.inputs { + for (i, input) in self.inputs.iter().enumerate() { let input_partition_count = input.output_partitioning().partition_count(); if remaining_idx < input_partition_count { - // This partition belongs to this input - return input.partition_statistics(Some(remaining_idx)); + // This partition belongs to this input - compute stats + // for the specific child at the specific partition + let child = &self.inputs[i]; + return crate::statistics_context::compute_statistics( + child.as_ref(), + Some(remaining_idx), + ); } remaining_idx -= input_partition_count; } // If we get here, the partition index is out of bounds Ok(Arc::new(Statistics::new_unknown(&self.schema()))) } else { - // Collect statistics from all inputs - let stats = self - .inputs + // Use pre-computed child stats from context + let stats = ctx + .child_stats() .iter() - .map(|input_exec| { - input_exec - .partition_statistics(None) - .map(Arc::unwrap_or_clone) - }) - .collect::>>()?; + .map(|s| Arc::unwrap_or_clone(Arc::clone(s))) + .collect::>(); Ok(Arc::new( stats @@ -668,15 +674,16 @@ impl ExecutionPlan for InterleaveExec { Some(self.metrics.clone_inner()) } - fn partition_statistics(&self, partition: Option) -> Result> { - let stats = self - .inputs + fn partition_statistics( + &self, + _partition: Option, + ctx: 
&StatisticsContext, + ) -> Result> { + let stats = ctx + .child_stats() .iter() - .map(|stat| { - stat.partition_statistics(partition) - .map(Arc::unwrap_or_clone) - }) - .collect::>>()?; + .map(|s| Arc::unwrap_or_clone(Arc::clone(s))) + .collect::>(); Ok(Arc::new( stats diff --git a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs index 14f8ce5e95ffd..85f4e0023ffa3 100644 --- a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs +++ b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs @@ -28,6 +28,7 @@ use std::task::{Context, Poll}; use super::utils::create_schema; use crate::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; +use crate::statistics_context::StatisticsContext; use crate::windows::{ calc_requirements, get_ordered_partition_by_indices, get_partition_by_sort_exprs, window_equivalence_properties, @@ -390,9 +391,12 @@ impl ExecutionPlan for BoundedWindowAggExec { Some(self.metrics.clone_inner()) } - fn partition_statistics(&self, partition: Option) -> Result> { - let input_stat = - Arc::unwrap_or_clone(self.input.partition_statistics(partition)?); + fn partition_statistics( + &self, + _partition: Option, + ctx: &StatisticsContext, + ) -> Result> { + let input_stat = Arc::unwrap_or_clone(Arc::clone(&ctx.child_stats()[0])); Ok(Arc::new(self.statistics_helper(input_stat)?)) } diff --git a/datafusion/physical-plan/src/windows/window_agg_exec.rs b/datafusion/physical-plan/src/windows/window_agg_exec.rs index 5098c84034062..e7fd4c144ede5 100644 --- a/datafusion/physical-plan/src/windows/window_agg_exec.rs +++ b/datafusion/physical-plan/src/windows/window_agg_exec.rs @@ -24,6 +24,7 @@ use std::task::{Context, Poll}; use super::utils::create_schema; use crate::execution_plan::{CardinalityEffect, EmissionType}; use crate::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; +use crate::statistics_context::StatisticsContext; use 
crate::windows::{ calc_requirements, get_ordered_partition_by_indices, get_partition_by_sort_exprs, window_equivalence_properties, @@ -293,9 +294,12 @@ impl ExecutionPlan for WindowAggExec { Some(self.metrics.clone_inner()) } - fn partition_statistics(&self, partition: Option) -> Result> { - let input_stat = - Arc::unwrap_or_clone(self.input.partition_statistics(partition)?); + fn partition_statistics( + &self, + _partition: Option, + ctx: &StatisticsContext, + ) -> Result> { + let input_stat = Arc::unwrap_or_clone(Arc::clone(&ctx.child_stats()[0])); let win_cols = self.window_expr.len(); let input_cols = self.input.schema().fields().len(); // TODO stats: some windowing function will maintain invariants such as min, max... diff --git a/datafusion/physical-plan/src/work_table.rs b/datafusion/physical-plan/src/work_table.rs index 0855dbf2fd635..6d107567b2d2f 100644 --- a/datafusion/physical-plan/src/work_table.rs +++ b/datafusion/physical-plan/src/work_table.rs @@ -29,6 +29,7 @@ use crate::{ SendableRecordBatchStream, Statistics, }; +use crate::statistics_context::StatisticsContext; use arrow::datatypes::SchemaRef; use arrow::record_batch::RecordBatch; use datafusion_common::tree_node::TreeNodeRecursion; @@ -235,7 +236,11 @@ impl ExecutionPlan for WorkTableExec { Some(self.metrics.clone_inner()) } - fn partition_statistics(&self, _partition: Option) -> Result> { + fn partition_statistics( + &self, + _partition: Option, + _ctx: &StatisticsContext, + ) -> Result> { Ok(Arc::new(Statistics::new_unknown(&self.schema()))) } diff --git a/docs/source/library-user-guide/upgrading/54.0.0.md b/docs/source/library-user-guide/upgrading/54.0.0.md index cadd365e1f814..76443ae37d19d 100644 --- a/docs/source/library-user-guide/upgrading/54.0.0.md +++ b/docs/source/library-user-guide/upgrading/54.0.0.md @@ -380,3 +380,93 @@ impl Default for MyTreeNode { } } ``` + +### `ExecutionPlan::partition_statistics` now accepts a `StatisticsContext` + +`ExecutionPlan::partition_statistics` now 
takes an additional
+`ctx: &StatisticsContext` parameter that carries pre-computed child statistics
+and additional context for statistics computation.
+
+**Before:**
+
+```rust,ignore
+fn partition_statistics(&self, partition: Option<usize>) -> Result<Arc<Statistics>> {
+    // Leaf node
+    Ok(Arc::new(Statistics::new_unknown(&self.schema())))
+}
+```
+
+**After:**
+
+```rust,ignore
+fn partition_statistics(
+    &self,
+    partition: Option<usize>,
+    _ctx: &StatisticsContext,
+) -> Result<Arc<Statistics>> {
+    // Leaf node: ignore ctx, return own stats
+    Ok(Arc::new(Statistics::new_unknown(&self.schema())))
+}
+```
+
+**Who is affected:**
+
+- Users who implement custom `ExecutionPlan` nodes
+- Users who call `partition_statistics` directly
+
+**Migration guide:**
+
+For **implementations**, add the `ctx: &StatisticsContext` parameter. Leaf nodes
+that do not have children can use `_ctx` (ignored). Non-leaf nodes that
+previously called `self.input.partition_statistics(partition)?` to obtain child
+statistics can use `ctx.child_stats()[0]` instead (or `ctx.child_stats()[i]`
+for multi-child operators like joins):
+
+```rust,ignore
+// Before (non-leaf):
+fn partition_statistics(&self, partition: Option<usize>) -> Result<Arc<Statistics>> {
+    let child_stats = self.input.partition_statistics(partition)?;
+    // ... transform child_stats ...
+}
+
+// After (non-leaf):
+fn partition_statistics(
+    &self,
+    _partition: Option<usize>,
+    ctx: &StatisticsContext,
+) -> Result<Arc<Statistics>> {
+    let child_stats = Arc::clone(&ctx.child_stats()[0]);
+    // ... transform child_stats ...
+}
+```
+
+Operators that **merge or repartition** their input (e.g., coalesce, sort
+without partition preservation, sort-preserving merge) always need overall
+child statistics regardless of which output partition is requested.
These
+operators should call `compute_statistics` with `None` on the relevant
+child instead of using `ctx.child_stats()`:
+
+```rust,ignore
+// Operator that merges all input partitions into one:
+fn partition_statistics(
+    &self,
+    _partition: Option<usize>,
+    _ctx: &StatisticsContext,
+) -> Result<Arc<Statistics>> {
+    compute_statistics(self.input.as_ref(), None)
+}
+```
+
+For **callers**, replace direct calls with `compute_statistics`, which walks
+the plan tree bottom-up and threads child statistics through the context
+automatically:
+
+```rust,ignore
+use datafusion_physical_plan::compute_statistics;
+
+// Before:
+let stats = plan.partition_statistics(None)?;
+
+// After:
+let stats = compute_statistics(plan.as_ref(), None)?;
+```