From 4a85d69e910ff62ca359f08a4a03c8531fddd8be Mon Sep 17 00:00:00 2001 From: Duong Cong Toai Date: Fri, 19 Sep 2025 20:58:15 +0200 Subject: [PATCH 1/2] fix: correct statistics for nestedloopexec --- .../src/joins/nested_loop_join.rs | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs index 5bb1673d4af2..c3ba95a03b97 100644 --- a/datafusion/physical-plan/src/joins/nested_loop_join.rs +++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs @@ -564,7 +564,7 @@ impl ExecutionPlan for NestedLoopJoinExec { self.right.partition_statistics(None)?, vec![], &self.join_type, - &self.join_schema, + &self.schema(), ) } @@ -1557,6 +1557,26 @@ pub(crate) mod tests { Ok(()) } + #[tokio::test] + async fn join_has_correct_stats() -> Result<()> { + let left = build_left_table(); + let right = build_right_table(); + let nested_loop_join = NestedLoopJoinExec::try_new( + left, + right, + None, + &JoinType::Left, + Some(vec![1, 2]), + )?; + let stats = nested_loop_join.partition_statistics(None)?; + assert_eq!( + nested_loop_join.schema().fields().len(), + stats.column_statistics.len(), + ); + assert_eq!(2, stats.column_statistics.len()); + Ok(()) + } + #[rstest] #[tokio::test] async fn join_right_semi_with_filter( From db10ee47115313e63821be9de5a037d13ff453a3 Mon Sep 17 00:00:00 2001 From: Duong Cong Toai Date: Sat, 20 Sep 2025 13:18:14 +0200 Subject: [PATCH 2/2] chore: update comment --- datafusion/physical-plan/src/joins/nested_loop_join.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs index c3ba95a03b97..a43621fd899a 100644 --- a/datafusion/physical-plan/src/joins/nested_loop_join.rs +++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs @@ -162,7 +162,8 @@ pub struct NestedLoopJoinExec { 
pub(crate) filter: Option<JoinFilter>, /// How the join is performed pub(crate) join_type: JoinType, - /// The schema once the join is applied + /// The full concatenated schema of the left and right children; this is distinct from + /// the output schema of the operator join_schema: SchemaRef, /// Future that consumes left input and buffers it in memory ///