apache · Dandandan · Nov 25, 2025 · Nov 22, 2025 · Nov 24, 2025 · Tamar-Posen
diff --git a/datafusion/core/tests/dataframe/mod.rs b/datafusion/core/tests/dataframe/mod.rs
@@ -3302,30 +3302,33 @@ async fn test_count_wildcard_on_where_scalar_subquery() -> Result<()> {
     assert_snapshot!(
         pretty_format_batches(&sql_results).unwrap(),
         @r"
-    +---------------+-------------------------------------------------------------------------------------------------------------------------+
-    | plan_type     | plan                                                                                                                    |
-    +---------------+-------------------------------------------------------------------------------------------------------------------------+
-    | logical_plan  | Projection: t1.a, t1.b                                                                                                  |
-    |               |   Filter: CASE WHEN __scalar_sq_1.__always_true IS NULL THEN Int64(0) ELSE __scalar_sq_1.count(*) END > Int64(0)        |
-    |               |     Projection: t1.a, t1.b, __scalar_sq_1.count(*), __scalar_sq_1.__always_true                                         |
-    |               |       Left Join: t1.a = __scalar_sq_1.a                                                                                 |
-    |               |         TableScan: t1 projection=[a, b]                                                                                 |
-    |               |         SubqueryAlias: __scalar_sq_1                                                                                    |
-    |               |           Projection: count(Int64(1)) AS count(*), t2.a, Boolean(true) AS __always_true                                 |
-    |               |             Aggregate: groupBy=[[t2.a]], aggr=[[count(Int64(1))]]                                                       |
-    |               |               TableScan: t2 projection=[a]                                                                              |
-    | physical_plan | FilterExec: CASE WHEN __always_true@3 IS NULL THEN 0 ELSE count(*)@2 END > 0, projection=[a@0, b@1]                     |
-    |               |   CoalesceBatchesExec: target_batch_size=8192                                                                           |
-    |               |     HashJoinExec: mode=CollectLeft, join_type=Left, on=[(a@0, a@1)], projection=[a@0, b@1, count(*)@2, __always_true@4] |
-    |               |       DataSourceExec: partitions=1, partition_sizes=[1]                                                                 |
-    |               |       ProjectionExec: expr=[count(Int64(1))@1 as count(*), a@0 as a, true as __always_true]                             |
-    |               |         AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[count(Int64(1))]                                    |
-    |               |           CoalesceBatchesExec: target_batch_size=8192                                                                   |
-    |               |             RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=1                                            |
-    |               |               AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[count(Int64(1))]                                       |
-    |               |                 DataSourceExec: partitions=1, partition_sizes=[1]                                                       |
-    |               |                                                                                                                         |
-    +---------------+-------------------------------------------------------------------------------------------------------------------------+
+    +---------------+------------------------------------------------------------------------------------------------------------------------------+
+    | plan_type     | plan                                                                                                                         |
+    +---------------+------------------------------------------------------------------------------------------------------------------------------+
+    | logical_plan  | Projection: t1.a, t1.b                                                                                                       |
+    |               |   Filter: CASE WHEN __scalar_sq_1.__always_true IS NULL THEN Int64(0) ELSE __scalar_sq_1.count(*) END > Int64(0)             |
+    |               |     Projection: t1.a, t1.b, __scalar_sq_1.count(*), __scalar_sq_1.__always_true                                              |
+    |               |       Left Join: t1.a = __scalar_sq_1.a                                                                                      |
+    |               |         TableScan: t1 projection=[a, b]                                                                                      |
+    |               |         SubqueryAlias: __scalar_sq_1                                                                                         |
+    |               |           Projection: count(Int64(1)) AS count(*), t2.a, Boolean(true) AS __always_true                                      |
+    |               |             Aggregate: groupBy=[[t2.a]], aggr=[[count(Int64(1))]]                                                            |
+    |               |               TableScan: t2 projection=[a]                                                                                   |
+    | physical_plan | FilterExec: CASE WHEN __always_true@3 IS NULL THEN 0 ELSE count(*)@2 END > 0, projection=[a@0, b@1]                          |
+    |               |   RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1                                                       |
+    |               |     ProjectionExec: expr=[a@2 as a, b@3 as b, count(*)@0 as count(*), __always_true@1 as __always_true]                      |
+    |               |       CoalesceBatchesExec: target_batch_size=8192                                                                            |
+    |               |         HashJoinExec: mode=CollectLeft, join_type=Right, on=[(a@1, a@0)], projection=[count(*)@0, __always_true@2, a@3, b@4] |
+    |               |           CoalescePartitionsExec                                                                                             |
+    |               |             ProjectionExec: expr=[count(Int64(1))@1 as count(*), a@0 as a, true as __always_true]                            |
+    |               |               AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[count(Int64(1))]                                   |
+    |               |                 CoalesceBatchesExec: target_batch_size=8192                                                                  |
+    |               |                   RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=1                                           |
+    |               |                     AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[count(Int64(1))]                                      |
+    |               |                       DataSourceExec: partitions=1, partition_sizes=[1]                                                      |
+    |               |           DataSourceExec: partitions=1, partition_sizes=[1]                                                                  |
+    |               |                                                                                                                              |
+    +---------------+------------------------------------------------------------------------------------------------------------------------------+
     "
     );
 
@@ -3357,30 +3360,33 @@ async fn test_count_wildcard_on_where_scalar_subquery() -> Result<()> {
     assert_snapshot!(
         pretty_format_batches(&df_results).unwrap(),
         @r"
-    +---------------+-------------------------------------------------------------------------------------------------------------------------+
-    | plan_type     | plan                                                                                                                    |
-    +---------------+-------------------------------------------------------------------------------------------------------------------------+
-    | logical_plan  | Projection: t1.a, t1.b                                                                                                  |
-    |               |   Filter: CASE WHEN __scalar_sq_1.__always_true IS NULL THEN Int64(0) ELSE __scalar_sq_1.count(*) END > Int64(0)        |
-    |               |     Projection: t1.a, t1.b, __scalar_sq_1.count(*), __scalar_sq_1.__always_true                                         |
-    |               |       Left Join: t1.a = __scalar_sq_1.a                                                                                 |
-    |               |         TableScan: t1 projection=[a, b]                                                                                 |
-    |               |         SubqueryAlias: __scalar_sq_1                                                                                    |
-    |               |           Projection: count(*), t2.a, Boolean(true) AS __always_true                                                    |
-    |               |             Aggregate: groupBy=[[t2.a]], aggr=[[count(Int64(1)) AS count(*)]]                                           |
-    |               |               TableScan: t2 projection=[a]                                                                              |
-    | physical_plan | FilterExec: CASE WHEN __always_true@3 IS NULL THEN 0 ELSE count(*)@2 END > 0, projection=[a@0, b@1]                     |
-    |               |   CoalesceBatchesExec: target_batch_size=8192                                                                           |
-    |               |     HashJoinExec: mode=CollectLeft, join_type=Left, on=[(a@0, a@1)], projection=[a@0, b@1, count(*)@2, __always_true@4] |
-    |               |       DataSourceExec: partitions=1, partition_sizes=[1]                                                                 |
-    |               |       ProjectionExec: expr=[count(*)@1 as count(*), a@0 as a, true as __always_true]                                    |
-    |               |         AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[count(*)]                                           |
-    |               |           CoalesceBatchesExec: target_batch_size=8192                                                                   |
-    |               |             RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=1                                            |
-    |               |               AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[count(*)]                                              |
-    |               |                 DataSourceExec: partitions=1, partition_sizes=[1]                                                       |
-    |               |                                                                                                                         |
-    +---------------+-------------------------------------------------------------------------------------------------------------------------+
+    +---------------+------------------------------------------------------------------------------------------------------------------------------+
+    | plan_type     | plan                                                                                                                         |
+    +---------------+------------------------------------------------------------------------------------------------------------------------------+
+    | logical_plan  | Projection: t1.a, t1.b                                                                                                       |
+    |               |   Filter: CASE WHEN __scalar_sq_1.__always_true IS NULL THEN Int64(0) ELSE __scalar_sq_1.count(*) END > Int64(0)             |
+    |               |     Projection: t1.a, t1.b, __scalar_sq_1.count(*), __scalar_sq_1.__always_true                                              |
+    |               |       Left Join: t1.a = __scalar_sq_1.a                                                                                      |
+    |               |         TableScan: t1 projection=[a, b]                                                                                      |
+    |               |         SubqueryAlias: __scalar_sq_1                                                                                         |
+    |               |           Projection: count(*), t2.a, Boolean(true) AS __always_true                                                         |
+    |               |             Aggregate: groupBy=[[t2.a]], aggr=[[count(Int64(1)) AS count(*)]]                                                |
+    |               |               TableScan: t2 projection=[a]                                                                                   |
+    | physical_plan | FilterExec: CASE WHEN __always_true@3 IS NULL THEN 0 ELSE count(*)@2 END > 0, projection=[a@0, b@1]                          |
+    |               |   RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1                                                       |
+    |               |     ProjectionExec: expr=[a@2 as a, b@3 as b, count(*)@0 as count(*), __always_true@1 as __always_true]                      |
+    |               |       CoalesceBatchesExec: target_batch_size=8192                                                                            |
+    |               |         HashJoinExec: mode=CollectLeft, join_type=Right, on=[(a@1, a@0)], projection=[count(*)@0, __always_true@2, a@3, b@4] |
+    |               |           CoalescePartitionsExec                                                                                             |
+    |               |             ProjectionExec: expr=[count(*)@1 as count(*), a@0 as a, true as __always_true]                                   |
+    |               |               AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[count(*)]                                          |
+    |               |                 CoalesceBatchesExec: target_batch_size=8192                                                                  |
+    |               |                   RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=1                                           |
+    |               |                     AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[count(*)]                                             |
+    |               |                       DataSourceExec: partitions=1, partition_sizes=[1]                                                      |
+    |               |           DataSourceExec: partitions=1, partition_sizes=[1]                                                                  |
+    |               |                                                                                                                              |
+    +---------------+------------------------------------------------------------------------------------------------------------------------------+
     "
     );
 

diff --git a/datafusion/core/tests/physical_optimizer/partition_statistics.rs b/datafusion/core/tests/physical_optimizer/partition_statistics.rs
@@ -627,7 +627,7 @@ mod test {
 
         let expected_p0_statistics = Statistics {
             num_rows: Precision::Inexact(2),
-            total_byte_size: Precision::Absent,
+            total_byte_size: Precision::Inexact(110),
             column_statistics: vec![
                 ColumnStatistics {
                     null_count: Precision::Absent,
@@ -645,7 +645,7 @@ mod test {
 
         let expected_p1_statistics = Statistics {
             num_rows: Precision::Inexact(2),
-            total_byte_size: Precision::Absent,
+            total_byte_size: Precision::Inexact(110),
             column_statistics: vec![
                 ColumnStatistics {
                     null_count: Precision::Absent,