-
Notifications
You must be signed in to change notification settings - Fork 1.8k
Description
Describe the bug
AggregateStatistics is a physical optimizer pass that changes the output of aggregates such as COUNT to a constant based on statistics.
#2636 changes the type of count() output to be Int64 but the optimizer pass was not also changed. This means that if the optimization is triggered, the output type of a query with Count in it may be converted to UInt64 instead of Int64
cc @liukun4515
To Reproduce
This can be shown
diff --git a/datafusion/core/src/physical_optimizer/aggregate_statistics.rs b/datafusion/core/src/physical_optimizer/aggregate_statistics.rs
index 4cf96d2350..290b30144c 100644
--- a/datafusion/core/src/physical_optimizer/aggregate_statistics.rs
+++ b/datafusion/core/src/physical_optimizer/aggregate_statistics.rs
@@ -293,9 +293,9 @@ mod tests {
/// Checks that the count optimization was applied and we still get the right result
async fn assert_count_optim_success(plan: AggregateExec, nulls: bool) -> Result<()> {
let session_ctx = SessionContext::new();
- let task_ctx = session_ctx.task_ctx();
let conf = session_ctx.copied_config();
- let optimized = AggregateStatistics::new().optimize(Arc::new(plan), &conf)?;
+ let plan = Arc::new(plan) as _;
+ let optimized = AggregateStatistics::new().optimize(Arc::clone(&plan), &conf)?;
let (col, count) = match nulls {
false => (Field::new("COUNT(UInt8(1))", DataType::UInt64, false), 3),
@@ -304,6 +304,7 @@ mod tests {
// A ProjectionExec is a sign that the count optimization was applied
assert!(optimized.as_any().is::<ProjectionExec>());
+ let task_ctx = session_ctx.task_ctx();
let result = common::collect(optimized.execute(0, task_ctx)?).await?;
assert_eq!(result[0].schema(), Arc::new(Schema::new(vec![col])));
assert_eq!(
@@ -315,6 +316,12 @@ mod tests {
.values(),
&[count]
);
+
+ // Validate that the optimized plan returns the exact same
+ // answer (both schema and data) as the original plan
+ let task_ctx = session_ctx.task_ctx();
+ let plan_result = common::collect(plan.execute(0, task_ctx)?).await?;
+ assert_eq!(result, plan_result);
Ok(())
}This test fails on master (note the error about UInt64 but expected Int64):
---- physical_optimizer::aggregate_statistics::tests::test_count_partial_indirect_child stdout ----
Error: ArrowError(InvalidArgumentError("column types must match schema types, expected UInt64 but found Int64 at column index 0"))
thread 'physical_optimizer::aggregate_statistics::tests::test_count_partial_indirect_child' panicked at 'assertion failed: (left == right)
left: 1,
right: 0: the test returned a termination value with a non-zero status code (1) which indicates a failure', /rustc/fe5b13d681f25ee6474be29d748c65adcd91f69e/library/test/src/lib.rs:186:5
stack backtrace:
0: rust_begin_unwind
at /rustc/fe5b13d681f25ee6474be29d748c65adcd91f69e/library/std/src/panicking.rs:584:5
1: core::panicking::panic_fmt
at /rustc/fe5b13d681f25ee6474be29d748c65adcd91f69e/library/core/src/panicking.rs:143:14
2: core::panicking::assert_failed_inner
at /rustc/fe5b13d681f25ee6474be29d748c65adcd91f69e/library/core/src/panicking.rs:219:23
3: core::panicking::assert_failed
at /rustc/fe5b13d681f25ee6474be29d748c65adcd91f69e/library/core/src/panicking.rs:182:5
4: test::assert_test_result
at /rustc/fe5b13d681f25ee6474be29d748c65adcd91f69e/library/test/src/lib.rs:186:5
5: datafusion::physical_optimizer::aggregate_statistics::tests::test_count_partial_indirect_child::{{closure}}
at ./src/physical_optimizer/aggregate_statistics.rs:392:11
6: core::ops::function::FnOnce::call_once
at /rustc/fe5b13d681f25ee6474be29d748c65adcd91f69e/library/core/src/ops/function.rs:227:5
7: core::ops::function::FnOnce::call_once
at /rustc/fe5b13d681f25ee6474be29d748c65adcd91f69e/library/core/src/ops/function.rs:227:5
note: Some details are omitted, run with RUST_BACKTRACE=full for a verbose backtrace.
Expected behavior
The output type should not be changed, and the test should pass
Additional context
This was caught by some of IOx's tests (see https://github.com/influxdata/influxdb_iox/pull/4743)