Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion datafusion/core/src/dataframe/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -717,7 +717,10 @@ impl DataFrame {
{
let column =
batchs[0].column_by_name(field.name()).unwrap();
if field.data_type().is_numeric() {

if column.data_type().is_null() {
Arc::new(StringArray::from(vec!["null"]))
} else if field.data_type().is_numeric() {
cast(column, &DataType::Float64)?
} else {
cast(column, &DataType::Utf8)?
Expand Down
4 changes: 2 additions & 2 deletions datafusion/core/tests/dataframe/describe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,8 @@ async fn describe_null() -> Result<()> {
"| null_count | 0 | 1 |",
"| mean | null | null |",
"| std | null | null |",
"| min | null | null |",
"| max | null | null |",
"| min | a | null |",
Copy link
Contributor

@jayzhan211 jayzhan211 Aug 5, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is it a but not null?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The result of 'a' seems correct compared to the test case just above it.

https://github.com/apache/datafusion/blob/2521043ddcb3895a2010b8e328f3fa10f77fc094/datafusion/core/tests/dataframe/describe.rs#L54C1-L83C1

There are at least 2 issues here,

  1. In the previous, when null is not supported in the min/max accumulator, the min, max field in the record batches are [Err] instead of [{'a', Err}] and that's where the 'null' 'null' comes from. Below is the log when I debugged on the master branch.
...
batchs: Ok([RecordBatch { schema: Schema { fields: [Field { name: "a", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "b", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }], metadata: {} }, columns: [PrimitiveArray<Int64>
[
  0,
], PrimitiveArray<Int64>
[
  1,
]], row_count: 1 }])
batchs: Err(Internal("Min/Max accumulator not implemented for type Null\n\nbacktrace:    0: datafusion_common::error::DataFusionError::get_back_trace\n             at /home/xinli/source/repos/datafusion/datafusion/common/src/error.rs:392:30\n   1: datafusion_functions_aggregate::min_max::min_batch\n             at /home/xinli/source/repos/datafusion/datafusion/functions-aggregate/src/min_max.rs:422:24\n   2: <datafusion_functions_aggregate::min_max::MinAccumulator as datafusion_expr::accumulator::Accumulator>::update_batch\n             at /home/xinli/source/repos/datafusion/datafusion/functions-aggregate/src/min_max.rs:1064:22\n   3: datafusion_physical_plan::aggregates::no_grouping::aggregate_batch::{{closure}}\n             at /home/xinli/source/repos/datafusion/datafusion/physical-plan/src/aggregates/no_grouping.rs:236:55\n   4: core::iter::traits::iterator::Iterator::try_for_each::call::{{closure}}\n             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/core/src/iter/traits/iterator.rs:2470:26\n   5: core::iter::traits::iterator::Iterator::try_fold\n             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/core/src/iter/traits/iterator.rs:2411:21\n   6: core::iter::traits::iterator::Iterator::try_for_each\n             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/core/src/iter/traits/iterator.rs:2473:9\n   7: datafusion_physical_plan::aggregates::no_grouping::aggregate_batch\n             at /home/xinli/source/repos/datafusion/datafusion/physical-plan/src/aggregates/no_grouping.rs:211:5\n   8: datafusion_physical_plan::aggregates::no_grouping::AggregateStream::new::{{closure}}::{{closure}}\n             at /home/xinli/source/repos/datafusion/datafusion/physical-plan/src/aggregates/no_grouping.rs:116:38\n   9: <futures_util::stream::unfold::Unfold<T,F,Fut> as futures_core::stream::Stream>::poll_next\n             at /home/xinli/.cargo/registry/src/index.crates.io-6f17d22bba15001f/futures-util-0.3.30/src/stream/unfold.rs:107:33\n  10: <futures_util::stream::stream::fuse::Fuse<S> as futures_core::stream::Stream>::poll_next\n             at /home/xinli/.cargo/registry/src/index.crates.io-6f17d22bba15001f/futures-util-0.3.30/src/stream/stream/fuse.rs:53:27\n  11: <core::pin::Pin<P> as futures_core::stream::Stream>::poll_next\n             at /home/xinli/.cargo/registry/src/index.crates.io-6f17d22bba15001f/futures-core-0.3.30/src/stream.rs:120:9\n  12: futures_util::stream::stream::StreamExt::poll_next_unpin\n             at /home/xinli/.cargo/registry/src/index.crates.io-6f17d22bba15001f/futures-util-0.3.30/src/stream/stream/mod.rs:1638:9\n  13: <datafusion_physical_plan::aggregates::no_grouping::AggregateStream as futures_core::stream::Stream>::poll_next\n             at /home/xinli/source/repos/datafusion/datafusion/physical-plan/src/aggregates/no_grouping.rs:181:9\n  14: <core::pin::Pin<P> as futures_core::stream::Stream>::poll_next\n             at /home/xinli/.cargo/registry/src/index.crates.io-6f17d22bba15001f/futures-core-0.3.30/src/stream.rs:120:9\n  15: <S as futures_core::stream::TryStream>::try_poll_next\n             at /home/xinli/.cargo/registry/src/index.crates.io-6f17d22bba15001f/futures-core-0.3.30/src/stream.rs:196:9\n  16: <futures_util::stream::try_stream::try_collect::TryCollect<St,C> as core::future::future::Future>::poll\n             at /home/xinli/.cargo/registry/src/index.crates.io-6f17d22bba15001f/futures-util-0.3.30/src/stream/try_stream/try_collect.rs:46:26\n  17: datafusion_physical_plan::common::collect::{{closure}}\n             at /home/xinli/source/repos/datafusion/datafusion/physical-plan/src/common.rs:45:36\n  18: datafusion_physical_plan::execution_plan::collect::{{closure}}\n             at /home/xinli/source/repos/datafusion/datafusion/physical-plan/src/execution_plan.rs:680:36\n  19: datafusion::dataframe::DataFrame::collect::{{closure}}\n             at ./src/dataframe/mod.rs:994:33\n  20: datafusion::dataframe::DataFrame::describe::{{closure}}\n             at ./src/dataframe/mod.rs:710:59\n  21: core_integration::dataframe::describe::describe_null::{{closure}}\n             at ./tests/dataframe/describe.rs:93:10\n  22: <core::pin::Pin<P> as core::future::future::Future>::poll\n             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/core/src/future/future.rs:123:9\n  23: <core::pin::Pin<P> as core::future::future::Future>::poll\n             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/core/src/future/future.rs:123:9\n  24: tokio::runtime::scheduler::current_thread::CoreGuard::block_on::{{closure}}::{{closure}}::{{closure}}\n             at /home/xinli/.cargo/registry/src/index.crates.io-6f17d22bba15001f/tokio-1.38.0/src/runtime/scheduler/current_thread/mod.rs:659:57\n  25: tokio::runtime::coop::with_budget\n             at /home/xinli/.cargo/registry/src/index.crates.io-6f17d22bba15001f/tokio-1.38.0/src/runtime/coop.rs:107:5\n  26: tokio::runtime::coop::budget\n             at /home/xinli/.cargo/registry/src/index.crates.io-6f17d22bba15001f/tokio-1.38.0/src/runtime/coop.rs:73:5\n  27: tokio::runtime::scheduler::current_thread::CoreGuard::block_on::{{closure}}::{{closure}}\n             at /home/xinli/.cargo/registry/src/index.crates.io-6f17d22bba15001f/tokio-1.38.0/src/runtime/scheduler/current_thread/mod.rs:659:25\n  28: tokio::runtime::scheduler::current_thread::Context::enter\n             at /home/xinli/.cargo/registry/src/index.crates.io-6f17d22bba15001f/tokio-1.38.0/src/runtime/scheduler/current_thread/mod.rs:404:19\n  29: tokio::runtime::scheduler::current_thread::CoreGuard::block_on::{{closure}}\n             at /home/xinli/.cargo/registry/src/index.crates.io-6f17d22bba15001f/tokio-1.38.0/src/runtime/scheduler/current_thread/mod.rs:658:36\n  30: tokio::runtime::scheduler::current_thread::CoreGuard::enter::{{closure}}\n             at /home/xinli/.cargo/registry/src/index.crates.io-6f17d22bba15001f/tokio-1.38.0/src/runtime/scheduler/current_thread/mod.rs:737:68\n  31: tokio::runtime::context::scoped::Scoped<T>::set\n             at /home/xinli/.cargo/registry/src/index.crates.io-6f17d22bba15001f/tokio-1.38.0/src/runtime/context/scoped.rs:40:9\n  32: tokio::runtime::context::set_scheduler::{{closure}}\n             at /home/xinli/.cargo/registry/src/index.crates.io-6f17d22bba15001f/tokio-1.38.0/src/runtime/context.rs:180:26\n  33: std::thread::local::LocalKey<T>::try_with\n             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/std/src/thread/local.rs:283:12\n  34: std::thread::local::LocalKey<T>::with\n             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/std/src/thread/local.rs:260:9\n  35: tokio::runtime::context::set_scheduler\n             at /home/xinli/.cargo/registry/src/index.crates.io-6f17d22bba15001f/tokio-1.38.0/src/runtime/context.rs:180:9\n  36: tokio::runtime::scheduler::current_thread::CoreGuard::enter\n             at /home/xinli/.cargo/registry/src/index.crates.io-6f17d22bba15001f/tokio-1.38.0/src/runtime/scheduler/current_thread/mod.rs:737:27\n  37: tokio::runtime::scheduler::current_thread::CoreGuard::block_on\n             at /home/xinli/.cargo/registry/src/index.crates.io-6f17d22bba15001f/tokio-1.38.0/src/runtime/scheduler/current_thread/mod.rs:646:19\n  38: tokio::runtime::scheduler::current_thread::CurrentThread::block_on::{{closure}}\n             at /home/xinli/.cargo/registry/src/index.crates.io-6f17d22bba15001f/tokio-1.38.0/src/runtime/scheduler/current_thread/mod.rs:175:28\n  39: tokio::runtime::context::runtime::enter_runtime\n             at /home/xinli/.cargo/registry/src/index.crates.io-6f17d22bba15001f/tokio-1.38.0/src/runtime/context/runtime.rs:65:16\n  40: tokio::runtime::scheduler::current_thread::CurrentThread::block_on\n             at /home/xinli/.cargo/registry/src/index.crates.io-6f17d22bba15001f/tokio-1.38.0/src/runtime/scheduler/current_thread/mod.rs:167:9\n  41: tokio::runtime::runtime::Runtime::block_on\n             at /home/xinli/.cargo/registry/src/index.crates.io-6f17d22bba15001f/tokio-1.38.0/src/runtime/runtime.rs:347:47\n  42: core_integration::dataframe::describe::describe_null\n             at ./tests/dataframe/describe.rs:111:5\n  43: core_integration::dataframe::describe::describe_null::{{closure}}\n             at ./tests/dataframe/describe.rs:85:29\n  44: core::ops::function::FnOnce::call_once\n             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/core/src/ops/function.rs:250:5\n  45: core::ops::function::FnOnce::call_once\n             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/core/src/ops/function.rs:250:5\n  46: test::__rust_begin_short_backtrace\n             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/test/src/lib.rs:625:18\n  47: test::run_test_in_process::{{closure}}\n             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/test/src/lib.rs:648:60\n  48: <core::panic::unwind_safe::AssertUnwindSafe<F> as core::ops::function::FnOnce<()>>::call_once\n             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/core/src/panic/unwind_safe.rs:272:9\n  49: std::panicking::try::do_call\n             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/std/src/panicking.rs:559:40\n  50: std::panicking::try\n             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/std/src/panicking.rs:523:19\n  51: std::panic::catch_unwind\n             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/std/src/panic.rs:149:14\n  52: test::run_test_in_process\n             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/test/src/lib.rs:648:27\n  53: test::run_test::{{closure}}\n             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/test/src/lib.rs:569:43\n  54: test::run_test::{{closure}}\n             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/test/src/lib.rs:599:41\n  55: std::sys_common::backtrace::__rust_begin_short_backtrace\n             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/std/src/sys_common/backtrace.rs:155:18\n  56: std::thread::Builder::spawn_unchecked_::{{closure}}::{{closure}}\n             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/std/src/thread/mod.rs:542:17\n  57: <core::panic::unwind_safe::AssertUnwindSafe<F> as core::ops::function::FnOnce<()>>::call_once\n             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/core/src/panic/unwind_safe.rs:272:9\n  58: std::panicking::try::do_call\n             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/std/src/panicking.rs:559:40\n  59: std::panicking::try\n             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/std/src/panicking.rs:523:19\n  60: std::panic::catch_unwind\n             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/std/src/panic.rs:149:14\n  61: std::thread::Builder::spawn_unchecked_::{{closure}}\n             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/std/src/thread/mod.rs:541:30\n  62: core::ops::function::FnOnce::call_once{{vtable.shim}}\n             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/core/src/ops/function.rs:250:5\n  63: <alloc::boxed::Box<F,A> as core::ops::function::FnOnce<Args>>::call_once\n             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/alloc/src/boxed.rs:2063:9\n  64: <alloc::boxed::Box<F,A> as core::ops::function::FnOnce<Args>>::call_once\n             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/alloc/src/boxed.rs:2063:9\n  65: std::sys::pal::unix::thread::Thread::new::thread_start\n             at /rustc/051478957371ee0084a7c0913941d2a8c4757bb9/library/std/src/sys/pal/unix/thread.rs:108:17\n  66: <unknown>\n  67: <unknown>\n"))
  1. The comment is mislead.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After the NULL issue in min/max accumulator is resolved, the result of describe would changed to a | null

"| max | a | null |",
"| median | null | null |",
"+------------+------+------+"
];
Expand Down
2 changes: 2 additions & 0 deletions datafusion/functions-aggregate/src/min_max.rs
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,7 @@ macro_rules! typed_min_max_batch {
macro_rules! min_max_batch {
($VALUES:expr, $OP:ident) => {{
match $VALUES.data_type() {
DataType::Null => ScalarValue::Null,
DataType::Decimal128(precision, scale) => {
typed_min_max_batch!(
$VALUES,
Expand Down Expand Up @@ -579,6 +580,7 @@ macro_rules! interval_min_max {
macro_rules! min_max {
($VALUE:expr, $DELTA:expr, $OP:ident) => {{
Ok(match ($VALUE, $DELTA) {
(ScalarValue::Null, ScalarValue::Null) => ScalarValue::Null,
(
lhs @ ScalarValue::Decimal128(lhsv, lhsp, lhss),
rhs @ ScalarValue::Decimal128(rhsv, rhsp, rhss)
Expand Down
6 changes: 6 additions & 0 deletions datafusion/sqllogictest/test_files/aggregate.slt
Original file line number Diff line number Diff line change
Expand Up @@ -5548,3 +5548,9 @@ set datafusion.explain.logical_plan_only = false;

statement ok
drop table employee_csv;

# test null literal handling in supported aggregate functions
query I??III?T
select count(null), min(null), max(null), bit_and(NULL), bit_or(NULL), bit_xor(NULL), nth_value(NULL, 1), string_agg(NULL, ',');
Copy link
Contributor

@alamb alamb Aug 8, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I double checked with postgres. This looks good except for count where count(null) is zero (not null)

Update: I misread the test output -- this PR is correct

postgres=# select count(null), min(null), max(null), string_agg(NULL, ',');
 count | min | max | string_agg
-------+-----+-----+------------
     0 |     |     |
(1 row)

postgres=# select count(null) IS NULL, min(null) IS NULL, max(null) IS NULL, string_agg(NULL, ',') IS NULL;
 ?column? | ?column? | ?column? | ?column?
----------+----------+----------+----------
 f        | t        | t        | t
(1 row)

However, this seems better than what we have today on main (which is an internal error):

DataFusion CLI v40.0.0
> select count(null), min(null), max(null), string_agg(NULL, ',');
Internal error: Min/Max accumulator not implemented for type Null.
This was likely caused by a bug in DataFusion's code and we would welcome that you file an bug report in our issue tracker
> select count(null) IS NULL, min(null) IS NULL, max(null) IS NULL, string_agg(NULL, ',') IS NULL;
Internal error: Min/Max accumulator not implemented for type Null.
This was likely caused by a bug in DataFusion's code and we would welcome that you file an bug report in our issue tracker

----
0 NULL NULL NULL NULL NULL NULL NULL