diff --git a/datafusion/src/physical_plan/distinct_expressions.rs b/datafusion/src/physical_plan/distinct_expressions.rs index 8534e9c8805c..1c93b5a104d0 100644 --- a/datafusion/src/physical_plan/distinct_expressions.rs +++ b/datafusion/src/physical_plan/distinct_expressions.rs @@ -195,10 +195,9 @@ impl Accumulator for DistinctCountAccumulator { mod tests { use super::*; - use arrow::array::ArrayRef; use arrow::array::{ - Int16Array, Int32Array, Int64Array, Int8Array, ListArray, UInt16Array, - UInt32Array, UInt64Array, UInt8Array, + ArrayRef, BooleanArray, Int16Array, Int32Array, Int64Array, Int8Array, ListArray, + UInt16Array, UInt32Array, UInt64Array, UInt8Array, }; use arrow::array::{Int32Builder, ListBuilder, UInt64Builder}; use arrow::datatypes::DataType; @@ -396,6 +395,61 @@ mod tests { test_count_distinct_update_batch_numeric!(UInt64Array, UInt64, u64) } + #[test] + fn count_distinct_update_batch_boolean() -> Result<()> { + let get_count = |data: BooleanArray| -> Result<(Vec<Option<bool>>, u64)> { + let arrays = vec![Arc::new(data) as ArrayRef]; + let (states, result) = run_update_batch(&arrays)?; + let mut state_vec = state_to_vec!(&states[0], Boolean, bool).unwrap(); + state_vec.sort(); + let count = match result { + ScalarValue::UInt64(c) => c.ok_or_else(|| { + DataFusionError::Internal("Found None count".to_string()) + }), + scalar => Err(DataFusionError::Internal(format!( + "Found non Uint64 scalar value from count: {}", + scalar + ))), + }?; + Ok((state_vec, count)) + }; + + let zero_count_values = BooleanArray::from(Vec::<bool>::new()); + + let one_count_values = BooleanArray::from(vec![false, false]); + let one_count_values_with_null = + BooleanArray::from(vec![Some(true), Some(true), None, None]); + + let two_count_values = BooleanArray::from(vec![true, false, true, false, true]); + let two_count_values_with_null = BooleanArray::from(vec![ + Some(true), + Some(false), + None, + None, + Some(true), + Some(false), + ]); + + assert_eq!( + get_count(zero_count_values)?, 
(Vec::<Option<bool>>::new(), 0) + ); + assert_eq!(get_count(one_count_values)?, (vec![Some(false)], 1)); + assert_eq!( + get_count(one_count_values_with_null)?, + (vec![Some(true)], 1) + ); + assert_eq!( + get_count(two_count_values)?, + (vec![Some(false), Some(true)], 2) + ); + assert_eq!( + get_count(two_count_values_with_null)?, + (vec![Some(false), Some(true)], 2) + ); + Ok(()) + } + #[test] fn count_distinct_update_batch_all_nulls() -> Result<()> { let arrays = vec![Arc::new(Int32Array::from( diff --git a/datafusion/src/scalar.rs b/datafusion/src/scalar.rs index 833f707e971e..6f03194f4542 100644 --- a/datafusion/src/scalar.rs +++ b/datafusion/src/scalar.rs @@ -345,6 +345,7 @@ impl ScalarValue { ), }, ScalarValue::List(values, data_type) => Arc::new(match data_type { + DataType::Boolean => build_list!(BooleanBuilder, Boolean, values, size), DataType::Int8 => build_list!(Int8Builder, Int8, values, size), DataType::Int16 => build_list!(Int16Builder, Int16, values, size), DataType::Int32 => build_list!(Int32Builder, Int32, values, size),