From 4f52a252374da49d7346aeb2e1b996133f8cf6b2 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 13 Sep 2022 14:23:52 -0700 Subject: [PATCH] Add divide_opt kernel which produce null values on division by zero error (#2710) * Add divide_opt kernel * Add fast-path for non-null arrays * Add doc --- arrow/src/compute/kernels/arithmetic.rs | 51 +++++++++++++++++++++-- arrow/src/compute/kernels/arity.rs | 55 ++++++++++++++++++++++++- 2 files changed, 102 insertions(+), 4 deletions(-) diff --git a/arrow/src/compute/kernels/arithmetic.rs b/arrow/src/compute/kernels/arithmetic.rs index 6638ae1e87d..a344407e426 100644 --- a/arrow/src/compute/kernels/arithmetic.rs +++ b/arrow/src/compute/kernels/arithmetic.rs @@ -32,7 +32,7 @@ use crate::buffer::Buffer; use crate::buffer::MutableBuffer; use crate::compute::kernels::arity::unary; use crate::compute::util::combine_option_bitmap; -use crate::compute::{binary, try_binary, try_unary, unary_dyn}; +use crate::compute::{binary, binary_opt, try_binary, try_unary, unary_dyn}; use crate::datatypes::{ native_op::ArrowNativeTypeOp, ArrowNumericType, DataType, Date32Type, Date64Type, IntervalDayTimeType, IntervalMonthDayNanoType, IntervalUnit, IntervalYearMonthType, @@ -711,7 +711,7 @@ where } /// Perform `left + right` operation on two arrays. If either left or right value is null -/// then the result is also null. Once +/// then the result is also null. /// /// This detects overflow and returns an `Err` for that. For an non-overflow-checking variant, /// use `add` instead. @@ -1118,6 +1118,32 @@ where return math_checked_divide_op(left, right, |a, b| a.div_checked(b)); } +/// Perform `left / right` operation on two arrays. If either left or right value is null +/// then the result is also null. +/// +/// If any right hand value is zero, the operation value will be replaced with null in the +/// result. +/// +/// Unlike `divide` or `divide_checked`, division by zero will get a null value instead +/// returning an `Err`, this also doesn't check overflowing, overflowing will just wrap +/// the result around. +pub fn divide_opt( + left: &PrimitiveArray, + right: &PrimitiveArray, +) -> Result> +where + T: ArrowNumericType, + T::Native: ArrowNativeTypeOp + Zero + One, +{ + Ok(binary_opt(left, right, |a, b| { + if b.is_zero() { + None + } else { + Some(a.div_wrapping(b)) + } + })) +} + /// Perform `left / right` operation on two arrays. If either left or right value is null /// then the result is also null. If any right hand value is zero then the result of this /// operation will be `Err(ArrowError::DivideByZero)`. @@ -1152,7 +1178,7 @@ pub fn divide( right: &PrimitiveArray, ) -> Result> where - T: datatypes::ArrowNumericType, + T: ArrowNumericType, T::Native: ArrowNativeTypeOp, { math_op(left, right, |a, b| a.div_wrapping(b)) @@ -2195,4 +2221,23 @@ mod tests { let overflow = multiply_scalar_checked(&a, i32::MAX); overflow.expect_err("overflow should be detected"); } + + #[test] + fn test_primitive_div_opt_overflow_division_by_zero() { + let a = Int32Array::from(vec![i32::MIN]); + let b = Int32Array::from(vec![-1]); + + let wrapped = divide(&a, &b); + let expected = Int32Array::from(vec![-2147483648]); + assert_eq!(expected, wrapped.unwrap()); + + let overflow = divide_opt(&a, &b); + let expected = Int32Array::from(vec![-2147483648]); + assert_eq!(expected, overflow.unwrap()); + + let b = Int32Array::from(vec![0]); + let overflow = divide_opt(&a, &b); + let expected = Int32Array::from(vec![None]); + assert_eq!(expected, overflow.unwrap()); + } } diff --git a/arrow/src/compute/kernels/arity.rs b/arrow/src/compute/kernels/arity.rs index ee3ff5e23a8..fffa81af819 100644 --- a/arrow/src/compute/kernels/arity.rs +++ b/arrow/src/compute/kernels/arity.rs @@ -18,7 +18,7 @@ //! Defines kernels suitable to perform operations to primitive arrays. use crate::array::{ - Array, ArrayData, ArrayRef, BufferBuilder, DictionaryArray, PrimitiveArray, + Array, ArrayData, ArrayIter, ArrayRef, BufferBuilder, DictionaryArray, PrimitiveArray, }; use crate::buffer::Buffer; use crate::compute::util::combine_option_bitmap; @@ -257,6 +257,59 @@ where Ok(unsafe { build_primitive_array(len, buffer.finish(), null_count, null_buffer) }) } +/// Applies the provided binary operation across `a` and `b`, collecting the optional results +/// into a [`PrimitiveArray`]. If any index is null in either `a` or `b`, the corresponding +/// index in the result will also be null. The binary operation could return `None` which +/// results in a new null in the collected [`PrimitiveArray`]. +/// +/// The function is only evaluated for non-null indices +/// +/// # Panic +/// +/// Panics if the arrays have different lengths +pub(crate) fn binary_opt( + a: &PrimitiveArray, + b: &PrimitiveArray, + op: F, +) -> PrimitiveArray +where + A: ArrowPrimitiveType, + B: ArrowPrimitiveType, + O: ArrowPrimitiveType, + F: Fn(A::Native, B::Native) -> Option, +{ + assert_eq!(a.len(), b.len()); + + if a.is_empty() { + return PrimitiveArray::from(ArrayData::new_empty(&O::DATA_TYPE)); + } + + if a.null_count() == 0 && b.null_count() == 0 { + a.values() + .iter() + .zip(b.values().iter()) + .map(|(a, b)| op(*a, *b)) + .collect() + } else { + let iter_a = ArrayIter::new(a); + let iter_b = ArrayIter::new(b); + + let values = + iter_a + .into_iter() + .zip(iter_b.into_iter()) + .map(|(item_a, item_b)| { + if let (Some(a), Some(b)) = (item_a, item_b) { + op(a, b) + } else { + None + } + }); + + values.collect() + } +} + #[cfg(test)] mod tests { use super::*;