Skip to content

Commit

Permalink
Add divide_opt kernel which produce null values on division by zero e…
Browse files Browse the repository at this point in the history
…rror (#2710)

* Add divide_opt kernel

* Add fast-path for non-null arrays

* Add doc
  • Loading branch information
viirya committed Sep 13, 2022
1 parent 7e47fa6 commit 4f52a25
Show file tree
Hide file tree
Showing 2 changed files with 102 additions and 4 deletions.
51 changes: 48 additions & 3 deletions arrow/src/compute/kernels/arithmetic.rs
Expand Up @@ -32,7 +32,7 @@ use crate::buffer::Buffer;
use crate::buffer::MutableBuffer;
use crate::compute::kernels::arity::unary;
use crate::compute::util::combine_option_bitmap;
use crate::compute::{binary, try_binary, try_unary, unary_dyn};
use crate::compute::{binary, binary_opt, try_binary, try_unary, unary_dyn};
use crate::datatypes::{
native_op::ArrowNativeTypeOp, ArrowNumericType, DataType, Date32Type, Date64Type,
IntervalDayTimeType, IntervalMonthDayNanoType, IntervalUnit, IntervalYearMonthType,
Expand Down Expand Up @@ -711,7 +711,7 @@ where
}

/// Perform `left + right` operation on two arrays. If either left or right value is null
/// then the result is also null. Once
/// then the result is also null.
///
/// This detects overflow and returns an `Err` for that. For an non-overflow-checking variant,
/// use `add` instead.
Expand Down Expand Up @@ -1118,6 +1118,32 @@ where
return math_checked_divide_op(left, right, |a, b| a.div_checked(b));
}

/// Perform `left / right` operation on two arrays. If either left or right value is null
/// then the result is also null.
///
/// If any right hand value is zero, the operation value will be replaced with null in the
/// result.
///
/// Unlike `divide` or `divide_checked`, division by zero will get a null value instead
/// returning an `Err`, this also doesn't check overflowing, overflowing will just wrap
/// the result around.
pub fn divide_opt<T>(
left: &PrimitiveArray<T>,
right: &PrimitiveArray<T>,
) -> Result<PrimitiveArray<T>>
where
T: ArrowNumericType,
T::Native: ArrowNativeTypeOp + Zero + One,
{
Ok(binary_opt(left, right, |a, b| {
if b.is_zero() {
None
} else {
Some(a.div_wrapping(b))
}
}))
}

/// Perform `left / right` operation on two arrays. If either left or right value is null
/// then the result is also null. If any right hand value is zero then the result of this
/// operation will be `Err(ArrowError::DivideByZero)`.
Expand Down Expand Up @@ -1152,7 +1178,7 @@ pub fn divide<T>(
right: &PrimitiveArray<T>,
) -> Result<PrimitiveArray<T>>
where
T: datatypes::ArrowNumericType,
T: ArrowNumericType,
T::Native: ArrowNativeTypeOp,
{
math_op(left, right, |a, b| a.div_wrapping(b))
Expand Down Expand Up @@ -2195,4 +2221,23 @@ mod tests {
let overflow = multiply_scalar_checked(&a, i32::MAX);
overflow.expect_err("overflow should be detected");
}

#[test]
fn test_primitive_div_opt_overflow_division_by_zero() {
let a = Int32Array::from(vec![i32::MIN]);
let b = Int32Array::from(vec![-1]);

let wrapped = divide(&a, &b);
let expected = Int32Array::from(vec![-2147483648]);
assert_eq!(expected, wrapped.unwrap());

let overflow = divide_opt(&a, &b);
let expected = Int32Array::from(vec![-2147483648]);
assert_eq!(expected, overflow.unwrap());

let b = Int32Array::from(vec![0]);
let overflow = divide_opt(&a, &b);
let expected = Int32Array::from(vec![None]);
assert_eq!(expected, overflow.unwrap());
}
}
55 changes: 54 additions & 1 deletion arrow/src/compute/kernels/arity.rs
Expand Up @@ -18,7 +18,7 @@
//! Defines kernels suitable to perform operations to primitive arrays.

use crate::array::{
Array, ArrayData, ArrayRef, BufferBuilder, DictionaryArray, PrimitiveArray,
Array, ArrayData, ArrayIter, ArrayRef, BufferBuilder, DictionaryArray, PrimitiveArray,
};
use crate::buffer::Buffer;
use crate::compute::util::combine_option_bitmap;
Expand Down Expand Up @@ -257,6 +257,59 @@ where
Ok(unsafe { build_primitive_array(len, buffer.finish(), null_count, null_buffer) })
}

/// Applies the provided binary operation across `a` and `b`, collecting the optional results
/// into a [`PrimitiveArray`]. If any index is null in either `a` or `b`, the corresponding
/// index in the result will also be null. The binary operation could return `None` which
/// results in a new null in the collected [`PrimitiveArray`].
///
/// The function is only evaluated for non-null indices
///
/// # Panic
///
/// Panics if the arrays have different lengths
pub(crate) fn binary_opt<A, B, F, O>(
a: &PrimitiveArray<A>,
b: &PrimitiveArray<B>,
op: F,
) -> PrimitiveArray<O>
where
A: ArrowPrimitiveType,
B: ArrowPrimitiveType,
O: ArrowPrimitiveType,
F: Fn(A::Native, B::Native) -> Option<O::Native>,
{
assert_eq!(a.len(), b.len());

if a.is_empty() {
return PrimitiveArray::from(ArrayData::new_empty(&O::DATA_TYPE));
}

if a.null_count() == 0 && b.null_count() == 0 {
a.values()
.iter()
.zip(b.values().iter())
.map(|(a, b)| op(*a, *b))
.collect()
} else {
let iter_a = ArrayIter::new(a);
let iter_b = ArrayIter::new(b);

let values =
iter_a
.into_iter()
.zip(iter_b.into_iter())
.map(|(item_a, item_b)| {
if let (Some(a), Some(b)) = (item_a, item_b) {
op(a, b)
} else {
None
}
});

values.collect()
}
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down

0 comments on commit 4f52a25

Please sign in to comment.