diff --git a/datafusion/expr/src/operator.rs b/datafusion/expr/src/operator.rs index 2f14d51797dc..bf16654432b1 100644 --- a/datafusion/expr/src/operator.rs +++ b/datafusion/expr/src/operator.rs @@ -47,6 +47,8 @@ pub enum Operator { Multiply, /// Division operator, like `/` Divide, + /// Same as the Division operator except divide by zero is unchecked + DivideUnchecked, /// Remainder operator, like `%` Modulo, /// Logical AND, like `&&` @@ -96,6 +98,7 @@ impl Operator { | Operator::Minus | Operator::Multiply | Operator::Divide + | Operator::DivideUnchecked | Operator::Modulo | Operator::And | Operator::Or @@ -123,6 +126,7 @@ impl Operator { | Operator::Minus | Operator::Multiply | Operator::Divide + | Operator::DivideUnchecked | Operator::Modulo ) } @@ -173,6 +177,7 @@ impl Operator { | Operator::Minus | Operator::Multiply | Operator::Divide + | Operator::DivideUnchecked | Operator::Modulo | Operator::And | Operator::Or @@ -202,7 +207,10 @@ impl Operator { | Operator::Gt | Operator::GtEq => 20, Operator::Plus | Operator::Minus => 30, - Operator::Multiply | Operator::Divide | Operator::Modulo => 40, + Operator::Multiply + | Operator::Divide + | Operator::DivideUnchecked + | Operator::Modulo => 40, Operator::IsDistinctFrom | Operator::IsNotDistinctFrom | Operator::RegexMatch @@ -232,6 +240,7 @@ impl fmt::Display for Operator { Operator::Minus => "-", Operator::Multiply => "*", Operator::Divide => "/", + Operator::DivideUnchecked => "DIVIDE BY UNCHECKED", Operator::Modulo => "%", Operator::And => "AND", Operator::Or => "OR", diff --git a/datafusion/expr/src/type_coercion/binary.rs b/datafusion/expr/src/type_coercion/binary.rs index c510822445a8..69bef817e224 100644 --- a/datafusion/expr/src/type_coercion/binary.rs +++ b/datafusion/expr/src/type_coercion/binary.rs @@ -117,6 +117,7 @@ fn signature(lhs: &DataType, op: &Operator, rhs: &DataType) -> Result Operator::Minus | Operator::Multiply | Operator::Divide| + Operator::DivideUnchecked | Operator::Modulo => { // TODO: this logic would be easier to follow if the functions were inlined if let Some(ret) = mathematics_temporal_result_type(lhs, rhs) { @@ -537,7 +538,7 @@ pub fn coercion_decimal_mathematics_type( left_decimal_type, right_decimal_type, ), - Operator::Divide | Operator::Modulo => { + Operator::Divide | Operator::DivideUnchecked | Operator::Modulo => { get_wider_decimal_type(left_decimal_type, right_decimal_type) } _ => None, @@ -575,7 +576,7 @@ pub fn decimal_op_mathematics_type( let result_precision = *p1 + *p2 + 1; Some(create_decimal_type(result_precision, result_scale)) } - Operator::Divide => { + Operator::Divide | Operator::DivideUnchecked => { // max(6, s1 + p2 + 1) let result_scale = 6.max(*s1 + *p2 as i8 + 1); // p1 - s1 + s2 + max(6, s1 + p2 + 1) @@ -953,6 +954,16 @@ mod tests { let result = decimal_op_mathematics_type(&op, &left_decimal_type, &right_decimal_type); assert_eq!(DataType::Decimal128(35, 24), result.unwrap()); + let op = Operator::DivideUnchecked; + let result = coercion_decimal_mathematics_type( + &op, + &left_decimal_type, + &right_decimal_type, + ); + assert_eq!(DataType::Decimal128(20, 4), result.unwrap()); + let result = + decimal_op_mathematics_type(&op, &left_decimal_type, &right_decimal_type); + assert_eq!(DataType::Decimal128(35, 24), result.unwrap()); let op = Operator::Modulo; let result = coercion_decimal_mathematics_type( &op, @@ -1299,6 +1310,16 @@ mod tests { DataType::Decimal128(23, 11), ); + test_math_decimal_coercion_rule( + DataType::Int32, + DataType::Decimal128(10, 2), + Operator::DivideUnchecked, + DataType::Decimal128(10, 0), + DataType::Decimal128(10, 2), + Some(DataType::Decimal128(12, 2)), + DataType::Decimal128(23, 11), + ); + test_math_decimal_coercion_rule( DataType::Int32, DataType::Decimal128(10, 2), diff --git a/datafusion/physical-expr/src/expressions/binary.rs b/datafusion/physical-expr/src/expressions/binary.rs index 9d2087a0c5b5..742ec88e178c 100644 --- a/datafusion/physical-expr/src/expressions/binary.rs +++ b/datafusion/physical-expr/src/expressions/binary.rs @@ -24,7 +24,7 @@ use std::{any::Any, sync::Arc}; use arrow::array::*; use arrow::compute::kernels::arithmetic::{ - add_dyn, add_scalar_dyn as add_dyn_scalar, divide_dyn_checked, + add_dyn, add_scalar_dyn as add_dyn_scalar, divide_dyn_checked, divide_dyn_opt, divide_scalar_dyn as divide_dyn_scalar, modulus_dyn, modulus_scalar_dyn as modulus_dyn_scalar, multiply_dyn, multiply_scalar_dyn as multiply_dyn_scalar, subtract_dyn, @@ -63,14 +63,15 @@ use kernels::{ }; use kernels_arrow::{ add_decimal_dyn_scalar, add_dyn_decimal, add_dyn_temporal, divide_decimal_dyn_scalar, - divide_dyn_checked_decimal, is_distinct_from, is_distinct_from_binary, - is_distinct_from_bool, is_distinct_from_decimal, is_distinct_from_f32, - is_distinct_from_f64, is_distinct_from_null, is_distinct_from_utf8, - is_not_distinct_from, is_not_distinct_from_binary, is_not_distinct_from_bool, - is_not_distinct_from_decimal, is_not_distinct_from_f32, is_not_distinct_from_f64, - is_not_distinct_from_null, is_not_distinct_from_utf8, modulus_decimal_dyn_scalar, - modulus_dyn_decimal, multiply_decimal_dyn_scalar, multiply_dyn_decimal, - subtract_decimal_dyn_scalar, subtract_dyn_decimal, subtract_dyn_temporal, + divide_dyn_checked_decimal, divide_dyn_opt_decimal, is_distinct_from, + is_distinct_from_binary, is_distinct_from_bool, is_distinct_from_decimal, + is_distinct_from_f32, is_distinct_from_f64, is_distinct_from_null, + is_distinct_from_utf8, is_not_distinct_from, is_not_distinct_from_binary, + is_not_distinct_from_bool, is_not_distinct_from_decimal, is_not_distinct_from_f32, + is_not_distinct_from_f64, is_not_distinct_from_null, is_not_distinct_from_utf8, + modulus_decimal_dyn_scalar, modulus_dyn_decimal, multiply_decimal_dyn_scalar, + multiply_dyn_decimal, subtract_decimal_dyn_scalar, subtract_dyn_decimal, + subtract_dyn_temporal, }; use arrow::datatypes::{DataType, Schema, TimeUnit}; @@ -1234,6 +1235,9 @@ impl BinaryExpr { result_type ) } + DivideUnchecked => { + binary_primitive_array_op_dyn!(left, right, divide_dyn_opt, result_type) + } Modulo => { binary_primitive_array_op_dyn!(left, right, modulus_dyn, result_type) } @@ -4309,7 +4313,7 @@ mod tests { let b = Arc::new(Int32Array::from(vec![0])); let err = apply_arithmetic::( - schema, + schema.clone(), vec![a, b], Operator::Divide, Int32Array::from(vec![Some(4), Some(8), Some(16), Some(32), Some(64)]), @@ -4321,6 +4325,15 @@ mod tests { "{err}" ); + let a = Arc::new(Int32Array::from(vec![8, 32, 128, 512, 2048, 100])); + let b = Arc::new(Int32Array::from(vec![2, 4, 8, 16, 32, 0])); + apply_arithmetic::( + schema, + vec![a, b], + Operator::DivideUnchecked, + Int32Array::from(vec![Some(4), Some(8), Some(16), Some(32), Some(64), None]), + )?; + // decimal let schema = Arc::new(Schema::new(vec![ Field::new("a", DataType::Decimal128(25, 3), true), @@ -4330,7 +4343,7 @@ mod tests { let right_decimal_array = Arc::new(create_decimal_array(&[Some(0)], 25, 3)); let err = apply_arithmetic::( - schema, + schema.clone(), vec![left_decimal_array, right_decimal_array], Operator::Divide, create_decimal_array( @@ -4346,6 +4359,21 @@ mod tests { "{err}" ); + let left_decimal_array = + Arc::new(create_decimal_array(&[Some(1234567), Some(1234567)], 25, 3)); + let right_decimal_array = + Arc::new(create_decimal_array(&[Some(10), Some(0)], 25, 3)); + apply_arithmetic::( + schema, + vec![left_decimal_array, right_decimal_array], + Operator::DivideUnchecked, + create_decimal_array( + &[Some(12345670000000000000000000000000000), None], + 38, + 29, + ), + )?; + Ok(()) } diff --git a/datafusion/physical-expr/src/expressions/binary/kernels_arrow.rs b/datafusion/physical-expr/src/expressions/binary/kernels_arrow.rs index 9c6645c30371..93f3d8d4c438 100644 --- a/datafusion/physical-expr/src/expressions/binary/kernels_arrow.rs +++ b/datafusion/physical-expr/src/expressions/binary/kernels_arrow.rs @@ -19,8 +19,8 @@ //! destined for arrow-rs but are in datafusion until they are ported. use arrow::compute::{ - add_dyn, add_scalar_dyn, divide_dyn_checked, divide_scalar_dyn, modulus_dyn, - modulus_scalar_dyn, multiply_fixed_point, multiply_scalar_checked_dyn, + add_dyn, add_scalar_dyn, divide_dyn_checked, divide_dyn_opt, divide_scalar_dyn, + modulus_dyn, modulus_scalar_dyn, multiply_fixed_point, multiply_scalar_checked_dyn, multiply_scalar_dyn, subtract_dyn, subtract_scalar_dyn, try_unary, }; use arrow::datatypes::{Date32Type, Date64Type, Decimal128Type}; @@ -715,6 +715,24 @@ pub(crate) fn divide_dyn_checked_decimal( left: &dyn Array, right: &dyn Array, result_type: &DataType, +) -> Result { + divide_dyn_decimal(left, right, result_type, true) +} + +pub(crate) fn divide_dyn_opt_decimal( + left: &dyn Array, + right: &dyn Array, + result_type: &DataType, +) -> Result { + divide_dyn_decimal(left, right, result_type, false) +} + +#[inline] +fn divide_dyn_decimal( + left: &dyn Array, + right: &dyn Array, + result_type: &DataType, + checked: bool, ) -> Result { let (precision, scale) = get_precision_scale(result_type)?; @@ -724,7 +742,11 @@ pub(crate) fn divide_dyn_checked_decimal( // Restore to original precision and scale (metadata only) let (org_precision, org_scale) = get_precision_scale(right.data_type())?; let array = decimal_array_with_precision_scale(array, org_precision, org_scale)?; - let array = divide_dyn_checked(&array, right)?; + let array = if checked { + divide_dyn_checked(&array, right)? + } else { + divide_dyn_opt(&array, right)? + }; decimal_array_with_precision_scale(array, precision, scale) } @@ -2248,6 +2270,31 @@ mod tests { 29, ); assert_eq!(&expect, result); + // divide unchecked + let result_type = decimal_op_mathematics_type( + &Operator::DivideUnchecked, + left_decimal_array.data_type(), + right_decimal_array.data_type(), + ) + .unwrap(); + let result = divide_dyn_opt_decimal( + &left_decimal_array, + &right_decimal_array, + &result_type, + )?; + let result = as_decimal128_array(&result)?; + let expect = create_decimal_array( + &[ + Some(12345670000000000000000000000000000), + None, + Some(2244667272727272727272727272727272), + Some(-1003713008130081300813008130081300), + None, + ], + 38, + 29, + ); + assert_eq!(&expect, result); // modulus let result_type = decimal_op_mathematics_type( &Operator::Modulo, diff --git a/datafusion/substrait/src/logical_plan/producer.rs b/datafusion/substrait/src/logical_plan/producer.rs index ece1651683c2..bff14f89cee4 100644 --- a/datafusion/substrait/src/logical_plan/producer.rs +++ b/datafusion/substrait/src/logical_plan/producer.rs @@ -455,6 +455,7 @@ pub fn operator_to_name(op: Operator) -> &'static str { Operator::Minus => "substract", Operator::Multiply => "multiply", Operator::Divide => "divide", + Operator::DivideUnchecked => "divide_unchecked", Operator::Modulo => "mod", Operator::And => "and", Operator::Or => "or",