Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 44 additions & 1 deletion datafusion/functions/benches/iszero.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ use arrow::{
util::bench_util::create_primitive_array,
};
use criterion::{Criterion, criterion_group, criterion_main};
use datafusion_common::ScalarValue;
use datafusion_common::config::ConfigOptions;
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
use datafusion_functions::math::iszero;
Expand All @@ -31,6 +32,8 @@ use std::sync::Arc;

fn criterion_benchmark(c: &mut Criterion) {
let iszero = iszero();
let config_options = Arc::new(ConfigOptions::default());

for size in [1024, 4096, 8192] {
let f32_array = Arc::new(create_primitive_array::<Float32Type>(size, 0.2));
let batch_len = f32_array.len();
Expand All @@ -43,7 +46,6 @@ fn criterion_benchmark(c: &mut Criterion) {
})
.collect::<Vec<_>>();
let return_field = Arc::new(Field::new("f", DataType::Boolean, true));
let config_options = Arc::new(ConfigOptions::default());

c.bench_function(&format!("iszero f32 array: {size}"), |b| {
b.iter(|| {
Expand All @@ -60,6 +62,7 @@ fn criterion_benchmark(c: &mut Criterion) {
)
})
});

let f64_array = Arc::new(create_primitive_array::<Float64Type>(size, 0.2));
let batch_len = f64_array.len();
let f64_args = vec![ColumnarValue::Array(f64_array)];
Expand Down Expand Up @@ -88,6 +91,46 @@ fn criterion_benchmark(c: &mut Criterion) {
})
});
}

// Scalar benchmarks - run once since size doesn't affect scalar performance
let scalar_f32_args = vec![ColumnarValue::Scalar(ScalarValue::Float32(Some(1.0)))];
let scalar_f32_arg_fields = vec![Field::new("a", DataType::Float32, false).into()];
let return_field_scalar = Arc::new(Field::new("f", DataType::Boolean, false));

c.bench_function("iszero f32 scalar", |b| {
b.iter(|| {
black_box(
iszero
.invoke_with_args(ScalarFunctionArgs {
args: scalar_f32_args.clone(),
arg_fields: scalar_f32_arg_fields.clone(),
number_rows: 1,
return_field: Arc::clone(&return_field_scalar),
config_options: Arc::clone(&config_options),
})
.unwrap(),
)
})
});

let scalar_f64_args = vec![ColumnarValue::Scalar(ScalarValue::Float64(Some(1.0)))];
let scalar_f64_arg_fields = vec![Field::new("a", DataType::Float64, false).into()];

c.bench_function("iszero f64 scalar", |b| {
b.iter(|| {
black_box(
iszero
.invoke_with_args(ScalarFunctionArgs {
args: scalar_f64_args.clone(),
arg_fields: scalar_f64_arg_fields.clone(),
number_rows: 1,
return_field: Arc::clone(&return_field_scalar),
config_options: Arc::clone(&config_options),
})
.unwrap(),
)
})
});
}

criterion_group!(benches, criterion_benchmark);
Expand Down
119 changes: 46 additions & 73 deletions datafusion/functions/src/math/iszero.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,20 @@
use std::any::Any;
use std::sync::Arc;

use arrow::array::{ArrayRef, ArrowNativeTypeOp, AsArray, BooleanArray};
use arrow::array::{ArrowNativeTypeOp, AsArray, BooleanArray};
use arrow::datatypes::DataType::{Boolean, Float16, Float32, Float64};
use arrow::datatypes::{DataType, Float16Type, Float32Type, Float64Type};

use datafusion_common::types::NativeType;
use datafusion_common::{Result, ScalarValue, exec_err};
use datafusion_common::utils::take_function_args;
use datafusion_common::{Result, ScalarValue, internal_err};
use datafusion_expr::{Coercion, TypeSignatureClass};
use datafusion_expr::{
ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
Volatility,
};
use datafusion_macros::user_doc;

use crate::utils::make_scalar_function;

#[user_doc(
doc_section(label = "Math Functions"),
description = "Returns true if a given number is +0.0 or -0.0 otherwise returns false.",
Expand Down Expand Up @@ -90,79 +89,53 @@ impl ScalarUDFImpl for IsZeroFunc {
}

fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
// Handle NULL input
if args.args[0].data_type().is_null() {
return Ok(ColumnarValue::Scalar(ScalarValue::Boolean(None)));
let [arg] = take_function_args(self.name(), args.args)?;

match arg {
ColumnarValue::Scalar(scalar) => {
if scalar.is_null() {
return Ok(ColumnarValue::Scalar(ScalarValue::Boolean(None)));
}

match scalar {
ScalarValue::Float64(Some(v)) => {
Ok(ColumnarValue::Scalar(ScalarValue::Boolean(Some(v == 0.0))))
}
ScalarValue::Float32(Some(v)) => {
Ok(ColumnarValue::Scalar(ScalarValue::Boolean(Some(v == 0.0))))
}
ScalarValue::Float16(Some(v)) => Ok(ColumnarValue::Scalar(
ScalarValue::Boolean(Some(v.is_zero())),
)),
_ => {
internal_err!(
"Unexpected scalar type for iszero: {:?}",
scalar.data_type()
)
}
}
}
ColumnarValue::Array(array) => match array.data_type() {
Float64 => Ok(ColumnarValue::Array(Arc::new(BooleanArray::from_unary(
array.as_primitive::<Float64Type>(),
|x| x == 0.0,
)))),
Float32 => Ok(ColumnarValue::Array(Arc::new(BooleanArray::from_unary(
array.as_primitive::<Float32Type>(),
|x| x == 0.0,
)))),
Float16 => Ok(ColumnarValue::Array(Arc::new(BooleanArray::from_unary(
array.as_primitive::<Float16Type>(),
|x| x.is_zero(),
)))),
other => {
internal_err!("Unexpected data type {other:?} for function iszero")
}
},
}
make_scalar_function(iszero, vec![])(&args.args)
}

/// Returns this function's user-facing documentation by delegating to `self.doc()`.
// NOTE(review): `doc()` is presumably generated by the `#[user_doc]` attribute
// applied to the function struct — confirm against the macro's output.
fn documentation(&self) -> Option<&Documentation> {
self.doc()
}
}

/// Array kernel for the `iszero` SQL function.
///
/// Reads the first (and only expected) argument and produces a boolean array
/// with one flag per input element: `true` when the element compares equal to
/// zero (both `+0.0` and `-0.0` qualify), `false` otherwise.
///
/// # Errors
///
/// Returns an execution error when the argument is not a `Float16`, `Float32`
/// or `Float64` array.
fn iszero(args: &[ArrayRef]) -> Result<ArrayRef> {
    let input = &args[0];

    // Compute the concrete boolean array first; the `Arc` wrapping is shared
    // by every supported type and happens once below.
    let flags = match input.data_type() {
        Float64 => {
            BooleanArray::from_unary(input.as_primitive::<Float64Type>(), |v| v == 0.0)
        }
        Float32 => {
            BooleanArray::from_unary(input.as_primitive::<Float32Type>(), |v| v == 0.0)
        }
        // Float16 goes through `is_zero()` rather than comparing with a float literal.
        Float16 => {
            BooleanArray::from_unary(input.as_primitive::<Float16Type>(), |v| v.is_zero())
        }
        other => {
            return exec_err!("Unsupported data type {other:?} for function iszero");
        }
    };

    Ok(Arc::new(flags) as ArrayRef)
}

#[cfg(test)]
mod test {
    use std::sync::Arc;

    use arrow::array::{ArrayRef, Float32Array, Float64Array};

    use datafusion_common::cast::as_boolean_array;

    use crate::math::iszero::iszero;

    /// Runs `iszero` on a single input array and collects the per-row flags.
    fn zero_flags(input: ArrayRef) -> Vec<bool> {
        let output = iszero(&[input]).expect("failed to initialize function iszero");
        let flags =
            as_boolean_array(&output).expect("failed to initialize function iszero");

        (0..flags.len()).map(|row| flags.value(row)).collect()
    }

    /// `f64` input: only `0.0` and `-0.0` are reported as zero.
    #[test]
    fn test_iszero_f64() {
        let input: ArrayRef = Arc::new(Float64Array::from(vec![1.0, 0.0, 3.0, -0.0]));

        // Comparing whole vectors checks both the length (4) and every value.
        assert_eq!(zero_flags(input), vec![false, true, false, true]);
    }

    /// `f32` input: only `0.0` and `-0.0` are reported as zero.
    #[test]
    fn test_iszero_f32() {
        let input: ArrayRef = Arc::new(Float32Array::from(vec![1.0, 0.0, 3.0, -0.0]));

        // Comparing whole vectors checks both the length (4) and every value.
        assert_eq!(zero_flags(input), vec![false, true, false, true]);
    }
}