diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml index 0281676cabf2..9675d03a0161 100644 --- a/datafusion/functions/Cargo.toml +++ b/datafusion/functions/Cargo.toml @@ -112,6 +112,11 @@ harness = false name = "make_date" required-features = ["datetime_expressions"] +[[bench]] +harness = false +name = "nullif" +required-features = ["core_expressions"] + [[bench]] harness = false name = "date_bin" diff --git a/datafusion/functions/benches/nullif.rs b/datafusion/functions/benches/nullif.rs new file mode 100644 index 000000000000..dfabad335835 --- /dev/null +++ b/datafusion/functions/benches/nullif.rs @@ -0,0 +1,42 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +extern crate criterion; + +use arrow::util::bench_util::create_string_array_with_len; +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_common::ScalarValue; +use datafusion_expr::ColumnarValue; +use datafusion_functions::core::nullif; +use std::sync::Arc; + +fn criterion_benchmark(c: &mut Criterion) { + let nullif = nullif(); + for size in [1024, 4096, 8192] { + let array = Arc::new(create_string_array_with_len::<i32>(size, 0.2, 32)); + let args = vec![ + ColumnarValue::Scalar(ScalarValue::Utf8(Some("abcd".to_string()))), + ColumnarValue::Array(array), + ]; + c.bench_function(&format!("nullif scalar array: {}", size), |b| { + b.iter(|| black_box(nullif.invoke(&args).unwrap())) + }); + } +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/datafusion/functions/src/core/nullif.rs b/datafusion/functions/src/core/nullif.rs index e8bf2db514c3..6fcfbd36416e 100644 --- a/datafusion/functions/src/core/nullif.rs +++ b/datafusion/functions/src/core/nullif.rs @@ -19,7 +19,6 @@ use arrow::datatypes::DataType; use datafusion_common::{exec_err, Result}; use datafusion_expr::ColumnarValue; -use arrow::array::Array; use arrow::compute::kernels::cmp::eq; use arrow::compute::kernels::nullif::nullif; use datafusion_common::ScalarValue; @@ -122,8 +121,13 @@ fn nullif_func(args: &[ColumnarValue]) -> Result<ColumnarValue> { Ok(ColumnarValue::Array(array)) } (ColumnarValue::Scalar(lhs), ColumnarValue::Array(rhs)) => { - let lhs = lhs.to_array_of_size(rhs.len())?; - let array = nullif(&lhs, &eq(&lhs, &rhs)?)?; + let lhs_s = lhs.to_scalar()?; + let lhs_a = lhs.to_array_of_size(rhs.len())?; + let array = nullif( + // nullif in arrow-select does not support Datum, so we need to convert to array + lhs_a.as_ref(), + &eq(&lhs_s, &rhs)?, + )?; Ok(ColumnarValue::Array(array)) } (ColumnarValue::Scalar(lhs), ColumnarValue::Scalar(rhs)) => {