diff --git a/datafusion/functions/src/string/replace.rs b/datafusion/functions/src/string/replace.rs index cca68844a168..3f383b9332a5 100644 --- a/datafusion/functions/src/string/replace.rs +++ b/datafusion/functions/src/string/replace.rs @@ -17,9 +17,11 @@ use std::sync::Arc; -use arrow::array::{ArrayRef, GenericStringBuilder, OffsetSizeTrait}; +use arrow::array::{Array, ArrayRef, OffsetSizeTrait}; +use arrow::buffer::NullBuffer; use arrow::datatypes::DataType; +use crate::strings::GenericStringArrayBuilder; use crate::utils::{make_scalar_function, utf8_to_str_type}; use datafusion_common::cast::{as_generic_string_array, as_string_view_array}; use datafusion_common::types::logical_string; @@ -160,25 +162,29 @@ fn replace_view(args: &[ArrayRef]) -> Result { let from_array = as_string_view_array(&args[1])?; let to_array = as_string_view_array(&args[2])?; - let mut builder = GenericStringBuilder::::new(); + let len = string_array.len(); + let mut builder = GenericStringArrayBuilder::::with_capacity(len, 0); let mut buffer = String::new(); + let nulls = NullBuffer::union( + NullBuffer::union(string_array.nulls(), from_array.nulls()).as_ref(), + to_array.nulls(), + ); - for ((string, from), to) in string_array - .iter() - .zip(from_array.iter()) - .zip(to_array.iter()) - { - match (string, from, to) { - (Some(string), Some(from), Some(to)) => { - buffer.clear(); - replace_into_string(&mut buffer, string, from, to); - builder.append_value(&buffer); - } - _ => builder.append_null(), + for i in 0..len { + if nulls.as_ref().is_some_and(|n| n.is_null(i)) { + builder.append_placeholder(); + continue; } + // SAFETY: union of input nulls is non-null at i, so each input is too. + let string = unsafe { string_array.value_unchecked(i) }; + let from = unsafe { from_array.value_unchecked(i) }; + let to = unsafe { to_array.value_unchecked(i) }; + buffer.clear(); + replace_into_string(&mut buffer, string, from, to); + builder.append_value(&buffer); } - Ok(Arc::new(builder.finish()) as ArrayRef) + Ok(Arc::new(builder.finish(nulls)?) as ArrayRef) } /// Replaces all occurrences in string of substring from with substring to. @@ -188,25 +194,29 @@ fn replace(args: &[ArrayRef]) -> Result { let from_array = as_generic_string_array::(&args[1])?; let to_array = as_generic_string_array::(&args[2])?; - let mut builder = GenericStringBuilder::::new(); + let len = string_array.len(); + let mut builder = GenericStringArrayBuilder::::with_capacity(len, 0); let mut buffer = String::new(); + let nulls = NullBuffer::union( + NullBuffer::union(string_array.nulls(), from_array.nulls()).as_ref(), + to_array.nulls(), + ); - for ((string, from), to) in string_array - .iter() - .zip(from_array.iter()) - .zip(to_array.iter()) - { - match (string, from, to) { - (Some(string), Some(from), Some(to)) => { - buffer.clear(); - replace_into_string(&mut buffer, string, from, to); - builder.append_value(&buffer); - } - _ => builder.append_null(), + for i in 0..len { + if nulls.as_ref().is_some_and(|n| n.is_null(i)) { + builder.append_placeholder(); + continue; } + // SAFETY: union of input nulls is non-null at i, so each input is too. + let string = unsafe { string_array.value_unchecked(i) }; + let from = unsafe { from_array.value_unchecked(i) }; + let to = unsafe { to_array.value_unchecked(i) }; + buffer.clear(); + replace_into_string(&mut buffer, string, from, to); + builder.append_value(&buffer); } - Ok(Arc::new(builder.finish()) as ArrayRef) + Ok(Arc::new(builder.finish(nulls)?) as ArrayRef) } /// Helper function to perform string replacement into a reusable String buffer