From 9ae29cb8c40a9b0595a04461ca990f6d55f6ee69 Mon Sep 17 00:00:00 2001 From: Tobias Schwarzinger Date: Mon, 24 Nov 2025 09:19:10 +0100 Subject: [PATCH 1/2] Allocate a buffer of the correct length for ScalarValue::FixedSizeBinary in ScalarValue::to_array_of_size --- datafusion/common/src/scalar/mod.rs | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index 3fd5a3722455..354656b2672e 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -21,6 +21,8 @@ mod cache; mod consts; mod struct_builder; +use arrow::buffer::MutableBuffer; +use arrow::buffer::NullBuffer; use std::borrow::Borrow; use std::cmp::Ordering; use std::collections::{HashSet, VecDeque}; @@ -3055,7 +3057,19 @@ impl ScalarValue { ) .unwrap(), ), - None => Arc::new(FixedSizeBinaryArray::new_null(*s, size)), + None => { + // TODO: Replace with FixedSizeBinaryArray::new_null once a fix for + // https://github.com/apache/arrow-rs/issues/8900 is in the used arrow-rs + // version. + let capacity_in_bytes = + s.to_usize().unwrap().checked_mul(size).unwrap(); + Arc::new(FixedSizeBinaryArray::try_new( + *s, + // MutableBuffer::new_null is in bits. + MutableBuffer::new_null(capacity_in_bytes * 8).into(), + Some(NullBuffer::new_null(size)), + )?) + } }, ScalarValue::LargeBinary(e) => match e { Some(value) => Arc::new( @@ -5314,6 +5328,18 @@ mod tests { assert_eq!(empty_array.len(), 0); } + /// See https://github.com/apache/datafusion/issues/18870 + #[test] + fn test_to_array_of_size_for_none_fsb() { + let sv = ScalarValue::FixedSizeBinary(5, None); + let result = sv + .to_array_of_size(2) + .expect("Failed to convert to array of size"); + assert_eq!(result.len(), 2); + assert_eq!(result.null_count(), 2); + assert_eq!(result.as_fixed_size_binary().values().len(), 10); + } + #[test] fn test_list_to_array_string() { let scalars = vec![ From b79d12210d00c0ae21092dbcd2871f19edf45dc9 Mon Sep 17 00:00:00 2001 From: Tobias Schwarzinger Date: Wed, 26 Nov 2025 10:18:34 +0100 Subject: [PATCH 2/2] Use FixedSizeBinaryBuilder instead of directly constructing the FixedSizeBinaryArray in `ScalarValue::to_array_of_size` --- datafusion/common/src/scalar/mod.rs | 31 +++++++++++------------------ 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index 354656b2672e..ddb8b9a71fc9 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -21,8 +21,6 @@ mod cache; mod consts; mod struct_builder; -use arrow::buffer::MutableBuffer; -use arrow::buffer::NullBuffer; use std::borrow::Borrow; use std::cmp::Ordering; use std::collections::{HashSet, VecDeque}; @@ -63,15 +61,15 @@ use arrow::array::{ Date64Array, Decimal128Array, Decimal256Array, Decimal32Array, Decimal64Array, DictionaryArray, DurationMicrosecondArray, DurationMillisecondArray, DurationNanosecondArray, DurationSecondArray, FixedSizeBinaryArray, - FixedSizeListArray, Float16Array, Float32Array, Float64Array, GenericListArray, - Int16Array, Int32Array, Int64Array, Int8Array, IntervalDayTimeArray, - IntervalMonthDayNanoArray, IntervalYearMonthArray, LargeBinaryArray, LargeListArray, - LargeStringArray, ListArray, MapArray, MutableArrayData, OffsetSizeTrait, - PrimitiveArray, Scalar, StringArray, StringViewArray, StructArray, - Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray, - Time64NanosecondArray, TimestampMicrosecondArray, TimestampMillisecondArray, - TimestampNanosecondArray, TimestampSecondArray, UInt16Array, UInt32Array, - UInt64Array, UInt8Array, UnionArray, + FixedSizeBinaryBuilder, FixedSizeListArray, Float16Array, Float32Array, Float64Array, + GenericListArray, Int16Array, Int32Array, Int64Array, Int8Array, + IntervalDayTimeArray, IntervalMonthDayNanoArray, IntervalYearMonthArray, + LargeBinaryArray, LargeListArray, LargeStringArray, ListArray, MapArray, + MutableArrayData, OffsetSizeTrait, PrimitiveArray, Scalar, StringArray, + StringViewArray, StructArray, Time32MillisecondArray, Time32SecondArray, + Time64MicrosecondArray, Time64NanosecondArray, TimestampMicrosecondArray, + TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray, + UInt16Array, UInt32Array, UInt64Array, UInt8Array, UnionArray, }; use arrow::buffer::{BooleanBuffer, ScalarBuffer}; use arrow::compute::kernels::cast::{cast_with_options, CastOptions}; @@ -3061,14 +3059,9 @@ impl ScalarValue { // TODO: Replace with FixedSizeBinaryArray::new_null once a fix for // https://github.com/apache/arrow-rs/issues/8900 is in the used arrow-rs // version. - let capacity_in_bytes = - s.to_usize().unwrap().checked_mul(size).unwrap(); - Arc::new(FixedSizeBinaryArray::try_new( - *s, - // MutableBuffer::new_null is in bits. - MutableBuffer::new_null(capacity_in_bytes * 8).into(), - Some(NullBuffer::new_null(size)), - )?) + let mut builder = FixedSizeBinaryBuilder::new(*s); + builder.append_nulls(size); + Arc::new(builder.finish()) } }, ScalarValue::LargeBinary(e) => match e {