diff --git a/datafusion/functions/src/encoding/inner.rs b/datafusion/functions/src/encoding/inner.rs index e5314ad220c8..5346dce275a8 100644 --- a/datafusion/functions/src/encoding/inner.rs +++ b/datafusion/functions/src/encoding/inner.rs @@ -29,7 +29,9 @@ use base64::{ Engine as _, }; use datafusion_common::{ - cast::{as_generic_binary_array, as_generic_string_array}, + cast::{ + as_fixed_size_binary_array, as_generic_binary_array, as_generic_string_array, + }, not_impl_err, plan_err, utils::take_function_args, }; @@ -105,6 +107,7 @@ impl ScalarUDFImpl for EncodeFunc { Utf8View => Utf8, Binary => Utf8, LargeBinary => LargeUtf8, + FixedSizeBinary(_) => Utf8, Null => Null, _ => { return plan_err!( @@ -135,6 +138,7 @@ impl ScalarUDFImpl for EncodeFunc { DataType::LargeUtf8 => Ok(vec![DataType::LargeUtf8, DataType::Utf8]), DataType::Binary => Ok(vec![DataType::Binary, DataType::Utf8]), DataType::LargeBinary => Ok(vec![DataType::LargeBinary, DataType::Utf8]), + DataType::FixedSizeBinary(sz) => Ok(vec![DataType::FixedSizeBinary(*sz), DataType::Utf8]), _ => plan_err!( "1st argument should be Utf8 or Binary or Null, got {:?}", arg_types[0] @@ -246,6 +250,9 @@ fn encode_process(value: &ColumnarValue, encoding: Encoding) -> Result encoding.encode_utf8_array::(a.as_ref()), DataType::Binary => encoding.encode_binary_array::(a.as_ref()), DataType::LargeBinary => encoding.encode_binary_array::(a.as_ref()), + DataType::FixedSizeBinary(_) => { + encoding.encode_fixed_size_binary_array(a.as_ref()) + } other => exec_err!( "Unsupported data type {other:?} for function encode({encoding})" ), @@ -265,6 +272,9 @@ fn encode_process(value: &ColumnarValue, encoding: Encoding) -> Result Ok(encoding .encode_large_scalar(a.as_ref().map(|v: &Vec| v.as_slice()))), + ScalarValue::FixedSizeBinary(_, a) => Ok( + encoding.encode_scalar(a.as_ref().map(|v: &Vec| v.as_slice())) + ), other => exec_err!( "Unsupported data type {other:?} for function encode({encoding})" ), @@ -401,6 +411,15 @@ impl Encoding { Ok(ColumnarValue::Array(array)) } + fn encode_fixed_size_binary_array(self, value: &dyn Array) -> Result { + let input_value = as_fixed_size_binary_array(value)?; + let array: ArrayRef = match self { + Self::Base64 => encode_to_array!(base64_encode, input_value), + Self::Hex => encode_to_array!(hex_encode, input_value), + }; + Ok(ColumnarValue::Array(array)) + } + fn encode_utf8_array(self, value: &dyn Array) -> Result where T: OffsetSizeTrait, @@ -553,3 +572,29 @@ fn decode(args: &[ColumnarValue]) -> Result { }?; decode_process(expression, encoding) } + +#[cfg(test)] +mod tests { + #[test] + fn test_encode_fsb() { + use super::*; + + let value = vec![0u8; 16]; + let array = arrow::array::FixedSizeBinaryArray::try_from_sparse_iter_with_size( + vec![Some(value)].into_iter(), + 16, + ) + .unwrap(); + let value = ColumnarValue::Array(Arc::new(array)); + + let ColumnarValue::Array(result) = + encode_process(&value, Encoding::Base64).unwrap() + else { + panic!("unexpected value"); + }; + + let string_array = result.as_any().downcast_ref::().unwrap(); + let result_value = string_array.value(0); + assert_eq!(result_value, "AAAAAAAAAAAAAAAAAAAAAA"); + } +}