Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion datafusion/expr-common/src/signature.rs
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,7 @@ pub enum TypeSignatureClass {
Float,
Decimal,
Numeric,
/// Encompasses both the native Binary as well as arbitrarily sized FixedSizeBinary types
/// Encompasses both the native Binary/LargeBinary types as well as arbitrarily sized FixedSizeBinary types
Binary,
}

Expand Down
92 changes: 36 additions & 56 deletions datafusion/spark/src/function/math/hex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,23 +18,24 @@
use std::any::Any;
use std::sync::Arc;

use crate::function::error_utils::{
invalid_arg_count_exec_err, unsupported_data_type_exec_err,
};
use arrow::array::{Array, StringArray};
use arrow::datatypes::DataType;
use arrow::{
array::{as_dictionary_array, as_largestring_array, as_string_array},
datatypes::Int32Type,
};
use datafusion_common::cast::as_large_binary_array;
use datafusion_common::cast::as_string_view_array;
use datafusion_common::types::{logical_int64, logical_string, NativeType};
use datafusion_common::utils::take_function_args;
use datafusion_common::{
cast::{as_binary_array, as_fixed_size_binary_array, as_int64_array},
exec_err, DataFusionError,
};
use datafusion_expr::Signature;
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Volatility};
use datafusion_expr::{
Coercion, ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignature,
TypeSignatureClass, Volatility,
};
use std::fmt::Write;

/// <https://spark.apache.org/docs/latest/api/sql/index.html#hex>
Expand All @@ -52,8 +53,27 @@ impl Default for SparkHex {

impl SparkHex {
/// Creates a `SparkHex` with no aliases and a signature assembled from three
/// `TypeSignature::Coercible` variants (numeric, string, binary), each built
/// from a single `Coercion`.
pub fn new() -> Self {
// Implicit coercion built around the logical Int64 type, with
// `TypeSignatureClass::Numeric` and `NativeType::Int64` as the extra arguments.
// NOTE(review): presumably "any numeric input is cast to Int64" — confirm
// against the `Coercion::new_implicit` parameter documentation.
let int64 = Coercion::new_implicit(
TypeSignatureClass::Native(logical_int64()),
vec![TypeSignatureClass::Numeric],
NativeType::Int64,
);

// Exact (no implicit cast) coercion for the logical string type.
let string = Coercion::new_exact(TypeSignatureClass::Native(logical_string()));

// Exact (no implicit cast) coercion for the `Binary` type-signature class.
let binary = Coercion::new_exact(TypeSignatureClass::Binary);

// One `Coercible` variant per accepted input family; each variant lists
// exactly one coercion.
let variants = vec![
// accepts numeric types
TypeSignature::Coercible(vec![int64]),
// accepts string types (Utf8, Utf8View, LargeUtf8)
TypeSignature::Coercible(vec![string]),
// accepts binary types (Binary, FixedSizeBinary, LargeBinary)
TypeSignature::Coercible(vec![binary]),
];

Self {
// NOTE(review): `signature` is initialized twice below. This looks like the
// old (`user_defined`) and new (`one_of`) sides of a diff shown together;
// a struct literal may specify each field only once, so only one of the two
// lines can remain — presumably the `one_of` line, since it is the only
// consumer of `variants`. Confirm against the actual file.
signature: Signature::user_defined(Volatility::Immutable),
signature: Signature::one_of(variants, Volatility::Immutable),
aliases: vec![],
}
}
Expand Down Expand Up @@ -89,56 +109,6 @@ impl ScalarUDFImpl for SparkHex {
fn aliases(&self) -> &[String] {
&self.aliases
}

/// Resolves the argument type that `hex` will be evaluated with.
///
/// Exactly one argument type is expected; any other count yields the error
/// built by `invalid_arg_count_exec_err`.
///
/// * `Int64`, `Utf8`, `Utf8View`, `LargeUtf8`, `Binary` and `LargeBinary` are
///   returned unchanged.
/// * A `Dictionary` whose value type is one of the above is returned unchanged;
///   a dictionary over any other numeric value type keeps its key type and has
///   its value type replaced by `Int64`.
/// * Any other numeric type becomes `Int64`.
/// * Everything else yields the error built by `unsupported_data_type_exec_err`.
fn coerce_types(
    &self,
    arg_types: &[DataType],
) -> datafusion_common::Result<Vec<DataType>> {
    // Guard clause: bind the single argument type or report the bad arity.
    let [input] = arg_types else {
        return Err(invalid_arg_count_exec_err("hex", (1, 1), arg_types.len()));
    };

    // Types that are accepted exactly as they are.
    let passes_through = |dt: &DataType| {
        matches!(
            dt,
            DataType::Int64
                | DataType::Utf8
                | DataType::Utf8View
                | DataType::LargeUtf8
                | DataType::Binary
                | DataType::LargeBinary
        )
    };

    // Single place where the "unsupported type" error is built; it always
    // reports the full argument type, even for a bad dictionary value type.
    let unsupported = || {
        unsupported_data_type_exec_err("hex", "Numeric, String, or Binary", input)
    };

    match input {
        direct if passes_through(direct) => Ok(vec![direct.clone()]),
        DataType::Dictionary(key_type, value_type) => {
            let inner = value_type.as_ref();
            if passes_through(inner) {
                Ok(vec![input.clone()])
            } else if inner.is_numeric() {
                // Keep the dictionary's key type, widen its values to Int64.
                Ok(vec![DataType::Dictionary(
                    key_type.clone(),
                    Box::new(DataType::Int64),
                )])
            } else {
                Err(unsupported())
            }
        }
        numeric if numeric.is_numeric() => Ok(vec![DataType::Int64]),
        _ => Err(unsupported()),
    }
}
}

fn hex_int64(num: i64) -> String {
Expand Down Expand Up @@ -240,6 +210,16 @@ pub fn compute_hex(

Ok(ColumnarValue::Array(Arc::new(hexed)))
}
DataType::LargeBinary => {
let array = as_large_binary_array(array)?;

let hexed: StringArray = array
.iter()
.map(|v| v.map(|b| hex_bytes(b, lowercase)).transpose())
.collect::<Result<_, _>>()?;

Ok(ColumnarValue::Array(Arc::new(hexed)))
}
DataType::FixedSizeBinary(_) => {
let array = as_fixed_size_binary_array(array)?;

Expand Down
15 changes: 15 additions & 0 deletions datafusion/sqllogictest/test_files/spark/math/hex.slt
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,18 @@ SELECT hex(column1) FROM t_utf8view;
666F6F
NULL
666F6F62617262617A

query T
SELECT hex(column1) FROM VALUES (arrow_cast('hello', 'LargeBinary')), (NULL), (arrow_cast('world', 'LargeBinary'));
----
68656C6C6F
NULL
776F726C64

statement error Function 'hex' expects 1 arguments but received 2
SELECT hex(1, 2);

query T
SELECT hex(arrow_cast('test', 'LargeBinary')) as lar_b;
----
74657374