diff --git a/datafusion/functions/src/crypto/basic.rs b/datafusion/functions/src/crypto/basic.rs index f1b6c71763cf3..664bb94b70e2c 100644 --- a/datafusion/functions/src/crypto/basic.rs +++ b/datafusion/functions/src/crypto/basic.rs @@ -18,10 +18,8 @@ //! "crypto" DataFusion functions use arrow::array::{ - Array, ArrayRef, BinaryArray, BinaryArrayType, BinaryViewArray, GenericBinaryArray, - OffsetSizeTrait, + Array, ArrayRef, AsArray, BinaryArray, BinaryArrayType, StringViewArray, }; -use arrow::array::{AsArray, GenericStringArray, StringViewArray}; use arrow::datatypes::DataType; use blake2::{Blake2b512, Blake2s256, Digest}; use blake3::Hasher as Blake3; @@ -84,18 +82,7 @@ define_digest_function!( "computes blake3 hash digest of the given input" ); -macro_rules! digest_to_scalar { - ($METHOD: ident, $INPUT:expr) => {{ - ScalarValue::Binary($INPUT.as_ref().map(|v| { - let mut digest = $METHOD::default(); - digest.update(v); - #[allow(deprecated)] - digest.finalize().as_slice().to_vec() - })) - }}; -} - -#[derive(Debug, Copy, Clone)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] pub enum DigestAlgorithm { Md5, Sha224, @@ -107,23 +94,6 @@ pub enum DigestAlgorithm { Blake3, } -/// Digest computes a binary hash of the given data, accepts Utf8 or LargeUtf8 and returns a [`ColumnarValue`]. -/// Second argument is the algorithm to use. -/// Standard algorithms are md5, sha1, sha224, sha256, sha384 and sha512. -pub fn digest(args: &[ColumnarValue]) -> Result { - let [data, digest_algorithm] = take_function_args("digest", args)?; - let digest_algorithm = match digest_algorithm { - ColumnarValue::Scalar(scalar) => match scalar.try_as_str() { - Some(Some(method)) => method.parse::(), - _ => exec_err!("Unsupported data type {scalar:?} for function digest"), - }, - ColumnarValue::Array(_) => { - internal_err!("Digest using dynamically decided method is not yet supported") - } - }?; - digest_process(data, digest_algorithm) -} - impl FromStr for DigestAlgorithm { type Err = DataFusionError; fn from_str(name: &str) -> Result { @@ -183,7 +153,7 @@ pub fn md5(args: &[ColumnarValue]) -> Result { ColumnarValue::Scalar(ScalarValue::Binary(opt)) => { ColumnarValue::Scalar(ScalarValue::Utf8View(opt.map(hex_encode::<_>))) } - _ => return exec_err!("Impossibly got invalid results from digest"), + _ => return internal_err!("Impossibly got invalid results from digest"), }) } @@ -198,25 +168,7 @@ fn hex_encode>(data: T) -> String { } s } -pub fn utf8_or_binary_to_binary_type( - arg_type: &DataType, - name: &str, -) -> Result { - Ok(match arg_type { - DataType::Utf8View - | DataType::LargeUtf8 - | DataType::Utf8 - | DataType::Binary - | DataType::BinaryView - | DataType::LargeBinary => DataType::Binary, - DataType::Null => DataType::Null, - _ => { - return plan_err!( - "The {name:?} function can only accept strings or binary arrays." - ); - } - }) -} + macro_rules! digest_to_array { ($METHOD:ident, $INPUT:expr) => {{ let binary_array: BinaryArray = $INPUT @@ -232,9 +184,20 @@ macro_rules! digest_to_array { Arc::new(binary_array) }}; } + +macro_rules! digest_to_scalar { + ($METHOD: ident, $INPUT:expr) => {{ + ScalarValue::Binary($INPUT.as_ref().map(|v| { + let mut digest = $METHOD::default(); + digest.update(v); + digest.finalize().as_slice().to_vec() + })) + }}; +} + impl DigestAlgorithm { /// digest an optional string to its hash value, null values are returned as is - pub fn digest_scalar(self, value: Option<&[u8]>) -> ColumnarValue { + fn digest_scalar(self, value: Option<&[u8]>) -> ColumnarValue { ColumnarValue::Scalar(match self { Self::Md5 => digest_to_scalar!(Md5, value), Self::Sha224 => digest_to_scalar!(Sha224, value), @@ -251,49 +214,7 @@ impl DigestAlgorithm { }) } - /// digest a binary array to their hash values - pub fn digest_binary_array(self, value: &dyn Array) -> Result - where - T: OffsetSizeTrait, - { - let array = match value.data_type() { - DataType::Binary | DataType::LargeBinary => { - let v = value.as_binary::(); - self.digest_binary_array_impl::<&GenericBinaryArray>(&v) - } - DataType::BinaryView => { - let v = value.as_binary_view(); - self.digest_binary_array_impl::<&BinaryViewArray>(&v) - } - other => { - return exec_err!("unsupported type for digest_utf_array: {other:?}") - } - }; - Ok(ColumnarValue::Array(array)) - } - - /// digest a string array to their hash values - pub fn digest_utf8_array(self, value: &dyn Array) -> Result - where - T: OffsetSizeTrait, - { - let array = match value.data_type() { - DataType::Utf8 | DataType::LargeUtf8 => { - let v = value.as_string::(); - self.digest_utf8_array_impl::<&GenericStringArray>(&v) - } - DataType::Utf8View => { - let v = value.as_string_view(); - self.digest_utf8_array_impl::<&StringViewArray>(&v) - } - other => { - return exec_err!("unsupported type for digest_utf_array: {other:?}") - } - }; - Ok(ColumnarValue::Array(array)) - } - - pub fn digest_utf8_array_impl<'a, StringArrType>( + fn digest_utf8_array_impl<'a, StringArrType>( self, input_value: &StringArrType, ) -> ArrayRef @@ -324,7 +245,7 @@ impl DigestAlgorithm { } } - pub fn digest_binary_array_impl<'a, BinaryArrType>( + fn digest_binary_array_impl<'a, BinaryArrType>( self, input_value: &BinaryArrType, ) -> ArrayRef @@ -355,26 +276,40 @@ impl DigestAlgorithm { } } } + pub fn digest_process( value: &ColumnarValue, digest_algorithm: DigestAlgorithm, ) -> Result { match value { - ColumnarValue::Array(a) => match a.data_type() { - DataType::Utf8View => digest_algorithm.digest_utf8_array::(a.as_ref()), - DataType::Utf8 => digest_algorithm.digest_utf8_array::(a.as_ref()), - DataType::LargeUtf8 => digest_algorithm.digest_utf8_array::(a.as_ref()), - DataType::Binary => digest_algorithm.digest_binary_array::(a.as_ref()), - DataType::LargeBinary => { - digest_algorithm.digest_binary_array::(a.as_ref()) - } - DataType::BinaryView => { - digest_algorithm.digest_binary_array::(a.as_ref()) - } - other => exec_err!( - "Unsupported data type {other:?} for function {digest_algorithm}" - ), - }, + ColumnarValue::Array(a) => { + let output = match a.data_type() { + DataType::Utf8View => { + digest_algorithm.digest_utf8_array_impl(&a.as_string_view()) + } + DataType::Utf8 => { + digest_algorithm.digest_utf8_array_impl(&a.as_string::()) + } + DataType::LargeUtf8 => { + digest_algorithm.digest_utf8_array_impl(&a.as_string::()) + } + DataType::Binary => { + digest_algorithm.digest_binary_array_impl(&a.as_binary::()) + } + DataType::LargeBinary => { + digest_algorithm.digest_binary_array_impl(&a.as_binary::()) + } + DataType::BinaryView => { + digest_algorithm.digest_binary_array_impl(&a.as_binary_view()) + } + other => { + return exec_err!( + "Unsupported data type {other:?} for function {digest_algorithm}" + ) + } + }; + Ok(ColumnarValue::Array(output)) + } ColumnarValue::Scalar(scalar) => { match scalar { ScalarValue::Utf8View(a) diff --git a/datafusion/functions/src/crypto/digest.rs b/datafusion/functions/src/crypto/digest.rs index a4999f72f8d56..ccc8e72bfb537 100644 --- a/datafusion/functions/src/crypto/digest.rs +++ b/datafusion/functions/src/crypto/digest.rs @@ -15,11 +15,13 @@ // specific language governing permissions and limitations // under the License. -//! "crypto" DataFusion functions -use super::basic::{digest, utf8_or_binary_to_binary_type}; +use crate::crypto::basic::{digest_process, DigestAlgorithm}; + use arrow::datatypes::DataType; use datafusion_common::{ + exec_err, not_impl_err, types::{logical_binary, logical_string}, + utils::take_function_args, Result, }; use datafusion_expr::{ @@ -36,16 +38,16 @@ use std::any::Any; syntax_example = "digest(expression, algorithm)", sql_example = r#"```sql > select digest('foo', 'sha256'); -+------------------------------------------+ -| digest(Utf8("foo"), Utf8("sha256")) | -+------------------------------------------+ -| | -+------------------------------------------+ ++------------------------------------------------------------------+ +| digest(Utf8("foo"),Utf8("sha256")) | ++------------------------------------------------------------------+ +| 2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae | ++------------------------------------------------------------------+ ```"#, standard_argument(name = "expression", prefix = "String"), argument( name = "algorithm", - description = "String expression specifying algorithm to use. Must be one of: + description = "String expression specifying algorithm to use. Must be one of: - md5 - sha224 - sha256 @@ -60,6 +62,7 @@ use std::any::Any; pub struct DigestFunc { signature: Signature, } + impl Default for DigestFunc { fn default() -> Self { Self::new() @@ -85,6 +88,7 @@ impl DigestFunc { } } } + impl ScalarUDFImpl for DigestFunc { fn as_any(&self) -> &dyn Any { self @@ -98,14 +102,35 @@ impl ScalarUDFImpl for DigestFunc { &self.signature } - fn return_type(&self, arg_types: &[DataType]) -> Result { - utf8_or_binary_to_binary_type(&arg_types[0], self.name()) + fn return_type(&self, _arg_types: &[DataType]) -> Result { + Ok(DataType::Binary) } + fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { - digest(&args.args) + let [data, digest_algorithm] = take_function_args(self.name(), &args.args)?; + digest(data, digest_algorithm) } fn documentation(&self) -> Option<&Documentation> { self.doc() } } + +/// Compute binary hash of the given `data` (String or Binary array), according +/// to the specified `digest_algorithm`. See [`DigestAlgorithm`] for supported +/// algorithms. +fn digest( + data: &ColumnarValue, + digest_algorithm: &ColumnarValue, +) -> Result { + let digest_algorithm = match digest_algorithm { + ColumnarValue::Scalar(scalar) => match scalar.try_as_str() { + Some(Some(method)) => method.parse::(), + _ => exec_err!("Unsupported data type {scalar:?} for function digest"), + }, + ColumnarValue::Array(_) => { + not_impl_err!("Digest using dynamically decided method is not yet supported") + } + }?; + digest_process(data, digest_algorithm) +} diff --git a/datafusion/functions/src/crypto/md5.rs b/datafusion/functions/src/crypto/md5.rs index 88859fdee34a7..325d2d00a5697 100644 --- a/datafusion/functions/src/crypto/md5.rs +++ b/datafusion/functions/src/crypto/md5.rs @@ -15,12 +15,10 @@ // specific language governing permissions and limitations // under the License. -//! "crypto" DataFusion functions use crate::crypto::basic::md5; use arrow::datatypes::DataType; use datafusion_common::{ - plan_err, - types::{logical_binary, logical_string, NativeType}, + types::{logical_binary, logical_string}, Result, }; use datafusion_expr::{ @@ -37,11 +35,11 @@ use std::any::Any; syntax_example = "md5(expression)", sql_example = r#"```sql > select md5('foo'); -+-------------------------------------+ -| md5(Utf8("foo")) | -+-------------------------------------+ -| | -+-------------------------------------+ ++----------------------------------+ +| md5(Utf8("foo")) | ++----------------------------------+ +| acbd18db4cc2f85cedef654fccc4a4d8 | ++----------------------------------+ ```"#, standard_argument(name = "expression", prefix = "String") )] @@ -49,6 +47,7 @@ use std::any::Any; pub struct Md5Func { signature: Signature, } + impl Default for Md5Func { fn default() -> Self { Self::new() @@ -60,15 +59,11 @@ impl Md5Func { Self { signature: Signature::one_of( vec![ - TypeSignature::Coercible(vec![Coercion::new_implicit( - TypeSignatureClass::Native(logical_binary()), - vec![TypeSignatureClass::Native(logical_string())], - NativeType::String, + TypeSignature::Coercible(vec![Coercion::new_exact( + TypeSignatureClass::Native(logical_string()), )]), - TypeSignature::Coercible(vec![Coercion::new_implicit( + TypeSignature::Coercible(vec![Coercion::new_exact( TypeSignatureClass::Native(logical_binary()), - vec![TypeSignatureClass::Native(logical_binary())], - NativeType::Binary, )]), ], Volatility::Immutable, @@ -76,6 +71,7 @@ impl Md5Func { } } } + impl ScalarUDFImpl for Md5Func { fn as_any(&self) -> &dyn Any { self @@ -89,30 +85,10 @@ impl ScalarUDFImpl for Md5Func { &self.signature } - fn return_type(&self, arg_types: &[DataType]) -> Result { - use DataType::*; - Ok(match &arg_types[0] { - LargeUtf8 | LargeBinary => Utf8View, - Utf8View | Utf8 | Binary | BinaryView => Utf8View, - Null => Null, - Dictionary(_, t) => match **t { - LargeUtf8 | LargeBinary => Utf8View, - Utf8 | Binary | BinaryView => Utf8View, - Null => Null, - _ => { - return plan_err!( - "the md5 can only accept strings but got {:?}", - **t - ); - } - }, - other => { - return plan_err!( - "The md5 function can only accept strings. Got {other}" - ); - } - }) + fn return_type(&self, _arg_types: &[DataType]) -> Result { + Ok(DataType::Utf8View) } + fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { md5(&args.args) } diff --git a/datafusion/functions/src/crypto/mod.rs b/datafusion/functions/src/crypto/mod.rs index 62ea3c2e27371..fd15db44c795d 100644 --- a/datafusion/functions/src/crypto/mod.rs +++ b/datafusion/functions/src/crypto/mod.rs @@ -23,16 +23,13 @@ use std::sync::Arc; pub mod basic; pub mod digest; pub mod md5; -pub mod sha224; -pub mod sha256; -pub mod sha384; -pub mod sha512; +pub mod sha; make_udf_function!(digest::DigestFunc, digest); make_udf_function!(md5::Md5Func, md5); -make_udf_function!(sha224::SHA224Func, sha224); -make_udf_function!(sha256::SHA256Func, sha256); -make_udf_function!(sha384::SHA384Func, sha384); -make_udf_function!(sha512::SHA512Func, sha512); +make_udf_function!(sha::SHAFunc, sha224, sha::SHAFunc::sha224); +make_udf_function!(sha::SHAFunc, sha256, sha::SHAFunc::sha256); +make_udf_function!(sha::SHAFunc, sha384, sha::SHAFunc::sha384); +make_udf_function!(sha::SHAFunc, sha512, sha::SHAFunc::sha512); pub mod expr_fn { export_functions!(( diff --git a/datafusion/functions/src/crypto/sha.rs b/datafusion/functions/src/crypto/sha.rs new file mode 100644 index 0000000000000..e7b1cef3cebe3 --- /dev/null +++ b/datafusion/functions/src/crypto/sha.rs @@ -0,0 +1,175 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::crypto::basic::{digest_process, DigestAlgorithm}; + +use arrow::datatypes::DataType; +use datafusion_common::{ + types::{logical_binary, logical_string}, + utils::take_function_args, + Result, +}; +use datafusion_expr::{ + ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature, + TypeSignature, Volatility, +}; +use datafusion_expr_common::signature::{Coercion, TypeSignatureClass}; +use datafusion_macros::user_doc; +use std::any::Any; + +#[user_doc( + doc_section(label = "Hashing Functions"), + description = "Computes the SHA-224 hash of a binary string.", + syntax_example = "sha224(expression)", + sql_example = r#"```sql +> select sha224('foo'); ++----------------------------------------------------------+ +| sha224(Utf8("foo")) | ++----------------------------------------------------------+ +| 0808f64e60d58979fcb676c96ec938270dea42445aeefcd3a4e6f8db | ++----------------------------------------------------------+ +```"#, + standard_argument(name = "expression", prefix = "String") +)] +struct SHA224Doc; + +#[user_doc( + doc_section(label = "Hashing Functions"), + description = "Computes the SHA-256 hash of a binary string.", + syntax_example = "sha256(expression)", + sql_example = r#"```sql +> select sha256('foo'); ++------------------------------------------------------------------+ +| sha256(Utf8("foo")) | ++------------------------------------------------------------------+ +| 2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae | ++------------------------------------------------------------------+ +```"#, + standard_argument(name = "expression", prefix = "String") +)] +struct SHA256Doc; + +#[user_doc( + doc_section(label = "Hashing Functions"), + description = "Computes the SHA-384 hash of a binary string.", + syntax_example = "sha384(expression)", + sql_example = r#"```sql +> select sha384('foo'); ++--------------------------------------------------------------------------------------------------+ +| sha384(Utf8("foo")) | ++--------------------------------------------------------------------------------------------------+ +| 98c11ffdfdd540676b1a137cb1a22b2a70350c9a44171d6b1180c6be5cbb2ee3f79d532c8a1dd9ef2e8e08e752a3babb | ++--------------------------------------------------------------------------------------------------+ +```"#, + standard_argument(name = "expression", prefix = "String") +)] +struct SHA384Doc; + +#[user_doc( + doc_section(label = "Hashing Functions"), + description = "Computes the SHA-512 hash of a binary string.", + syntax_example = "sha512(expression)", + sql_example = r#"```sql +> select sha512('foo'); ++----------------------------------------------------------------------------------------------------------------------------------+ +| sha512(Utf8("foo")) | ++----------------------------------------------------------------------------------------------------------------------------------+ +| f7fbba6e0636f890e56fbbf3283e524c6fa3204ae298382d624741d0dc6638326e282c41be5e4254d8820772c5518a2c5a8c0c7f7eda19594a7eb539453e1ed7 | ++----------------------------------------------------------------------------------------------------------------------------------+ +```"#, + standard_argument(name = "expression", prefix = "String") +)] +struct SHA512Doc; + +#[derive(Debug, PartialEq, Eq, Hash)] +pub struct SHAFunc { + signature: Signature, + name: &'static str, + algorithm: DigestAlgorithm, +} + +impl SHAFunc { + pub fn sha224() -> Self { + Self::new("sha224", DigestAlgorithm::Sha224) + } + + pub fn sha256() -> Self { + Self::new("sha256", DigestAlgorithm::Sha256) + } + + pub fn sha384() -> Self { + Self::new("sha384", DigestAlgorithm::Sha384) + } + + pub fn sha512() -> Self { + Self::new("sha512", DigestAlgorithm::Sha512) + } + + fn new(name: &'static str, algorithm: DigestAlgorithm) -> Self { + Self { + signature: Signature::one_of( + vec![ + TypeSignature::Coercible(vec![Coercion::new_exact( + TypeSignatureClass::Native(logical_string()), + )]), + TypeSignature::Coercible(vec![Coercion::new_exact( + TypeSignatureClass::Native(logical_binary()), + )]), + ], + Volatility::Immutable, + ), + name, + algorithm, + } + } +} + +impl ScalarUDFImpl for SHAFunc { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + self.name + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> Result { + Ok(DataType::Binary) + } + + fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { + let [data] = take_function_args(self.name(), args.args)?; + digest_process(&data, self.algorithm) + } + + fn documentation(&self) -> Option<&Documentation> { + match self.algorithm { + DigestAlgorithm::Sha224 => SHA224Doc {}.doc(), + DigestAlgorithm::Sha256 => SHA256Doc {}.doc(), + DigestAlgorithm::Sha384 => SHA384Doc {}.doc(), + DigestAlgorithm::Sha512 => SHA512Doc {}.doc(), + DigestAlgorithm::Md5 + | DigestAlgorithm::Blake2s + | DigestAlgorithm::Blake2b + | DigestAlgorithm::Blake3 => unreachable!(), + } + } +} diff --git a/datafusion/functions/src/crypto/sha224.rs b/datafusion/functions/src/crypto/sha224.rs deleted file mode 100644 index 69b79cce72c4e..0000000000000 --- a/datafusion/functions/src/crypto/sha224.rs +++ /dev/null @@ -1,104 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! "crypto" DataFusion functions -use super::basic::{sha224, utf8_or_binary_to_binary_type}; -use arrow::datatypes::DataType; -use datafusion_common::{ - types::{logical_binary, logical_string, NativeType}, - Result, -}; -use datafusion_expr::{ - ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature, - TypeSignature, Volatility, -}; -use datafusion_expr_common::signature::{Coercion, TypeSignatureClass}; -use datafusion_macros::user_doc; -use std::any::Any; - -#[user_doc( - doc_section(label = "Hashing Functions"), - description = "Computes the SHA-224 hash of a binary string.", - syntax_example = "sha224(expression)", - sql_example = r#"```sql -> select sha224('foo'); -+------------------------------------------+ -| sha224(Utf8("foo")) | -+------------------------------------------+ -| | -+------------------------------------------+ -```"#, - standard_argument(name = "expression", prefix = "String") -)] -#[derive(Debug, PartialEq, Eq, Hash)] -pub struct SHA224Func { - signature: Signature, -} - -impl Default for SHA224Func { - fn default() -> Self { - Self::new() - } -} - -impl SHA224Func { - pub fn new() -> Self { - Self { - signature: Signature::one_of( - vec![ - TypeSignature::Coercible(vec![Coercion::new_implicit( - TypeSignatureClass::Native(logical_binary()), - vec![TypeSignatureClass::Native(logical_string())], - NativeType::String, - )]), - TypeSignature::Coercible(vec![Coercion::new_implicit( - TypeSignatureClass::Native(logical_binary()), - vec![TypeSignatureClass::Native(logical_binary())], - NativeType::Binary, - )]), - ], - Volatility::Immutable, - ), - } - } -} - -impl ScalarUDFImpl for SHA224Func { - fn as_any(&self) -> &dyn Any { - self - } - - fn name(&self) -> &str { - "sha224" - } - - fn signature(&self) -> &Signature { - &self.signature - } - - fn return_type(&self, arg_types: &[DataType]) -> Result { - utf8_or_binary_to_binary_type(&arg_types[0], self.name()) - } - - fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { - sha224(&args.args) - } - - fn documentation(&self) -> Option<&Documentation> { - self.doc() - } -} diff --git a/datafusion/functions/src/crypto/sha256.rs b/datafusion/functions/src/crypto/sha256.rs deleted file mode 100644 index 9a948ba50c9e1..0000000000000 --- a/datafusion/functions/src/crypto/sha256.rs +++ /dev/null @@ -1,102 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! "crypto" DataFusion functions -use super::basic::{sha256, utf8_or_binary_to_binary_type}; -use arrow::datatypes::DataType; -use datafusion_common::{ - types::{logical_binary, logical_string, NativeType}, - Result, -}; -use datafusion_expr::{ - ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature, - TypeSignature, Volatility, -}; -use datafusion_expr_common::signature::{Coercion, TypeSignatureClass}; -use datafusion_macros::user_doc; -use std::any::Any; - -#[user_doc( - doc_section(label = "Hashing Functions"), - description = "Computes the SHA-256 hash of a binary string.", - syntax_example = "sha256(expression)", - sql_example = r#"```sql -> select sha256('foo'); -+--------------------------------------+ -| sha256(Utf8("foo")) | -+--------------------------------------+ -| | -+--------------------------------------+ -```"#, - standard_argument(name = "expression", prefix = "String") -)] -#[derive(Debug, PartialEq, Eq, Hash)] -pub struct SHA256Func { - signature: Signature, -} -impl Default for SHA256Func { - fn default() -> Self { - Self::new() - } -} - -impl SHA256Func { - pub fn new() -> Self { - Self { - signature: Signature::one_of( - vec![ - TypeSignature::Coercible(vec![Coercion::new_implicit( - TypeSignatureClass::Native(logical_binary()), - vec![TypeSignatureClass::Native(logical_string())], - NativeType::String, - )]), - TypeSignature::Coercible(vec![Coercion::new_implicit( - TypeSignatureClass::Native(logical_binary()), - vec![TypeSignatureClass::Native(logical_binary())], - NativeType::Binary, - )]), - ], - Volatility::Immutable, - ), - } - } -} -impl ScalarUDFImpl for SHA256Func { - fn as_any(&self) -> &dyn Any { - self - } - - fn name(&self) -> &str { - "sha256" - } - - fn signature(&self) -> &Signature { - &self.signature - } - - fn return_type(&self, arg_types: &[DataType]) -> Result { - utf8_or_binary_to_binary_type(&arg_types[0], self.name()) - } - - fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { - sha256(&args.args) - } - - fn documentation(&self) -> Option<&Documentation> { - self.doc() - } -} diff --git a/datafusion/functions/src/crypto/sha384.rs b/datafusion/functions/src/crypto/sha384.rs deleted file mode 100644 index 9e363cf883d29..0000000000000 --- a/datafusion/functions/src/crypto/sha384.rs +++ /dev/null @@ -1,102 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! "crypto" DataFusion functions -use super::basic::{sha384, utf8_or_binary_to_binary_type}; -use arrow::datatypes::DataType; -use datafusion_common::{ - types::{logical_binary, logical_string, NativeType}, - Result, -}; -use datafusion_expr::{ - ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature, - TypeSignature, Volatility, -}; -use datafusion_expr_common::signature::{Coercion, TypeSignatureClass}; -use datafusion_macros::user_doc; -use std::any::Any; - -#[user_doc( - doc_section(label = "Hashing Functions"), - description = "Computes the SHA-384 hash of a binary string.", - syntax_example = "sha384(expression)", - sql_example = r#"```sql -> select sha384('foo'); -+-----------------------------------------+ -| sha384(Utf8("foo")) | -+-----------------------------------------+ -| | -+-----------------------------------------+ -```"#, - standard_argument(name = "expression", prefix = "String") -)] -#[derive(Debug, PartialEq, Eq, Hash)] -pub struct SHA384Func { - signature: Signature, -} -impl Default for SHA384Func { - fn default() -> Self { - Self::new() - } -} - -impl SHA384Func { - pub fn new() -> Self { - Self { - signature: Signature::one_of( - vec![ - TypeSignature::Coercible(vec![Coercion::new_implicit( - TypeSignatureClass::Native(logical_binary()), - vec![TypeSignatureClass::Native(logical_string())], - NativeType::String, - )]), - TypeSignature::Coercible(vec![Coercion::new_implicit( - TypeSignatureClass::Native(logical_binary()), - vec![TypeSignatureClass::Native(logical_binary())], - NativeType::Binary, - )]), - ], - Volatility::Immutable, - ), - } - } -} -impl ScalarUDFImpl for SHA384Func { - fn as_any(&self) -> &dyn Any { - self - } - - fn name(&self) -> &str { - "sha384" - } - - fn signature(&self) -> &Signature { - &self.signature - } - - fn return_type(&self, arg_types: &[DataType]) -> Result { - utf8_or_binary_to_binary_type(&arg_types[0], self.name()) - } - - fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { - sha384(&args.args) - } - - fn documentation(&self) -> Option<&Documentation> { - self.doc() - } -} diff --git a/datafusion/functions/src/crypto/sha512.rs b/datafusion/functions/src/crypto/sha512.rs deleted file mode 100644 index a185698ca46ff..0000000000000 --- a/datafusion/functions/src/crypto/sha512.rs +++ /dev/null @@ -1,102 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! "crypto" DataFusion functions -use super::basic::{sha512, utf8_or_binary_to_binary_type}; -use arrow::datatypes::DataType; -use datafusion_common::{ - types::{logical_binary, logical_string, NativeType}, - Result, -}; -use datafusion_expr::{ - ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature, - TypeSignature, Volatility, -}; -use datafusion_expr_common::signature::{Coercion, TypeSignatureClass}; -use datafusion_macros::user_doc; -use std::any::Any; - -#[user_doc( - doc_section(label = "Hashing Functions"), - description = "Computes the SHA-512 hash of a binary string.", - syntax_example = "sha512(expression)", - sql_example = r#"```sql -> select sha512('foo'); -+-------------------------------------------+ -| sha512(Utf8("foo")) | -+-------------------------------------------+ -| | -+-------------------------------------------+ -```"#, - standard_argument(name = "expression", prefix = "String") -)] -#[derive(Debug, PartialEq, Eq, Hash)] -pub struct SHA512Func { - signature: Signature, -} -impl Default for SHA512Func { - fn default() -> Self { - Self::new() - } -} - -impl SHA512Func { - pub fn new() -> Self { - Self { - signature: Signature::one_of( - vec![ - TypeSignature::Coercible(vec![Coercion::new_implicit( - TypeSignatureClass::Native(logical_binary()), - vec![TypeSignatureClass::Native(logical_string())], - NativeType::String, - )]), - TypeSignature::Coercible(vec![Coercion::new_implicit( - TypeSignatureClass::Native(logical_binary()), - vec![TypeSignatureClass::Native(logical_binary())], - NativeType::Binary, - )]), - ], - Volatility::Immutable, - ), - } - } -} -impl ScalarUDFImpl for SHA512Func { - fn as_any(&self) -> &dyn Any { - self - } - - fn name(&self) -> &str { - "sha512" - } - - fn signature(&self) -> &Signature { - &self.signature - } - - fn return_type(&self, arg_types: &[DataType]) -> Result { - utf8_or_binary_to_binary_type(&arg_types[0], self.name()) - } - - fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { - sha512(&args.args) - } - - fn documentation(&self) -> Option<&Documentation> { - self.doc() - } -} diff --git a/datafusion/sqllogictest/test_files/expr.slt b/datafusion/sqllogictest/test_files/expr.slt index df88d26c9c9de..d724ddae30e12 100644 --- a/datafusion/sqllogictest/test_files/expr.slt +++ b/datafusion/sqllogictest/test_files/expr.slt @@ -1398,6 +1398,11 @@ SELECT md5('tom'); ---- 34b7da764b21d298ef307d04d8152dc5 +query T +SELECT md5(arrow_cast('tom', 'Dictionary(Int32, Utf8)')); +---- +34b7da764b21d298ef307d04d8152dc5 + query ? SELECT digest('tom','md5'); ---- diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index 7c88d1fd9c3eb..7125284abb47d 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -4767,11 +4767,11 @@ digest(expression, algorithm) ```sql > select digest('foo', 'sha256'); -+------------------------------------------+ -| digest(Utf8("foo"), Utf8("sha256")) | -+------------------------------------------+ -| | -+------------------------------------------+ ++------------------------------------------------------------------+ +| digest(Utf8("foo"),Utf8("sha256")) | ++------------------------------------------------------------------+ +| 2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae | ++------------------------------------------------------------------+ ``` ### `md5` @@ -4790,11 +4790,11 @@ md5(expression) ```sql > select md5('foo'); -+-------------------------------------+ -| md5(Utf8("foo")) | -+-------------------------------------+ -| | -+-------------------------------------+ ++----------------------------------+ +| md5(Utf8("foo")) | ++----------------------------------+ +| acbd18db4cc2f85cedef654fccc4a4d8 | ++----------------------------------+ ``` ### `sha224` @@ -4813,11 +4813,11 @@ sha224(expression) ```sql > select sha224('foo'); -+------------------------------------------+ -| sha224(Utf8("foo")) | -+------------------------------------------+ -| | -+------------------------------------------+ ++----------------------------------------------------------+ +| sha224(Utf8("foo")) | ++----------------------------------------------------------+ +| 0808f64e60d58979fcb676c96ec938270dea42445aeefcd3a4e6f8db | ++----------------------------------------------------------+ ``` ### `sha256` @@ -4836,11 +4836,11 @@ sha256(expression) ```sql > select sha256('foo'); -+--------------------------------------+ -| sha256(Utf8("foo")) | -+--------------------------------------+ -| | -+--------------------------------------+ ++------------------------------------------------------------------+ +| sha256(Utf8("foo")) | ++------------------------------------------------------------------+ +| 2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae | ++------------------------------------------------------------------+ ``` ### `sha384` @@ -4859,11 +4859,11 @@ sha384(expression) ```sql > select sha384('foo'); -+-----------------------------------------+ -| sha384(Utf8("foo")) | -+-----------------------------------------+ -| | -+-----------------------------------------+ ++--------------------------------------------------------------------------------------------------+ +| sha384(Utf8("foo")) | ++--------------------------------------------------------------------------------------------------+ +| 98c11ffdfdd540676b1a137cb1a22b2a70350c9a44171d6b1180c6be5cbb2ee3f79d532c8a1dd9ef2e8e08e752a3babb | ++--------------------------------------------------------------------------------------------------+ ``` ### `sha512` @@ -4882,11 +4882,11 @@ sha512(expression) ```sql > select sha512('foo'); -+-------------------------------------------+ -| sha512(Utf8("foo")) | -+-------------------------------------------+ -| | -+-------------------------------------------+ ++----------------------------------------------------------------------------------------------------------------------------------+ +| sha512(Utf8("foo")) | ++----------------------------------------------------------------------------------------------------------------------------------+ +| f7fbba6e0636f890e56fbbf3283e524c6fa3204ae298382d624741d0dc6638326e282c41be5e4254d8820772c5518a2c5a8c0c7f7eda19594a7eb539453e1ed7 | ++----------------------------------------------------------------------------------------------------------------------------------+ ``` ## Union Functions