NOTE(review): line breaks, indentation and table padding were reconstructed from a whitespace-collapsed copy of this patch; verify context and removed lines against the tree before applying.
diff --git a/datafusion/core/tests/dataframe/dataframe_functions.rs b/datafusion/core/tests/dataframe/dataframe_functions.rs
index 2ffac6a775d7..7d155bb16c72 100644
--- a/datafusion/core/tests/dataframe/dataframe_functions.rs
+++ b/datafusion/core/tests/dataframe/dataframe_functions.rs
@@ -597,7 +597,7 @@ async fn test_fn_md5() -> Result<()> {
 #[tokio::test]
 #[cfg(feature = "unicode_expressions")]
 async fn test_fn_regexp_like() -> Result<()> {
-    let expr = regexp_like(col("a"), lit("[a-z]"));
+    let expr = regexp_like(col("a"), lit("[a-z]"), None);
 
     let expected = [
         "+-----------------------------------+",
@@ -612,13 +612,28 @@ async fn test_fn_regexp_like() -> Result<()> {
 
     assert_fn_batches!(expr, expected);
 
+    let expr = regexp_like(col("a"), lit("abc"), Some(lit("i")));
+
+    let expected = [
+        "+-------------------------------------------+",
+        "| regexp_like(test.a,Utf8(\"abc\"),Utf8(\"i\")) |",
+        "+-------------------------------------------+",
+        "| true                                      |",
+        "| true                                      |",
+        "| false                                     |",
+        "| true                                      |",
+        "+-------------------------------------------+",
+    ];
+
+    assert_fn_batches!(expr, expected);
+
     Ok(())
 }
 
 #[tokio::test]
 #[cfg(feature = "unicode_expressions")]
 async fn test_fn_regexp_match() -> Result<()> {
-    let expr = regexp_match(col("a"), lit("[a-z]"));
+    let expr = regexp_match(col("a"), lit("[a-z]"), None);
 
     let expected = [
         "+------------------------------------+",
@@ -633,13 +648,28 @@ async fn test_fn_regexp_match() -> Result<()> {
 
     assert_fn_batches!(expr, expected);
 
+    let expr = regexp_match(col("a"), lit("[A-Z]"), Some(lit("i")));
+
+    let expected = [
+        "+----------------------------------------------+",
+        "| regexp_match(test.a,Utf8(\"[A-Z]\"),Utf8(\"i\")) |",
+        "+----------------------------------------------+",
+        "| [a]                                          |",
+        "| [a]                                          |",
+        "| [C]                                          |",
+        "| [A]                                          |",
+        "+----------------------------------------------+",
+    ];
+
+    assert_fn_batches!(expr, expected);
+
     Ok(())
 }
 
 #[tokio::test]
 #[cfg(feature = "unicode_expressions")]
 async fn test_fn_regexp_replace() -> Result<()> {
-    let expr = regexp_replace(col("a"), lit("[a-z]"), lit("x"), lit("g"));
+    let expr = regexp_replace(col("a"), lit("[a-z]"), lit("x"), Some(lit("g")));
 
     let expected = [
         "+----------------------------------------------------------+",
@@ -654,6 +684,21 @@ async fn test_fn_regexp_replace() -> Result<()> {
 
     assert_fn_batches!(expr, expected);
 
+    let expr = regexp_replace(col("a"), lit("[a-z]"), lit("x"), None);
+
+    let expected = [
+        "+------------------------------------------------+",
+        "| regexp_replace(test.a,Utf8(\"[a-z]\"),Utf8(\"x\")) |",
+        "+------------------------------------------------+",
+        "| xbcDEF                                         |",
+        "| xbc123                                         |",
+        "| CBAxef                                         |",
+        "| 123AxcDef                                      |",
+        "+------------------------------------------------+",
+    ];
+
+    assert_fn_batches!(expr, expected);
+
     Ok(())
 }
 
diff --git a/datafusion/functions/src/regex/mod.rs b/datafusion/functions/src/regex/mod.rs
index 5c12d4559e74..884db24d9ec8 100644
--- a/datafusion/functions/src/regex/mod.rs
+++ b/datafusion/functions/src/regex/mod.rs
@@ -28,12 +28,44 @@ make_udf_function!(
     REGEXP_REPLACE,
     regexp_replace
 );
-export_functions!((
-    regexp_match,
-    input_arg1 input_arg2,
-    "returns a list of regular expression matches in a string. "
-),(
-    regexp_like,
-    input_arg1 input_arg2,
-    "Returns true if a has at least one match in a string,false otherwise."
-),(regexp_replace, arg1 arg2 arg3 arg4, "Replaces substrings in a string that match"));
+
+pub mod expr_fn {
+    use datafusion_expr::Expr;
+
+    /// Returns a list of regular expression matches in a string.
+    pub fn regexp_match(values: Expr, regex: Expr, flags: Option<Expr>) -> Expr {
+        let mut args = vec![values, regex];
+        if let Some(flags) = flags {
+            args.push(flags);
+        };
+        super::regexp_match().call(args)
+    }
+
+    /// Returns true if a has at least one match in a string, false otherwise.
+    pub fn regexp_like(values: Expr, regex: Expr, flags: Option<Expr>) -> Expr {
+        let mut args = vec![values, regex];
+        if let Some(flags) = flags {
+            args.push(flags);
+        };
+        super::regexp_like().call(args)
+    }
+
+    /// Replaces substrings in a string that match.
+    pub fn regexp_replace(
+        string: Expr,
+        pattern: Expr,
+        replacement: Expr,
+        flags: Option<Expr>,
+    ) -> Expr {
+        let mut args = vec![string, pattern, replacement];
+        if let Some(flags) = flags {
+            args.push(flags);
+        };
+        super::regexp_replace().call(args)
+    }
+}
+
+#[doc = r" Return a list of all functions in this package"]
+pub fn functions() -> Vec<std::sync::Arc<datafusion_expr::ScalarUDF>> {
+    vec![regexp_match(), regexp_like(), regexp_replace()]
+}