From da8b01c36a2c978e6161b0f983b6ace5f4162fa9 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Mon, 25 Aug 2025 16:35:31 +0900 Subject: [PATCH 1/2] Add uuid builtin function --- crates/connect/src/functions/mod.rs | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/crates/connect/src/functions/mod.rs b/crates/connect/src/functions/mod.rs index 4884939..9cc4aea 100644 --- a/crates/connect/src/functions/mod.rs +++ b/crates/connect/src/functions/mod.rs @@ -166,6 +166,11 @@ pub fn expr(val: &str) -> Column { }) } +/// Returns an universally unique identifier (UUID) string. The value is returned as a canonical UUID 36-character string. +pub fn uuid() -> Column { + invoke_func("uuid", vec![lit(random::())]) +} + // Math Functions gen_func!(sqrt, [col: Column], "Computes the square root of the specified float value."); @@ -1654,11 +1659,13 @@ mod tests { use super::*; use core::f64; + use regex::Regex; use std::sync::Arc; use arrow::{ array::{ - ArrayRef, BooleanArray, Float64Array, Int32Array, Int64Array, StringArray, StructArray, + Array, ArrayRef, BooleanArray, Float64Array, Int32Array, Int64Array, StringArray, + StructArray, }, datatypes::{DataType, Field, Schema}, record_batch::RecordBatch, @@ -1901,6 +1908,24 @@ mod tests { false ); + #[tokio::test] + async fn test_func_uuid() -> Result<(), SparkError> { + let spark = setup().await; + + let df = spark.range(Some(1), 2, 1, Some(1)); + let res = df.select([uuid().alias("uuid")]).collect().await?; + let res_column = res.column_by_name("uuid").unwrap(); + let res_column = res_column.as_any().downcast_ref::<StringArray>().unwrap(); + assert_eq!(res_column.len(), 1); + + let uuid = res_column.value(0); + let expect_pattern = + Regex::new("^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$").unwrap(); + assert!(expect_pattern.is_match(uuid)); + + Ok(()) + } + // math functions test_func!( test_func_sqrt, From c5e39e0cd6a1c357c1f8f5085a1c6906156b19aa Mon Sep 17 00:00:00 2001 From: 
Kousuke Saruta Date: Mon, 25 Aug 2025 16:53:44 +0900 Subject: [PATCH 2/2] Fix for Spark 3.5 --- crates/connect/src/functions/mod.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/crates/connect/src/functions/mod.rs b/crates/connect/src/functions/mod.rs index 9cc4aea..0a8d9c5 100644 --- a/crates/connect/src/functions/mod.rs +++ b/crates/connect/src/functions/mod.rs @@ -166,10 +166,7 @@ pub fn expr(val: &str) -> Column { }) } -/// Returns an universally unique identifier (UUID) string. The value is returned as a canonical UUID 36-character string. -pub fn uuid() -> Column { - invoke_func("uuid", vec![lit(random::())]) -} +gen_func!(uuid, [], "Returns a universally unique identifier (UUID) string. The value is returned as a canonical UUID 36-character string."); // Math Functions