Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft: Extend Expr::ScalarFunction to support Expr for ScalarUDF #8180

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 42 additions & 8 deletions datafusion/expr/src/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -338,19 +338,33 @@ impl Between {
}
}

/// Identifies which function a [`ScalarFunction`] expression invokes.
///
/// A function is either already resolved (to a built-in function or to a
/// user defined function) or is still referred to only by its name.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum ScalarFunctionDefinition {
/// Resolved to a built in scalar function
/// (will be removed long term)
BuiltIn(built_in_function::BuiltinScalarFunction),
/// Resolved to a user defined function
UDF(crate::ScalarUDF),
/// A scalar function that will be called by name
///
/// NOTE(review): this variant appears to be a placeholder that must be
/// replaced by one of the resolved variants during planning; the type
/// resolution code in `expr_schema.rs` returns an internal error when it
/// encounters it. Confirm where the resolution is expected to happen.
Name(Arc<str>),
}

/// ScalarFunction expression
#[derive(Clone, PartialEq, Eq, Hash, Debug)]
pub struct ScalarFunction {
/// The function
pub fun: built_in_function::BuiltinScalarFunction,
pub func_def: ScalarFunctionDefinition,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the API change

/// List of expressions to feed to the functions as arguments
pub args: Vec<Expr>,
}

impl ScalarFunction {
/// Create a new ScalarFunction expression
pub fn new(fun: built_in_function::BuiltinScalarFunction, args: Vec<Expr>) -> Self {
Self { fun, args }
Self {
func_def: ScalarFunctionDefinition::BuiltIn(fun),
args,
}
}
}

Expand Down Expand Up @@ -1196,9 +1210,21 @@ impl fmt::Display for Expr {
write!(f, " NULLS LAST")
}
}
Expr::ScalarFunction(func) => {
fmt_function(f, &func.fun.to_string(), false, &func.args, true)
}
Expr::ScalarFunction(func_expr) => match &func_expr.func_def {
ScalarFunctionDefinition::BuiltIn(builtin_func) => fmt_function(
f,
&builtin_func.to_string(),
false,
&func_expr.args,
true,
),
ScalarFunctionDefinition::UDF(udf) => {
fmt_function(f, &udf.name, false, &func_expr.args, true)
}
ScalarFunctionDefinition::Name(func_name) => {
fmt_function(f, &func_name, false, &func_expr.args, true)
}
},
Expr::ScalarUDF(ScalarUDF { fun, args }) => {
fmt_function(f, &fun.name, false, args, true)
}
Expand Down Expand Up @@ -1532,9 +1558,17 @@ fn create_name(e: &Expr) -> Result<String> {
}
}
}
Expr::ScalarFunction(func) => {
create_function_name(&func.fun.to_string(), false, &func.args)
}
Expr::ScalarFunction(func_expr) => match &func_expr.func_def {
ScalarFunctionDefinition::BuiltIn(builtin_func) => {
create_function_name(&builtin_func.to_string(), false, &func_expr.args)
}
ScalarFunctionDefinition::UDF(udf) => {
create_function_name(&udf.name, false, &func_expr.args)
}
ScalarFunctionDefinition::Name(name) => {
create_function_name(&name, false, &func_expr.args)
}
},
Expr::ScalarUDF(ScalarUDF { fun, args }) => {
create_function_name(&fun.name, false, args)
}
Expand Down
56 changes: 35 additions & 21 deletions datafusion/expr/src/expr_schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@
use super::{Between, Expr, Like};
use crate::expr::{
AggregateFunction, AggregateUDF, Alias, BinaryExpr, Cast, GetFieldAccess,
GetIndexedField, InList, InSubquery, Placeholder, ScalarFunction, ScalarUDF, Sort,
TryCast, WindowFunction,
GetIndexedField, InList, InSubquery, Placeholder, ScalarFunction,
ScalarFunctionDefinition, ScalarUDF, Sort, TryCast, WindowFunction,
};
use crate::field_util::GetFieldAccessSchema;
use crate::type_coercion::binary::get_result_type;
Expand Down Expand Up @@ -89,25 +89,39 @@ impl ExprSchemable for Expr {
.collect::<Result<Vec<_>>>()?;
Ok((fun.return_type)(&data_types)?.as_ref().clone())
}
Expr::ScalarFunction(ScalarFunction { fun, args }) => {
let arg_data_types = args
.iter()
.map(|e| e.get_type(schema))
.collect::<Result<Vec<_>>>()?;

// verify that input data types is consistent with function's `TypeSignature`
data_types(&arg_data_types, &fun.signature()).map_err(|_| {
plan_datafusion_err!(
"{}",
utils::generate_signature_error_msg(
&format!("{fun}"),
fun.signature(),
&arg_data_types,
)
)
})?;

fun.return_type(&arg_data_types)
Expr::ScalarFunction(ScalarFunction { func_def, args }) => {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There might be ~10 similar changes like this one during optimization/planning.
The Expr::ScalarUDF branch above will be moved inside this branch.

match func_def {
ScalarFunctionDefinition::BuiltIn(fun) => {
let arg_data_types = args
.iter()
.map(|e| e.get_type(schema))
.collect::<Result<Vec<_>>>()?;

// verify that input data types is consistent with function's `TypeSignature`
data_types(&arg_data_types, &fun.signature()).map_err(|_| {
plan_datafusion_err!(
"{}",
utils::generate_signature_error_msg(
&format!("{fun}"),
fun.signature(),
&arg_data_types,
)
)
})?;

fun.return_type(&arg_data_types)
}
ScalarFunctionDefinition::UDF(fun) => {
let data_types = args
.iter()
.map(|e| e.get_type(schema))
.collect::<Result<Vec<_>>>()?;
Ok((fun.return_type)(&data_types)?.as_ref().clone())
}
ScalarFunctionDefinition::Name(_) => {
return internal_err!("Function Expr in string representation should be resolved during planning")
}
}
}
Expr::WindowFunction(WindowFunction { fun, args, .. }) => {
let data_types = args
Expand Down
Loading