Skip to content

Commit

Permalink
[SPARK-35440][SQL] Add function type to ExpressionInfo for UDF
Browse files Browse the repository at this point in the history
### What changes were proposed in this pull request?
Add the function type, such as "scala_udf", "python_udf", "java_udf", "hive", "built-in" to the `ExpressionInfo` for UDF.

### Why are the changes needed?
Make the `ExpressionInfo` of UDF more meaningful

### Does this PR introduce _any_ user-facing change?
no

### How was this patch tested?
existing and newly added UT

Closes #32587 from linhongliu-db/udf-language.

Authored-by: Linhong Liu <linhong.liu@databricks.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
  • Loading branch information
linhongliu-db authored and cloud-fan committed May 26, 2021
1 parent 20750a3 commit af1dba7
Show file tree
Hide file tree
Showing 11 changed files with 191 additions and 77 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
* `usage()` will be used for the function usage in brief way.
*
* These below are concatenated and used for the function usage in verbose way, suppose arguments,
* examples, note, group, since and deprecated will be provided.
* examples, note, group, source, since and deprecated will be provided.
*
* `arguments()` describes arguments for the expression.
*
Expand All @@ -42,14 +42,17 @@
* `group()` describes the category that the expression belongs to. Valid values include
* "agg_funcs", "array_funcs", "datetime_funcs", "json_funcs", "map_funcs" and "window_funcs".
*
* `source()` describes the source of the function. The valid values are "built-in", "hive",
* "python_udf", "scala_udf" and "java_udf".
*
* `since()` contains version information for the expression. Version is specified by,
* for example, "2.2.0".
*
* `deprecated()` contains deprecation information for the expression optionally, for example,
* "Deprecated since 2.2.0. Use something else instead".
*
* The format, in particular for `arguments()`, `examples()`,`note()`, `group()`, `since()` and
* `deprecated()`, should strictly be as follows.
* The format, in particular for `arguments()`, `examples()`, `note()`, `group()`, `source()`,
* `since()` and `deprecated()`, should strictly be as follows.
*
* <pre>
* <code>@ExpressionDescription(
Expand All @@ -72,6 +75,7 @@
* ...
* """,
* group = "agg_funcs",
* source = "built-in",
* since = "3.0.0",
* deprecated = """
* ...
Expand Down Expand Up @@ -112,4 +116,5 @@
String group() default "";
String since() default "";
String deprecated() default "";
String source() default "built-in";
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ public class ExpressionInfo {
private String group;
private String since;
private String deprecated;
private String source;

private static final Set<String> validGroups =
new HashSet<>(Arrays.asList("agg_funcs", "array_funcs", "binary_funcs", "bitwise_funcs",
Expand All @@ -46,6 +47,9 @@ public class ExpressionInfo {
"lambda_funcs", "map_funcs", "math_funcs", "misc_funcs", "string_funcs", "struct_funcs",
"window_funcs", "xml_funcs", "table_funcs"));

// Accepted non-empty values for `source`. The constructor rejects any other non-empty value
// with an IllegalArgumentException; an empty string is allowed and skips this check.
private static final Set<String> validSources =
new HashSet<>(Arrays.asList("built-in", "hive", "python_udf", "scala_udf", "java_udf"));

/**
 * Returns the class name that was supplied when this {@code ExpressionInfo} was constructed.
 *
 * @return the stored class name
 */
public String getClassName() {
  return this.className;
}
Expand Down Expand Up @@ -95,6 +99,10 @@ public String getDb() {
return db;
}

/**
 * Returns the source of the function that was supplied when this {@code ExpressionInfo} was
 * constructed. The constructor only accepts an empty string or one of "built-in", "hive",
 * "python_udf", "scala_udf" and "java_udf".
 *
 * @return the stored function source
 */
public String getSource() {
  return this.source;
}

public ExpressionInfo(
String className,
String db,
Expand All @@ -105,7 +113,8 @@ public ExpressionInfo(
String note,
String group,
String since,
String deprecated) {
String deprecated,
String source) {
assert name != null;
assert arguments != null;
assert examples != null;
Expand All @@ -114,6 +123,7 @@ public ExpressionInfo(
assert group != null;
assert since != null;
assert deprecated != null;
assert source != null;

this.className = className;
this.db = db;
Expand All @@ -125,6 +135,7 @@ public ExpressionInfo(
this.group = group;
this.since = since;
this.deprecated = deprecated;
this.source = source;

// Make the extended description.
this.extended = arguments + examples;
Expand All @@ -144,6 +155,11 @@ public ExpressionInfo(
this.name + "]. It should be a value in " + validGroups + "; however, " +
"got [" + group + "].");
}
if (!source.isEmpty() && !validSources.contains(source)) {
throw new IllegalArgumentException("'source' is malformed in the expression [" +
this.name + "]. It should be a value in " + validSources + "; however, " +
"got [" + source + "].");
}
if (!since.isEmpty()) {
if (Integer.parseInt(since.split("\\.")[0]) < 0) {
throw new IllegalArgumentException("'since' is malformed in the expression [" +
Expand All @@ -164,11 +180,11 @@ public ExpressionInfo(
}

public ExpressionInfo(String className, String name) {
this(className, null, name, null, "", "", "", "", "", "");
this(className, null, name, null, "", "", "", "", "", "", "");
}

public ExpressionInfo(String className, String db, String name) {
this(className, db, name, null, "", "", "", "", "", "");
this(className, db, name, null, "", "", "", "", "", "", "");
}

/**
Expand All @@ -179,7 +195,7 @@ public ExpressionInfo(String className, String db, String name) {
public ExpressionInfo(String className, String db, String name, String usage, String extended) {
// `arguments` and `examples` are concatenated for the extended description. So, here
// simply pass the `extended` as `arguments` and an empty string for `examples`.
this(className, db, name, usage, extended, "", "", "", "", "");
this(className, db, name, usage, extended, "", "", "", "", "", "");
}

private String replaceFunctionName(String usage) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,20 @@ trait FunctionRegistryBase[T] {

type FunctionBuilder = Seq[Expression] => T

final def registerFunction(name: FunctionIdentifier, builder: FunctionBuilder): Unit = {
final def registerFunction(
name: FunctionIdentifier, builder: FunctionBuilder, source: String): Unit = {
val info = new ExpressionInfo(
builder.getClass.getCanonicalName, name.database.orNull, name.funcName)
builder.getClass.getCanonicalName,
name.database.orNull,
name.funcName,
null,
"",
"",
"",
"",
"",
"",
source)
registerFunction(name, info, builder)
}

Expand All @@ -59,10 +70,12 @@ trait FunctionRegistryBase[T] {
builder: FunctionBuilder): Unit

/* Create or replace a temporary function. */
final def createOrReplaceTempFunction(name: String, builder: FunctionBuilder): Unit = {
final def createOrReplaceTempFunction(
name: String, builder: FunctionBuilder, source: String): Unit = {
registerFunction(
FunctionIdentifier(name),
builder)
builder,
source)
}

@throws[AnalysisException]("If function does not exist")
Expand Down Expand Up @@ -157,7 +170,8 @@ object FunctionRegistryBase {
df.note(),
df.group(),
df.since(),
df.deprecated())
df.deprecated(),
df.source())
} else {
// This exists for the backward compatibility with old `ExpressionDescription`s defining
// the extended description in `extended()`.
Expand Down Expand Up @@ -721,7 +735,7 @@ object FunctionRegistry {
val usage = "_FUNC_(expr) - Casts the value `expr` to the target data type `_FUNC_`."
val expressionInfo =
new ExpressionInfo(clazz.getCanonicalName, null, name, usage, "", "", "",
"conversion_funcs", "2.0.1", "")
"conversion_funcs", "2.0.1", "", "built-in")
(name, (expressionInfo, builder))
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1461,7 +1461,18 @@ class SessionCatalog(
if (functionRegistry.functionExists(func) && !overrideIfExists) {
throw QueryCompilationErrors.functionAlreadyExistsError(func)
}
val info = new ExpressionInfo(funcDefinition.className, func.database.orNull, func.funcName)
val info = new ExpressionInfo(
funcDefinition.className,
func.database.orNull,
func.funcName,
null,
"",
"",
"",
"",
"",
"",
"hive")
val builder =
functionBuilder.getOrElse {
val className = funcDefinition.className
Expand Down Expand Up @@ -1552,7 +1563,15 @@ class SessionCatalog(
new ExpressionInfo(
metadata.className,
qualifiedName.database.orNull,
qualifiedName.identifier)
qualifiedName.identifier,
null,
"",
"",
"",
"",
"",
"",
"hive")
} else {
failFunctionLookup(name)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -394,7 +394,8 @@ object CreateStruct {
"",
"struct_funcs",
"1.4.0",
"")
"",
"built-in")
("struct", (info, this.create))
}
}
Expand Down
Loading

0 comments on commit af1dba7

Please sign in to comment.