New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
UDF tidy ups #1429
UDF tidy ups #1429
Changes from 3 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,31 +26,30 @@ | |
import java.nio.charset.Charset; | ||
import java.nio.charset.StandardCharsets; | ||
import java.util.function.Predicate; | ||
import java.util.stream.Collectors; | ||
|
||
/** | ||
* Used to restrict the classes that can be loaded by user supplied UDFs | ||
*/ | ||
public class Blacklist implements Predicate<String> { | ||
private static final Logger logger = LoggerFactory.getLogger(Blacklist.class); | ||
private static final String EMPTY_BLACKLIST = "^(?)\\.?.*$"; | ||
private static final String EMPTY_BLACKLIST = "^(?:)\\.?.*$"; | ||
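There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Split into two constants [code snippet lost in extraction; presumably one constant for the prefix "^(?:" and one for the suffix ")\\.?.*$"]:
Then your collect line can be [snippet lost in extraction; presumably Collectors.joining("|", <prefix constant>, <suffix constant>)]:
And your empty check can be [snippet lost in extraction; presumably blackList.equals(<prefix constant> + <suffix constant>)]:
Which I think is easier to understand. |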
||
|
||
private String blackList; | ||
private String blackList = ".*"; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: move ".*" into a constant. |
||
|
||
Blacklist(final File inputFile) { | ||
try { | ||
final StringBuilder builder = new StringBuilder("^(?:"); | ||
Files.readLines(inputFile, Charset.forName(StandardCharsets.UTF_8.name())) | ||
.forEach(item -> { | ||
final String trimmed = item.trim(); | ||
if (!(trimmed.isEmpty() || trimmed.startsWith("#"))) { | ||
builder.append(trimmed.replaceAll("\\.", "\\\\.")).append("|"); | ||
} | ||
}); | ||
builder.deleteCharAt(builder.length() - 1); | ||
builder.append(")\\.?.*$"); | ||
this.blackList = builder.toString().equals(EMPTY_BLACKLIST) | ||
? "" | ||
: builder.toString(); | ||
this.blackList = Files.readLines(inputFile, Charset.forName(StandardCharsets.UTF_8.name())) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It would be nice to have a comment describing how this block is parsing the blacklist file. Specifically, how are strings being transformed with replace and join? The regexes are not straightforward to interpret. |
||
.stream() | ||
.map(String::trim) | ||
.filter(line -> !line.isEmpty()) | ||
.filter(line -> !line.startsWith("#")) | ||
.map(line -> line.replaceAll("\\.", "\\\\.")) | ||
.collect(Collectors.joining("|", "^(?:",")\\.?.*$")); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: formatting - needs a space after the comma, to make it easier to grok. |
||
|
||
if (this.blackList.equals(EMPTY_BLACKLIST)) { | ||
this.blackList = ""; | ||
} | ||
logger.info("Setting UDF blacklisted classes to: " + blackList); | ||
} catch (IOException e) { | ||
logger.error("failed to load resource blacklist from " + inputFile | ||
|
@@ -60,6 +59,6 @@ public class Blacklist implements Predicate<String> { | |
|
||
@Override | ||
public boolean test(final String resourceName) { | ||
return blackList == null || resourceName.matches(blackList); | ||
return resourceName.matches(blackList); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. out of interest, why are we using a regex for such simple matching? Can't we just dump each line into a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is intended to do partial matches, i.e., the regex produced would be something like: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, gotcha. |
||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,6 +25,8 @@ | |
import java.util.List; | ||
import java.util.Map; | ||
import java.util.function.Function; | ||
import java.util.stream.Collectors; | ||
import java.util.stream.IntStream; | ||
|
||
import avro.shaded.com.google.common.collect.ImmutableMap; | ||
import io.confluent.ksql.util.KsqlException; | ||
|
@@ -51,6 +53,22 @@ public class UdfCompiler { | |
.put(List.class, index -> typeConversionCode("List", index)) | ||
.build(); | ||
|
||
// Templates used to generate the UDF code | ||
private static final String genericTemplate = | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Much nicer! |
||
"#TYPE arg#INDEX;\n" | ||
+ "if(args[#INDEX] == null) arg#INDEX = null;\n" | ||
+ "else if (args[#INDEX] instanceof #TYPE) arg#INDEX = (#TYPE)args[#INDEX];\n" | ||
+ "else if (args[#INDEX] instanceof String) arg#INDEX = " | ||
+ "#TYPE.valueOf((String)args[#INDEX]);\n"; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this line basically coercing an unmatched type into a string? Wouldn't it be better to just throw an exception? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yay - grokable code! I'm with @apurvam though - implicit type coercion scares me. Why do we need it? Why not just throw? If we need this, then at the very least let's wrap this in a try/catch so we can throw a more meaningful error. Also, what if There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No - there are no unmatched types as the types need to be in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i added a test to |
||
|
||
private static final String NUMBER_TEMPLATE = | ||
"else if (args[#INDEX] instanceof Number) arg#INDEX = " | ||
+ "((Number)args[#INDEX]).#NUM_VALUE;\n"; | ||
|
||
private static final String THROWS_TEMPLATE = | ||
"else throw new KsqlFunctionException(\"Type: \" + args[#INDEX].getClass() + \"" | ||
+ " is not supported by KSQL UDFS\");"; | ||
|
||
|
||
UdfInvoker compile(final Method method, final ClassLoader loader) { | ||
try { | ||
|
@@ -87,26 +105,23 @@ UdfInvoker compile(final Method method, final ClassLoader loader) { | |
*/ | ||
|
||
private static String generateCode(final Method method) { | ||
final StringBuilder builder = new StringBuilder(); | ||
final Class<?>[] params = method.getParameterTypes(); | ||
for (int i = 0; i < params.length; i++) { | ||
|
||
final String prefix = IntStream.range(0, params.length).mapToObj(i -> { | ||
final Function<Integer, String> converter = typeConverters.get(params[i]); | ||
if (converter == null) { | ||
throw new KsqlException("Type " + params[i] + " is not supported in UDFs"); | ||
} | ||
builder.append(converter.apply(i)).append("\n"); | ||
} | ||
return converter.apply(i); | ||
}).collect(Collectors.joining("\n", "", "\nreturn ((" | ||
+ method.getDeclaringClass().getSimpleName() | ||
+ ") thiz)." + method.getName() + "(" | ||
)); | ||
|
||
builder.append("\nreturn ((").append(method.getDeclaringClass().getSimpleName()) | ||
.append(") thiz).").append(method.getName()).append("("); | ||
final String code = IntStream.range(0, params.length).mapToObj(i -> "arg" + i) | ||
.collect(Collectors.joining(",", | ||
prefix, ");")); | ||
|
||
for (int i = 0; i < params.length; i++) { | ||
builder.append("arg").append(i).append(","); | ||
} | ||
|
||
builder.deleteCharAt(builder.length() - 1); | ||
builder.append(");"); | ||
final String code = builder.toString(); | ||
logger.debug("generated code for udf method = {}\n{}", method, code); | ||
return code; | ||
} | ||
|
@@ -126,33 +141,23 @@ private static IScriptEvaluator createScriptEvaluator(final Method method, | |
return scriptEvaluator; | ||
} | ||
|
||
|
||
private static String typeConversionCode(final String type, final int index) { | ||
if (type.equals("Map") || type.equals("List")) { | ||
return type + " arg" + index + " = (" + type + ")args[" + index + "];\n"; | ||
} | ||
final String argArrayVal = "args[" + index + "]"; | ||
final String argVarAssignment = "arg" + index + " = "; | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. However, I'm still wondering if there is scope for this function to generate invalid code. Even if we know today that it's only going to be called with a known set of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. See line 113 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't really see your code as a simplification! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Well, it's switching the code to only have one set of
Into the easy to grok constants. |
||
final StringBuilder builder = new StringBuilder(); | ||
final String numericValue = type.equals("Integer") ? "intValue()" : type.toLowerCase() | ||
+ "Value()"; | ||
builder.append(type).append(" arg").append(index).append(";\n") | ||
.append("if(").append(argArrayVal).append(" == null) ").append(argVarAssignment) | ||
.append("null;\n") | ||
.append("else if(").append(argArrayVal).append(" instanceof ").append(type).append(") ") | ||
.append(argVarAssignment).append("(").append(type).append(")") | ||
.append(argArrayVal).append(";\n") | ||
.append("else if(").append(argArrayVal).append(" instanceof String) ") | ||
.append(argVarAssignment).append(type).append(".valueOf((String)") | ||
.append(argArrayVal).append(");\n"); | ||
builder.append(genericTemplate.replaceAll("#TYPE", type) | ||
.replaceAll("#INDEX", String.valueOf(index))); | ||
|
||
if (!type.equals("String") && !type.equals("Boolean")) { | ||
builder.append("else if(").append(argArrayVal).append(" instanceof Number) ") | ||
.append(argVarAssignment) | ||
.append("((Number)").append(argArrayVal).append(").").append(numericValue) | ||
.append(";\n"); | ||
final String numericValue = type.equals("Integer") ? "intValue()" : type.toLowerCase() | ||
+ "Value()"; | ||
builder.append(NUMBER_TEMPLATE.replaceAll("#INDEX", String.valueOf(index)) | ||
.replaceAll("#NUM_VALUE", numericValue)); | ||
} | ||
builder.append("else throw new KsqlFunctionException(\"Type: \" + ").append(argArrayVal) | ||
.append(".getClass() + \"is not supported by KSQL UDFS\");"); | ||
|
||
builder.append(THROWS_TEMPLATE.replaceAll("#INDEX", String.valueOf(index))); | ||
return builder.toString(); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,7 +16,6 @@ | |
|
||
package io.confluent.ksql.function; | ||
|
||
import com.google.common.base.Preconditions; | ||
|
||
import org.apache.kafka.common.metrics.Metrics; | ||
import org.apache.kafka.common.metrics.Sensor; | ||
|
@@ -50,7 +49,7 @@ | |
|
||
public class UdfLoader { | ||
|
||
private static final Logger logger = LoggerFactory.getLogger(UdfLoader.class); | ||
private static final Logger LOGGER = LoggerFactory.getLogger(UdfLoader.class); | ||
|
||
private final MetaStore metaStore; | ||
private final File pluginDir; | ||
|
@@ -90,7 +89,7 @@ public void load() { | |
.map(path -> UdfClassLoader.newClassLoader(path, parentClassLoader, blacklist)) | ||
.forEach(this::loadUdfs); | ||
} catch (IOException e) { | ||
logger.error("Failed to load UDFs from location {}", pluginDir, e); | ||
LOGGER.error("Failed to load UDFs from location {}", pluginDir, e); | ||
} | ||
} | ||
} | ||
|
@@ -116,7 +115,7 @@ private void loadUdfs(final ClassLoader loader) { | |
if (parentClassLoader == loader) { | ||
throw e; | ||
} else { | ||
logger.warn("Failed to add UDF to the MetaStore. name={} method={}", | ||
LOGGER.warn("Failed to add UDF to the MetaStore. name={} method={}", | ||
annotation.name(), | ||
method, | ||
e); | ||
|
@@ -130,33 +129,41 @@ private void loadUdfs(final ClassLoader loader) { | |
private void addFunction(final UdfDescription annotation, | ||
final Method method, | ||
final UdfInvoker udf) { | ||
// sanity check | ||
instantiateUdfClass(method, annotation); | ||
final String sensorName = "ksql-udf-" + annotation.name(); | ||
addSensor(sensorName, annotation.name()); | ||
|
||
metaStore.addFunction(new KsqlFunction( | ||
SchemaUtil.getSchemaFromType(method.getReturnType()), | ||
Arrays.stream(method.getGenericParameterTypes()) | ||
.map(SchemaUtil::getSchemaFromType).collect(Collectors.toList()), | ||
annotation.name(), | ||
collectMetrics ? UdfMetricProducer.class : PluggableUdf.class, | ||
() -> { | ||
try { | ||
final PluggableUdf theUdf | ||
= new PluggableUdf(udf, method.getDeclaringClass().newInstance()); | ||
if (collectMetrics) { | ||
return new UdfMetricProducer(metrics.getSensor(sensorName), | ||
theUdf, | ||
new SystemTime()); | ||
} | ||
return theUdf; | ||
} catch (Exception e) { | ||
throw new KsqlException("Failed to create instance for UDF=" | ||
+ annotation.name() | ||
+ ", method=" + method, | ||
e); | ||
final PluggableUdf theUdf | ||
= new PluggableUdf(udf, instantiateUdfClass(method, annotation)); | ||
if (collectMetrics) { | ||
return new UdfMetricProducer(metrics.getSensor(sensorName), | ||
theUdf, | ||
new SystemTime()); | ||
} | ||
return theUdf; | ||
})); | ||
} | ||
|
||
private Object instantiateUdfClass(final Method method, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: Static |
||
final UdfDescription annotation) { | ||
try { | ||
return method.getDeclaringClass().newInstance(); | ||
} catch (final Exception e) { | ||
throw new KsqlException("Failed to create instance for UDF=" | ||
+ annotation.name() | ||
+ ", method=" + method, | ||
e); | ||
} | ||
} | ||
|
||
private void addSensor(final String sensorName, final String udfName) { | ||
if (collectMetrics && metrics.getSensor(sensorName) == null) { | ||
final Sensor sensor = metrics.sensor(sensorName); | ||
|
@@ -183,10 +190,6 @@ public static UdfLoader newInstance(final KsqlConfig config, | |
final Boolean loadCustomerUdfs = config.getBoolean(KsqlConfig.KSQL_ENABLE_UDFS); | ||
final Boolean collectMetrics = config.getBoolean(KsqlConfig.KSQL_COLLECT_UDF_METRICS); | ||
final File pluginDir = new File(ksqlInstallDir, "ext"); | ||
|
||
Preconditions.checkArgument(!loadCustomerUdfs || pluginDir.isDirectory(), | ||
pluginDir.getPath() + " must be a directory when " + KsqlConfig.KSQL_ENABLE_UDFS | ||
+ " is true"); | ||
return new UdfLoader(metaStore, | ||
pluginDir, | ||
Thread.currentThread().getContextClassLoader(), | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you mean "is a function that can be invoked"?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe: "The {@code Udf} annotation on a method tells KSQL that this method should be exposed as a user-defined function in KSQL. The enclosing class must also be annotated with {@code UdfDescription}."