DRILL-4456: Add Hive translate UDF #1527

Closed
wants to merge 1 commit into from
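For background (not part of the PR itself): Hive's translate(input, from, to) performs per-character substitution, replacing every character of input that appears in from with the character at the same position in to. A minimal illustrative query, with made-up literal values:

    SELECT translate('drill', 'dl', 'DL')
    -- per-character mapping: 'd' -> 'D', 'l' -> 'L', giving 'DriLL'

Calcite's standard operator table already uses the TRANSLATE name for the TRANSLATE(... USING ...) form, which is why the change below registers Hive's UDF under Calcite's TRANSLATE3 operator name instead (see the CALCITE-1115 note in the diff).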
@@ -20,9 +20,12 @@
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
+import java.util.stream.Stream;

import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.sql.SqlOperatorBinding;
+import org.apache.calcite.sql.fun.OracleSqlOperatorTable;
+import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.sql.type.SqlReturnTypeInference;
import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.drill.common.config.DrillConfig;
@@ -38,6 +41,7 @@
import org.apache.drill.exec.planner.sql.HiveUDFOperator;
import org.apache.drill.exec.planner.sql.HiveUDFOperatorWithoutInference;
import org.apache.drill.exec.planner.sql.TypeInferenceUtils;
+import org.apache.drill.shaded.guava.com.google.common.collect.ImmutableMap;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.hive.ql.udf.UDFType;
@@ -48,8 +52,18 @@
import org.apache.drill.shaded.guava.com.google.common.collect.ArrayListMultimap;
import org.apache.drill.shaded.guava.com.google.common.collect.Sets;

-public class HiveFunctionRegistry implements PluggableFunctionRegistry{
-  static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(HiveFunctionRegistry.class);
+public class HiveFunctionRegistry implements PluggableFunctionRegistry {
+  private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(HiveFunctionRegistry.class);
+
+  /**
+   * Map for renaming UDFs. Keys of the map represent UDF names which should be replaced
+   * and its values represent target UDF names.
+   */
+  private static final Map<String, String> FUNCTION_REPLACE_MAP = ImmutableMap.<String, String> builder()
+      // renames Hive's TRANSLATE UDF to TRANSLATE3 due to CALCITE-1115
+      .put(SqlStdOperatorTable.TRANSLATE.getName().toLowerCase(),
+          OracleSqlOperatorTable.TRANSLATE3.getName().toLowerCase())
+      .build();

private ArrayListMultimap<String, Class<? extends GenericUDF>> methodsGenericUDF = ArrayListMultimap.create();
private ArrayListMultimap<String, Class<? extends UDF>> methodsUDF = ArrayListMultimap.create();
@@ -102,27 +116,28 @@ public void register(DrillOperatorTable operatorTable) {
}
}

-  private <C,I> void register(Class<? extends I> clazz, ArrayListMultimap<String,Class<? extends I>> methods) {
+  private <I> void register(Class<? extends I> clazz, ArrayListMultimap<String, Class<? extends I>> methods) {
Member:
If it's not required to have a separate renameUDF(names) method, then it's possible to simplify the body of this method:

private <C, I> void register(Class<? extends I> clazz, ArrayListMultimap<String, Class<? extends I>> methods) {
    Description desc = clazz.getAnnotation(Description.class);
    Stream<String> names;
    if (desc != null) {
      names = Stream.of(desc.name().split(",")).map(String::trim);
    } else {
      names = Stream.of(clazz).map(Class::getName)
          .map(name -> name.replace('.', '_'));
    }
    names.map(String::toLowerCase)
        .map(funName -> FUNCTION_REPLACE_MAP.getOrDefault(funName, funName))
        .forEach(udfFunName -> methods.put(udfFunName, clazz));

    UDFType type = clazz.getAnnotation(UDFType.class);
    if (type != null && !type.deterministic()) {
      nonDeterministicUDFs.add(clazz);
    }
  }

Member Author:
Thanks, done

    Description desc = clazz.getAnnotation(Description.class);
-    String[] names;
+    Stream<String> namesStream;
    if (desc != null) {
-      names = desc.name().split(",");
-      for (int i=0; i<names.length; i++) {
-        names[i] = names[i].trim();
-      }
-    }else{
-      names = new String[]{clazz.getName().replace('.', '_')};
+      namesStream = Stream.of(desc.name().split(","))
+          .map(String::trim);
+    } else {
+      namesStream = Stream.of(clazz)
+          .map(Class::getName)
+          .map(name -> name.replace('.', '_'));
    }

+    // Checks specified array of function names whether they should be replaced
+    // using FUNCTION_REPLACE_MAP map.
+    namesStream.map(String::toLowerCase)
+        .map(functionName -> FUNCTION_REPLACE_MAP.getOrDefault(functionName, functionName))
+        .forEach(name -> methods.put(name, clazz));
+
    UDFType type = clazz.getAnnotation(UDFType.class);
    if (type != null && !type.deterministic()) {
      nonDeterministicUDFs.add(clazz);
    }
-
-
-    for(int i=0; i<names.length;i++) {
-      methods.put(names[i].toLowerCase(), clazz);
-    }
  }

/**
@@ -184,4 +184,17 @@ public void testToUTCTimestamp() throws Exception {
.go();
}

+  @Test // DRILL-4456
+  public void testTranslate3() throws Exception {
+    testBuilder()
+        .sqlQuery("SELECT translate(string_field, 's', 'S') as ts," +
+            "translate(varchar_field, 'v', 'V') as tv,\n" +
+            "translate('literal', 'l', 'L') as tl from hive.readtest")
+        .unOrdered()
+        .baselineColumns("ts", "tv", "tl")
+        .baselineValues("Stringfield", "Varcharfield", "LiteraL")
+        .baselineValues(null, null, "LiteraL")
+        .go();
+  }

}