HIVE-25334: refactor timestamp UDF #2482

Merged 5 commits on Aug 11, 2021
@@ -434,10 +434,19 @@ protected void obtainTimestampConverter(ObjectInspector[] arguments, int i,
case TIMESTAMP:
case DATE:
case TIMESTAMPLOCALTZ:
case INT:
case SHORT:
case LONG:
case DOUBLE:
case FLOAT:
case DECIMAL:
case VOID:
case BOOLEAN:
case BYTE:
break;
default:
throw new UDFArgumentTypeException(i, getFuncName()
+ " only takes STRING_GROUP or DATE_GROUP types as " + getArgOrder(i) + " argument, got "
+ " only takes STRING_GROUP/DATE_GROUP/NUMERIC_GROUP/VOID_GROUP/BOOLEAN_GROUP types as " + getArgOrder(i) + " argument, got "
+ inputType);
}
outOi = PrimitiveObjectInspectorFactory.writableTimestampObjectInspector;
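The hunk above widens obtainTimestampConverter() so that, besides the string and date groups, primitive categories from NUMERIC_GROUP (INT, SHORT, LONG, DOUBLE, FLOAT, DECIMAL, BYTE), VOID_GROUP and BOOLEAN_GROUP fall through to the timestamp converter, and the error message now names those groups. A minimal sketch of the category-to-group mapping the new message refers to (not part of the PR; it only assumes the public PrimitiveObjectInspectorUtils.getPrimitiveGrouping() helper already used elsewhere in this change):

import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping;

public class GroupingSketch {
  public static void main(String[] args) {
    // Each newly accepted category maps to one of the groups listed in the new error message:
    // INT/SHORT/LONG/DOUBLE/FLOAT/DECIMAL/BYTE -> NUMERIC_GROUP, VOID -> VOID_GROUP, BOOLEAN -> BOOLEAN_GROUP.
    PrimitiveCategory[] newlyAccepted = {
        PrimitiveCategory.INT, PrimitiveCategory.SHORT, PrimitiveCategory.LONG,
        PrimitiveCategory.DOUBLE, PrimitiveCategory.FLOAT, PrimitiveCategory.DECIMAL,
        PrimitiveCategory.BYTE, PrimitiveCategory.VOID, PrimitiveCategory.BOOLEAN };
    for (PrimitiveCategory category : newlyAccepted) {
      PrimitiveGrouping group = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(category);
      System.out.println(category + " -> " + group);
    }
  }
}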
@@ -17,10 +17,10 @@
*/
package org.apache.hadoop.hive.ql.udf.generic;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToTimestamp;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToTimestamp;
@@ -30,13 +30,20 @@
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.TimestampConverter;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping;

import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping.BOOLEAN_GROUP;
import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping.DATE_GROUP;
import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping.NUMERIC_GROUP;
import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping.STRING_GROUP;
import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping.VOID_GROUP;

/**
*
* GenericUDFTimestamp
@@ -48,13 +55,14 @@
*
*/
@Description(name = "timestamp",
value = "cast(date as timestamp) - Returns timestamp")
@VectorizedExpressions({CastLongToTimestamp.class, CastDateToTimestamp.class,
CastDoubleToTimestamp.class, CastDecimalToTimestamp.class, CastStringToTimestamp.class})
value = "cast(date as timestamp) - Returns timestamp")
@VectorizedExpressions({ CastLongToTimestamp.class, CastDateToTimestamp.class, CastDoubleToTimestamp.class,
CastDecimalToTimestamp.class, CastStringToTimestamp.class })
public class GenericUDFTimestamp extends GenericUDF {

private transient PrimitiveObjectInspector argumentOI;
private transient TimestampConverter tc;
private final transient ObjectInspectorConverters.Converter[] tsConvertors =
new ObjectInspectorConverters.Converter[1];
private final transient PrimitiveCategory[] tsInputTypes = new PrimitiveCategory[1];
/*
* Integer value was interpreted to timestamp inconsistently in milliseconds comparing
* to float/double in seconds. Since the issue exists for a long time and some users may
@@ -63,61 +71,44 @@ public class GenericUDFTimestamp extends GenericUDF {
* otherwise, it's interpreted as timestamp in seconds.
*/
private boolean intToTimestampInSeconds = false;
private boolean strict = true;

@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
if (arguments.length < 1) {
throw new UDFArgumentLengthException(
"The function TIMESTAMP requires at least one argument, got "
+ arguments.length);
}

SessionState ss = SessionState.get();
if (ss != null) {
intToTimestampInSeconds = ss.getConf().getBoolVar(ConfVars.HIVE_INT_TIMESTAMP_CONVERSION_IN_SECONDS);
}
checkArgsSize(arguments, 1, 1);
checkArgPrimitive(arguments, 0);
checkArgGroups(arguments, 0, tsInputTypes, STRING_GROUP, DATE_GROUP, NUMERIC_GROUP, VOID_GROUP, BOOLEAN_GROUP);

try {
argumentOI = (PrimitiveObjectInspector) arguments[0];
} catch (ClassCastException e) {
throw new UDFArgumentException(
"The function TIMESTAMP takes only primitive types");
}
strict = SessionState.get() != null ? SessionState.get().getConf()
.getBoolVar(ConfVars.HIVE_STRICT_TIMESTAMP_CONVERSION) : new HiveConf()
.getBoolVar(ConfVars.HIVE_STRICT_TIMESTAMP_CONVERSION);
intToTimestampInSeconds = SessionState.get() != null ? SessionState.get().getConf()
.getBoolVar(ConfVars.HIVE_INT_TIMESTAMP_CONVERSION_IN_SECONDS) : new HiveConf()
.getBoolVar(ConfVars.HIVE_INT_TIMESTAMP_CONVERSION_IN_SECONDS);

if (ss != null && ss.getConf().getBoolVar(ConfVars.HIVE_STRICT_TIMESTAMP_CONVERSION)) {
PrimitiveCategory category = argumentOI.getPrimitiveCategory();
PrimitiveGrouping group = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(category);
if (group == PrimitiveGrouping.NUMERIC_GROUP) {
if (strict) {
if (PrimitiveObjectInspectorUtils.getPrimitiveGrouping(tsInputTypes[0]) == PrimitiveGrouping.NUMERIC_GROUP) {
throw new UDFArgumentException(
"Casting NUMERIC types to TIMESTAMP is prohibited (" + ConfVars.HIVE_STRICT_TIMESTAMP_CONVERSION + ")");
}
}

tc = new TimestampConverter(argumentOI,
PrimitiveObjectInspectorFactory.writableTimestampObjectInspector);
tc.setIntToTimestampInSeconds(intToTimestampInSeconds);

obtainTimestampConverter(arguments, 0, tsInputTypes, tsConvertors);
return PrimitiveObjectInspectorFactory.writableTimestampObjectInspector;
}

@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
Object o0 = arguments[0].get();
if (o0 == null) {
return null;
}

return tc.convert(o0);
PrimitiveObjectInspectorConverter.TimestampConverter ts =
(PrimitiveObjectInspectorConverter.TimestampConverter) tsConvertors[0];
ts.setIntToTimestampInSeconds(intToTimestampInSeconds);
return ts.convert(arguments[0].get());
}

@Override
public String getDisplayString(String[] children) {
assert (children.length == 1);
StringBuilder sb = new StringBuilder();
sb.append("CAST( ");
sb.append(children[0]);
sb.append(" AS TIMESTAMP)");
return sb.toString();
return "CAST( " + children[0] + " AS TIMESTAMP)";
}

public boolean isIntToTimestampInSeconds() {
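After initialize() validates the single argument and obtains the converter, evaluate() casts tsConvertors[0] to PrimitiveObjectInspectorConverter.TimestampConverter and applies hive.int.timestamp.conversion.in.seconds before converting. A minimal standalone sketch of that converter behaviour (not part of the PR; the javaIntObjectInspector input and the expected values in the comments are assumptions):

import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.TimestampConverter;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class TimestampConverterSketch {
  public static void main(String[] args) {
    // Build the kind of converter GenericUDFTimestamp ends up with:
    // an int input inspector feeding a writable timestamp inspector.
    TimestampConverter tc = new TimestampConverter(
        PrimitiveObjectInspectorFactory.javaIntObjectInspector,
        PrimitiveObjectInspectorFactory.writableTimestampObjectInspector);

    // With hive.int.timestamp.conversion.in.seconds=true the integer is read as epoch seconds...
    tc.setIntToTimestampInSeconds(true);
    System.out.println(tc.convert(1));   // expected around 1970-01-01 00:00:01

    // ...otherwise it keeps the legacy interpretation as epoch milliseconds.
    tc.setIntToTimestampInSeconds(false);
    System.out.println(tc.convert(1));   // expected around 1970-01-01 00:00:00.001
  }
}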