Skip to content

Commit

Permalink
HIVE-25334: Refactor UDF CAST(<Date string> as TIMESTAMP) (Ashish Sha…
Browse files Browse the repository at this point in the history
…rma, reviewed by Adesh Rao, Sankar Hariappan)

Signed-off-by: Sankar Hariappan <sankarh@apache.org>
Closes (apache#2482)
  • Loading branch information
ashish-kumar-sharma authored and dengzhhu653 committed Dec 15, 2022
1 parent 40960c5 commit 1087c01
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 43 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -434,10 +434,19 @@ protected void obtainTimestampConverter(ObjectInspector[] arguments, int i,
case TIMESTAMP:
case DATE:
case TIMESTAMPLOCALTZ:
case INT:
case SHORT:
case LONG:
case DOUBLE:
case FLOAT:
case DECIMAL:
case VOID:
case BOOLEAN:
case BYTE:
break;
default:
throw new UDFArgumentTypeException(i, getFuncName()
+ " only takes STRING_GROUP or DATE_GROUP types as " + getArgOrder(i) + " argument, got "
+ " only takes STRING_GROUP/DATE_GROUP/NUMERIC_GROUP/VOID_GROUP/BOOLEAN_GROUP types as " + getArgOrder(i) + " argument, got "
+ inputType);
}
outOi = PrimitiveObjectInspectorFactory.writableTimestampObjectInspector;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@
*/
package org.apache.hadoop.hive.ql.udf.generic;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDateToTimestamp;
import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToTimestamp;
Expand All @@ -30,13 +30,20 @@
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.TimestampConverter;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping;

import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping.BOOLEAN_GROUP;
import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping.DATE_GROUP;
import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping.NUMERIC_GROUP;
import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping.STRING_GROUP;
import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping.VOID_GROUP;

/**
*
* GenericUDFTimestamp
Expand All @@ -48,13 +55,14 @@
*
*/
@Description(name = "timestamp",
value = "cast(date as timestamp) - Returns timestamp")
@VectorizedExpressions({CastLongToTimestamp.class, CastDateToTimestamp.class,
CastDoubleToTimestamp.class, CastDecimalToTimestamp.class, CastStringToTimestamp.class})
value = "cast(date as timestamp) - Returns timestamp")
@VectorizedExpressions({ CastLongToTimestamp.class, CastDateToTimestamp.class, CastDoubleToTimestamp.class,
CastDecimalToTimestamp.class, CastStringToTimestamp.class })
public class GenericUDFTimestamp extends GenericUDF {

private transient PrimitiveObjectInspector argumentOI;
private transient TimestampConverter tc;
private final transient ObjectInspectorConverters.Converter[] tsConvertors =
new ObjectInspectorConverters.Converter[1];
private final transient PrimitiveCategory[] tsInputTypes = new PrimitiveCategory[1];
/*
* Integer value was interpreted to timestamp inconsistently in milliseconds comparing
* to float/double in seconds. Since the issue exists for a long time and some users may
Expand All @@ -63,61 +71,44 @@ public class GenericUDFTimestamp extends GenericUDF {
* otherwise, it's interpreted as timestamp in seconds.
*/
private boolean intToTimestampInSeconds = false;
private boolean strict = true;

@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
if (arguments.length < 1) {
throw new UDFArgumentLengthException(
"The function TIMESTAMP requires at least one argument, got "
+ arguments.length);
}

SessionState ss = SessionState.get();
if (ss != null) {
intToTimestampInSeconds = ss.getConf().getBoolVar(ConfVars.HIVE_INT_TIMESTAMP_CONVERSION_IN_SECONDS);
}
checkArgsSize(arguments, 1, 1);
checkArgPrimitive(arguments, 0);
checkArgGroups(arguments, 0, tsInputTypes, STRING_GROUP, DATE_GROUP, NUMERIC_GROUP, VOID_GROUP, BOOLEAN_GROUP);

try {
argumentOI = (PrimitiveObjectInspector) arguments[0];
} catch (ClassCastException e) {
throw new UDFArgumentException(
"The function TIMESTAMP takes only primitive types");
}
strict = SessionState.get() != null ? SessionState.get().getConf()
.getBoolVar(ConfVars.HIVE_STRICT_TIMESTAMP_CONVERSION) : new HiveConf()
.getBoolVar(ConfVars.HIVE_STRICT_TIMESTAMP_CONVERSION);
intToTimestampInSeconds = SessionState.get() != null ? SessionState.get().getConf()
.getBoolVar(ConfVars.HIVE_INT_TIMESTAMP_CONVERSION_IN_SECONDS) : new HiveConf()
.getBoolVar(ConfVars.HIVE_INT_TIMESTAMP_CONVERSION_IN_SECONDS);

if (ss != null && ss.getConf().getBoolVar(ConfVars.HIVE_STRICT_TIMESTAMP_CONVERSION)) {
PrimitiveCategory category = argumentOI.getPrimitiveCategory();
PrimitiveGrouping group = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(category);
if (group == PrimitiveGrouping.NUMERIC_GROUP) {
if (strict) {
if (PrimitiveObjectInspectorUtils.getPrimitiveGrouping(tsInputTypes[0]) == PrimitiveGrouping.NUMERIC_GROUP) {
throw new UDFArgumentException(
"Casting NUMERIC types to TIMESTAMP is prohibited (" + ConfVars.HIVE_STRICT_TIMESTAMP_CONVERSION + ")");
}
}

tc = new TimestampConverter(argumentOI,
PrimitiveObjectInspectorFactory.writableTimestampObjectInspector);
tc.setIntToTimestampInSeconds(intToTimestampInSeconds);

obtainTimestampConverter(arguments, 0, tsInputTypes, tsConvertors);
return PrimitiveObjectInspectorFactory.writableTimestampObjectInspector;
}

@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
Object o0 = arguments[0].get();
if (o0 == null) {
return null;
}

return tc.convert(o0);
PrimitiveObjectInspectorConverter.TimestampConverter ts =
(PrimitiveObjectInspectorConverter.TimestampConverter) tsConvertors[0];
ts.setIntToTimestampInSeconds(intToTimestampInSeconds);
return ts.convert(arguments[0].get());
}

@Override
public String getDisplayString(String[] children) {
assert (children.length == 1);
StringBuilder sb = new StringBuilder();
sb.append("CAST( ");
sb.append(children[0]);
sb.append(" AS TIMESTAMP)");
return sb.toString();
return "CAST( " + children[0] + " AS TIMESTAMP)";
}

public boolean isIntToTimestampInSeconds() {
Expand Down

0 comments on commit 1087c01

Please sign in to comment.