From 5e591fa05ad9b2a9564f643494ff148fb2843946 Mon Sep 17 00:00:00 2001 From: Stamatis Zampetakis Date: Mon, 20 Mar 2023 19:11:55 +0100 Subject: [PATCH] HIVE-27157: AssertionError when inferring return type for unix_timestamp function Calls to inferReturnType method for unix_timestamp operators always lead to AssertionError. Contrary to operand type checking and operand type inference that are not really relevant for Hive (the latter is not using the SqlValidator logic), the return type inference is important since it may kick in some calls to RelBuilder/RexBuilder APIs. Such calls exist in older versions of Hive and are widely used in Calcite's built-in rules. Change the implementation of unix_timestamp operators to avoid the AssertionError and infer the return type correctly; always BIGINT. Break the inheritance relation with SqlAbstractTimeFunction and change the SqlFunctionCategory from TIMEDATE to NUMERIC; unix_timestamp is not a time function since the result is never among DATE, TIME, or TIMESTAMP. Change the operand type checker to a more truthful implementation; the type checker is not really used at the moment but it is better to have something realistic there instead of null or something completely wrong. Change the function syntax from FUNCTION_ID to FUNCTION and update some out files. Not a must do but again there is no reason to omit parentheses from a regular function. 
--- .../HiveToUnixTimestampSqlOperator.java | 23 ++++-- .../HiveUnixTimestampSqlOperator.java | 23 ++++-- .../TestSqlOperatorInferReturnType.java | 73 +++++++++++++++++++ ...cbo_filter_proj_transpose_noinputref.q.out | 8 +- .../cbo_join_transitive_pred_loop_1.q.out | 4 +- 5 files changed, 111 insertions(+), 20 deletions(-) create mode 100644 ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/TestSqlOperatorInferReturnType.java diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveToUnixTimestampSqlOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveToUnixTimestampSqlOperator.java index b5ab01c9ada7..372aa3002522 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveToUnixTimestampSqlOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveToUnixTimestampSqlOperator.java @@ -18,15 +18,24 @@ package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators; -import org.apache.calcite.sql.fun.SqlAbstractTimeFunction; -import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.sql.SqlFunction; +import org.apache.calcite.sql.SqlFunctionCategory; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.type.OperandTypes; +import org.apache.calcite.sql.type.ReturnTypes; /** * Sql UNIX_TIMESTAMP calcite operator. 
*/ -public class HiveToUnixTimestampSqlOperator extends SqlAbstractTimeFunction { - public static final HiveToUnixTimestampSqlOperator INSTANCE = new HiveToUnixTimestampSqlOperator(); - protected HiveToUnixTimestampSqlOperator() { - super("UNIX_TIMESTAMP", SqlTypeName.BIGINT); - } +public class HiveToUnixTimestampSqlOperator { + public static final SqlFunction INSTANCE = + new SqlFunction("UNIX_TIMESTAMP", SqlKind.OTHER_FUNCTION, ReturnTypes.BIGINT, null, + OperandTypes.or(OperandTypes.NILADIC, + OperandTypes.or(OperandTypes.STRING, OperandTypes.TIMESTAMP, OperandTypes.DATE), + OperandTypes.STRING_STRING), SqlFunctionCategory.NUMERIC) { + @Override + public boolean isDynamicFunction() { + return true; + } + }; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveUnixTimestampSqlOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveUnixTimestampSqlOperator.java index f5a549c9edb9..04bb8b05f21f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveUnixTimestampSqlOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveUnixTimestampSqlOperator.java @@ -18,15 +18,24 @@ package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators; -import org.apache.calcite.sql.fun.SqlAbstractTimeFunction; -import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.sql.SqlFunction; +import org.apache.calcite.sql.SqlFunctionCategory; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.type.OperandTypes; +import org.apache.calcite.sql.type.ReturnTypes; /** * Sql UNIX_TIMESTAMP calcite operator. 
*/ -public class HiveUnixTimestampSqlOperator extends SqlAbstractTimeFunction { - public static final HiveUnixTimestampSqlOperator INSTANCE = new HiveUnixTimestampSqlOperator(); - protected HiveUnixTimestampSqlOperator() { - super("UNIX_TIMESTAMP", SqlTypeName.BIGINT); - } +public class HiveUnixTimestampSqlOperator { + public static final SqlFunction INSTANCE = + new SqlFunction("UNIX_TIMESTAMP", SqlKind.OTHER_FUNCTION, ReturnTypes.BIGINT, null, + OperandTypes.or(OperandTypes.NILADIC, + OperandTypes.or(OperandTypes.STRING, OperandTypes.TIMESTAMP, OperandTypes.DATE), + OperandTypes.STRING_STRING), SqlFunctionCategory.NUMERIC) { + @Override + public boolean isDynamicFunction() { + return true; + } + }; } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/TestSqlOperatorInferReturnType.java b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/TestSqlOperatorInferReturnType.java new file mode 100644 index 000000000000..b948100c38ae --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/TestSqlOperatorInferReturnType.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators; + +import org.apache.calcite.jdbc.JavaTypeFactoryImpl; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.sql.SqlOperator; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl; +import org.apache.hadoop.hive.ql.parse.SemanticException; + +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.List; + +@RunWith(Parameterized.class) +public class TestSqlOperatorInferReturnType { + private static final RelDataTypeFactory TYPE_FACTORY = new JavaTypeFactoryImpl(new HiveTypeSystemImpl()); + final SqlOperator op; + final List inputTypes; + final RelDataType returnType; + + public TestSqlOperatorInferReturnType(final SqlOperator op, final List inputTypes, + final RelDataType returnType) { + this.op = op; + this.inputTypes = inputTypes; + this.returnType = returnType; + } + + @Test + public void testInferReturnType() { + Assert.assertEquals(returnType, op.inferReturnType(TYPE_FACTORY, inputTypes)); + } + + @Parameterized.Parameters(name = "op={0}, inTypes={1}, expectedReturnType={2}") + public static Collection generateValidOperatorCalls() throws SemanticException { + RelDataType varchar19 = TYPE_FACTORY.createSqlType(SqlTypeName.VARCHAR, 19); + RelDataType bigint = TYPE_FACTORY.createSqlType(SqlTypeName.BIGINT); + + List calls = new ArrayList<>(); + calls.add(new Object[] { HiveUnixTimestampSqlOperator.INSTANCE, Collections.emptyList(), bigint }); + calls.add(new Object[] { HiveUnixTimestampSqlOperator.INSTANCE, Collections.singletonList(varchar19), bigint }); + calls.add(new Object[] { HiveUnixTimestampSqlOperator.INSTANCE, Arrays.asList(varchar19, 
varchar19), bigint }); + + calls.add(new Object[] { HiveToUnixTimestampSqlOperator.INSTANCE, Collections.emptyList(), bigint }); + calls.add(new Object[] { HiveToUnixTimestampSqlOperator.INSTANCE, Collections.singletonList(varchar19), bigint }); + calls.add(new Object[] { HiveToUnixTimestampSqlOperator.INSTANCE, Arrays.asList(varchar19, varchar19), bigint }); + return calls; + } + +} diff --git a/ql/src/test/results/clientpositive/llap/cbo_filter_proj_transpose_noinputref.q.out b/ql/src/test/results/clientpositive/llap/cbo_filter_proj_transpose_noinputref.q.out index 508666a70cd4..587f95c03a52 100644 --- a/ql/src/test/results/clientpositive/llap/cbo_filter_proj_transpose_noinputref.q.out +++ b/ql/src/test/results/clientpositive/llap/cbo_filter_proj_transpose_noinputref.q.out @@ -44,11 +44,11 @@ POSTHOOK: Input: default@test2 CBO PLAN: HiveProject(m=[$0]) HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(m=[substr(FROM_UNIXTIME(UNIX_TIMESTAMP, _UTF-16LE'yyyy-MM-dd':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), 1, 1)]) - HiveFilter(condition=[=(substr(FROM_UNIXTIME(UNIX_TIMESTAMP, _UTF-16LE'yyyy-MM-dd':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), 1, 1), _UTF-16LE'2')]) + HiveProject(m=[substr(FROM_UNIXTIME(UNIX_TIMESTAMP(), _UTF-16LE'yyyy-MM-dd':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), 1, 1)]) + HiveFilter(condition=[=(substr(FROM_UNIXTIME(UNIX_TIMESTAMP(), _UTF-16LE'yyyy-MM-dd':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), 1, 1), _UTF-16LE'2')]) HiveProject(DUMMY=[0]) HiveTableScan(table=[[default, test1]], table:alias=[test1]) - HiveProject($f0=[substr(FROM_UNIXTIME(UNIX_TIMESTAMP, _UTF-16LE'yyyy-MM-dd':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), 1, 1)]) - HiveFilter(condition=[AND(=($0, substr(FROM_UNIXTIME(UNIX_TIMESTAMP, _UTF-16LE'yyyy-MM-dd':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), 1, 1)), =(substr(FROM_UNIXTIME(UNIX_TIMESTAMP, _UTF-16LE'yyyy-MM-dd':VARCHAR(2147483647) CHARACTER SET 
"UTF-16LE"), 1, 1), _UTF-16LE'2'))]) + HiveProject($f0=[substr(FROM_UNIXTIME(UNIX_TIMESTAMP(), _UTF-16LE'yyyy-MM-dd':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), 1, 1)]) + HiveFilter(condition=[AND(=($0, substr(FROM_UNIXTIME(UNIX_TIMESTAMP(), _UTF-16LE'yyyy-MM-dd':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), 1, 1)), =(substr(FROM_UNIXTIME(UNIX_TIMESTAMP(), _UTF-16LE'yyyy-MM-dd':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), 1, 1), _UTF-16LE'2'))]) HiveTableScan(table=[[default, test2]], table:alias=[d]) diff --git a/ql/src/test/results/clientpositive/llap/cbo_join_transitive_pred_loop_1.q.out b/ql/src/test/results/clientpositive/llap/cbo_join_transitive_pred_loop_1.q.out index 17c1bcc9d41c..9ea53f75cb69 100644 --- a/ql/src/test/results/clientpositive/llap/cbo_join_transitive_pred_loop_1.q.out +++ b/ql/src/test/results/clientpositive/llap/cbo_join_transitive_pred_loop_1.q.out @@ -62,8 +62,8 @@ HiveProject(month=[$0], con_usd=[$2]) HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(month=[$0]) HiveUnion(all=[true]) - HiveProject(month=[CAST(regexp_replace(substr(add_months(FROM_UNIXTIME(UNIX_TIMESTAMP, _UTF-16LE'yyyy-MM-dd':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), -1), 1, 7), _UTF-16LE'-':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")):INTEGER]) - HiveFilter(condition=[IS NOT NULL(CAST(regexp_replace(substr(add_months(FROM_UNIXTIME(UNIX_TIMESTAMP, _UTF-16LE'yyyy-MM-dd':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), -1), 1, 7), _UTF-16LE'-':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")):INTEGER)]) + HiveProject(month=[CAST(regexp_replace(substr(add_months(FROM_UNIXTIME(UNIX_TIMESTAMP(), _UTF-16LE'yyyy-MM-dd':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), -1), 1, 7), _UTF-16LE'-':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")):INTEGER]) + 
HiveFilter(condition=[IS NOT NULL(CAST(regexp_replace(substr(add_months(FROM_UNIXTIME(UNIX_TIMESTAMP(), _UTF-16LE'yyyy-MM-dd':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), -1), 1, 7), _UTF-16LE'-':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")):INTEGER)]) HiveProject(DUMMY=[0]) HiveTableScan(table=[[default, test1]], table:alias=[test1]) HiveProject($f0=[CAST(202110):INTEGER])