From a57805e4afa631ee4e47276e887e19dfd65a22e8 Mon Sep 17 00:00:00 2001 From: Taraka Rama Rao Lethavadla Date: Tue, 28 Feb 2023 19:51:23 +0530 Subject: [PATCH 1/7] HIVE-27112 - implement array_except UDF in Hive --- .../hadoop/hive/ql/exec/FunctionRegistry.java | 1 + .../ql/udf/generic/GenericUDFArrayExcept.java | 59 +++++ .../generic/TestGenericUDFArrayExcept.java | 232 ++++++++++++++++++ .../queries/clientpositive/udf_array_except.q | 38 +++ 4 files changed, 330 insertions(+) create mode 100644 ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFArrayExcept.java create mode 100644 ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFArrayExcept.java create mode 100644 ql/src/test/queries/clientpositive/udf_array_except.q diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index cb5aa5b96783..886d34344331 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -605,6 +605,7 @@ public final class FunctionRegistry { system.registerGenericUDF("array_distinct", GenericUDFArrayDistinct.class); system.registerGenericUDF("array_join", GenericUDFArrayJoin.class); system.registerGenericUDF("array_slice", GenericUDFArraySlice.class); + system.registerGenericUDF("array_except", GenericUDFArrayExcept.class); system.registerGenericUDF("deserialize", GenericUDFDeserialize.class); system.registerGenericUDF("sentences", GenericUDFSentences.class); system.registerGenericUDF("map_keys", GenericUDFMapKeys.class); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFArrayExcept.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFArrayExcept.java new file mode 100644 index 000000000000..3bd7ecac9779 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFArrayExcept.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache 
Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; + +import java.util.List; +import java.util.stream.Collectors; + +/** + * GenericUDFArrayExcept + */ +@Description(name = "array_except", value = "_FUNC_(array, value) - Returns an array of the elements in array1 but not in array2.", extended = + "Example:\n" + " > SELECT _FUNC_(array(1, 2, 3,4), array(2,3)) FROM src LIMIT 1;\n" + + " [1,4]") @NDV(maxNdv = 2) public class GenericUDFArrayExcept extends AbstractGenericUDFArrayBase { + static final int ARRAY2_IDX = 1; + private static final String FUNC_NAME = "ARRAY_EXCEPT"; + + public GenericUDFArrayExcept() { + super(FUNC_NAME, 2, 2, ObjectInspector.Category.LIST); + } + + @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { + ObjectInspector defaultOI = super.initialize(arguments); + checkArgCategory(arguments, ARRAY2_IDX, 
ObjectInspector.Category.LIST, FUNC_NAME, + org.apache.hadoop.hive.serde.serdeConstants.LIST_TYPE_NAME); //Array1 is already getting validated in Parent class + return defaultOI; + } + + @Override public Object evaluate(DeferredObject[] arguments) throws HiveException { + Object array = arguments[ARRAY_IDX].get(); + if (array == null || arrayOI.getListLength(array) <= 0) { + return null; + } + + List retArray3 = ((ListObjectInspector) argumentOIs[ARRAY_IDX]).getList(array); + retArray3.removeAll(((ListObjectInspector) argumentOIs[ARRAY2_IDX]).getList(arguments[ARRAY2_IDX].get())); + return retArray3.stream().distinct().map(o -> converter.convert(o)).collect(Collectors.toList()); + } +} diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFArrayExcept.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFArrayExcept.java new file mode 100644 index 000000000000..63011832b053 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFArrayExcept.java @@ -0,0 +1,232 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.common.type.Date; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.io.DateWritableV2; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.io.FloatWritable; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.Text; +import org.junit.Assert; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static java.util.Arrays.asList; + +public class TestGenericUDFArrayExcept { + private final GenericUDFArrayExcept udf = new GenericUDFArrayExcept(); + + @Test + public void testPrimitive() throws HiveException { + ObjectInspector[] inputOIs = { + ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableIntObjectInspector), + ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableIntObjectInspector) + }; + udf.initialize(inputOIs); + + Object i1 = new IntWritable(1); + Object i2 = new IntWritable(2); + Object i3 = new IntWritable(4); + Object i4 = new IntWritable(5); + + Object i5 = new IntWritable(1); + Object i6 = new IntWritable(3); + Object i7 = new IntWritable(2); + Object i8 = new IntWritable(9); + + List inputList = new ArrayList<>(); + inputList.add(i1); + inputList.add(i2); + inputList.add(i3); + inputList.add(i4); + + runAndVerify(inputList, asList(i5, i6, i7, i8), asList(i3,i4)); + i1 = new FloatWritable(3.3f); + i2 = new FloatWritable(1.1f); + i3 = new FloatWritable(4.3f); + i4 = new FloatWritable(2.22f); + + i5 = new FloatWritable(3.3f); + i6 = new FloatWritable(1.1f); + i7 = 
new FloatWritable(2.28f); + i8 = new FloatWritable(2.20f); + List inputListf = new ArrayList<>(); + inputListf.add(i1); + inputListf.add(i2); + inputListf.add(i3); + inputListf.add(i4); + + runAndVerify(inputListf, asList(i5, i6, i7, i8), asList(i3, i4)); + } + + @Test + public void testList() throws HiveException { + ObjectInspector[] inputOIs = { + ObjectInspectorFactory.getStandardListObjectInspector( + ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableStringObjectInspector + ) + ), + ObjectInspectorFactory.getStandardListObjectInspector( + ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableStringObjectInspector + ) + ) + }; + udf.initialize(inputOIs); + + Object i1 = asList(new Text("aa1"), new Text("dd"), new Text("cc"), new Text("bb")); + Object i2 = asList(new Text("aa2"), new Text("cc"), new Text("ba"), new Text("dd")); + Object i3 = asList(new Text("aa3"), new Text("cc"), new Text("dd"), new Text("ee"), new Text("bb")); + Object i4 = asList(new Text("aa4"), new Text("cc"), new Text("ddd"), new Text("bb")); + List inputList = new ArrayList<>(); + inputList.add(i1); + inputList.add(i2); + inputList.add(i3); + inputList.add(i4); + runAndVerify(inputList, asList(i1, i2, i2), asList(i3, i4)); + } + + @Test + public void testStruct() throws HiveException { + ObjectInspector[] inputOIs = { + ObjectInspectorFactory.getStandardListObjectInspector( + ObjectInspectorFactory.getStandardStructObjectInspector( + asList("f1", "f2", "f3", "f4"), + asList( + PrimitiveObjectInspectorFactory.writableStringObjectInspector, + PrimitiveObjectInspectorFactory.writableDoubleObjectInspector, + PrimitiveObjectInspectorFactory.writableDateObjectInspector, + ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableIntObjectInspector + ) + ) + ) + ), + ObjectInspectorFactory.getStandardListObjectInspector( + 
ObjectInspectorFactory.getStandardStructObjectInspector( + asList("f1", "f2", "f3", "f4"), + asList( + PrimitiveObjectInspectorFactory.writableStringObjectInspector, + PrimitiveObjectInspectorFactory.writableDoubleObjectInspector, + PrimitiveObjectInspectorFactory.writableDateObjectInspector, + ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableIntObjectInspector + ) + ) + ) + ) + }; + udf.initialize(inputOIs); + + Object i1 = asList(new Text("a"), new DoubleWritable(3.1415), + new DateWritableV2(Date.of(2015, 5, 26)), + asList(new IntWritable(1), new IntWritable(3), + new IntWritable(2), new IntWritable(4))); + + Object i2 = asList(new Text("b"), new DoubleWritable(3.14), + new DateWritableV2(Date.of(2015, 5, 26)), + asList(new IntWritable(1), new IntWritable(3), + new IntWritable(2), new IntWritable(4))); + + Object i3 = asList(new Text("a"), new DoubleWritable(3.1415), + new DateWritableV2(Date.of(2015, 5, 25)), + asList(new IntWritable(1), new IntWritable(3), + new IntWritable(2), new IntWritable(5))); + + Object i4 = asList(new Text("a"), new DoubleWritable(3.1415), + new DateWritableV2(Date.of(2015, 5, 25)), + asList(new IntWritable(1), new IntWritable(3), + new IntWritable(2), new IntWritable(4))); + + List inputList = new ArrayList<>(); + inputList.add(i1); + inputList.add(i2); + inputList.add(i3); + inputList.add(i4); + runAndVerify(inputList, asList(i1, i3), asList(i2, i4)); + } + + @Test + public void testMap() throws HiveException { + ObjectInspector[] inputOIs = { + ObjectInspectorFactory.getStandardListObjectInspector( + ObjectInspectorFactory.getStandardMapObjectInspector( + PrimitiveObjectInspectorFactory.writableStringObjectInspector, + PrimitiveObjectInspectorFactory.writableIntObjectInspector + ) + ), + ObjectInspectorFactory.getStandardListObjectInspector( + ObjectInspectorFactory.getStandardMapObjectInspector( + PrimitiveObjectInspectorFactory.writableStringObjectInspector, + 
PrimitiveObjectInspectorFactory.writableIntObjectInspector + ) + ) + }; + udf.initialize(inputOIs); + + Map m1 = new HashMap<>(); + m1.put(new Text("a"), new IntWritable(4)); + m1.put(new Text("b"), new IntWritable(3)); + m1.put(new Text("c"), new IntWritable(1)); + m1.put(new Text("d"), new IntWritable(2)); + + Map m2 = new HashMap<>(); + m2.put(new Text("d"), new IntWritable(4)); + m2.put(new Text("b"), new IntWritable(3)); + m2.put(new Text("a"), new IntWritable(1)); + m2.put(new Text("c"), new IntWritable(2)); + + Map m3 = new HashMap<>(); + m3.put(new Text("d"), new IntWritable(4)); + m3.put(new Text("b"), new IntWritable(3)); + m3.put(new Text("a"), new IntWritable(1)); + + Map m4 = new HashMap<>(); // same entries as m3 except key "e" in place of "d" + m4.put(new Text("e"), new IntWritable(4)); + m4.put(new Text("b"), new IntWritable(3)); + m4.put(new Text("a"), new IntWritable(1)); + + List inputList = new ArrayList<>(); + inputList.add(m1); + inputList.add(m3); + inputList.add(m2); + inputList.add(m4); + inputList.add(m1); + runAndVerify(inputList, asList(m1,m3), asList(m2,m4)); + } + + private void runAndVerify(List actual, List actual2, List expected) + throws HiveException { + GenericUDF.DeferredJavaObject[] args = {new GenericUDF.DeferredJavaObject(actual), new GenericUDF.DeferredJavaObject(actual2)}; + List result = (List) udf.evaluate(args); + Assert.assertArrayEquals("Check content", expected.toArray(), result.toArray()); + } +} diff --git a/ql/src/test/queries/clientpositive/udf_array_except.q b/ql/src/test/queries/clientpositive/udf_array_except.q new file mode 100644 index 000000000000..345898355f8d --- /dev/null +++ b/ql/src/test/queries/clientpositive/udf_array_except.q @@ -0,0 +1,38 @@ +--!
qt:dataset:src + +-- SORT_QUERY_RESULTS + +set hive.fetch.task.conversion=more; + +DESCRIBE FUNCTION array_except; +DESCRIBE FUNCTION EXTENDED array_except; + +-- evalutes function for array of primitives +SELECT array_except(array(1, 2, 3, null,3,4),array(1, 3, null)) FROM src tablesample (1 rows); + +SELECT array_except(array(),array()) FROM src tablesample (1 rows); + +SELECT array_except(array(null),array(null)) FROM src tablesample (1 rows); + +SELECT array_except(array(1.12, 2.23, 3.34, null,1.11,1.12,2.9),array(1.12,3.34,1.11,1.12)) FROM src tablesample (1 rows); + +SELECT array_except(array(1.1234567890, 2.234567890, 3.34567890, null, 3.3456789, 2.234567,1.1234567890),array(1.1234567890, 3.34567890, null,2.234567)) FROM src tablesample (1 rows); + +SELECT array_except(array(11234567890, 2234567890, 334567890, null, 11234567890, 2234567890, 334567890, null),array(11234567890, 2234567890, 334567890)) FROM src tablesample (1 rows); + +SELECT array_except(array(array("a","b","c","d"),array("a","b","c","d"),array("a","b","c","d","e"),null,array("e","a","b","c","d")),array(array("a","b","c","d"),array("a","b","c","d"),array("a","b","c","d","e"),null)) FROM src tablesample (1 rows); + +# handle null array cases + +dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/test_null_array; + +dfs -copyFromLocal ../../data/files/test_null_array.csv ${system:test.tmp.dir}/test_null_array/; + +create external table test_null_array (id int, value Array) ROW FORMAT DELIMITED + FIELDS TERMINATED BY ':' collection items terminated by ',' location '${system:test.tmp.dir}/test_null_array'; + +select value from test_null_array; + +select array_except(value,value) from test_null_array; + +dfs -rm -r ${system:test.tmp.dir}/test_null_array; \ No newline at end of file From 65e4e3834b4464d28360c7d525d794b0851481fa Mon Sep 17 00:00:00 2001 From: Taraka Rama Rao Lethavadla Date: Wed, 1 Mar 2023 06:26:09 +0000 Subject: [PATCH 2/7] HIVE-27112 - implement array_except UDF in Hive --- 
.../clientpositive/llap/show_functions.q.out | 2 + .../llap/udf_array_except.q.out | 112 ++++++++++++++++++ 2 files changed, 114 insertions(+) create mode 100644 ql/src/test/results/clientpositive/llap/udf_array_except.q.out diff --git a/ql/src/test/results/clientpositive/llap/show_functions.q.out b/ql/src/test/results/clientpositive/llap/show_functions.q.out index 60c4d49669d0..c683b08b7225 100644 --- a/ql/src/test/results/clientpositive/llap/show_functions.q.out +++ b/ql/src/test/results/clientpositive/llap/show_functions.q.out @@ -48,6 +48,7 @@ approx_distinct array array_contains array_distinct +array_except array_join array_max array_min @@ -669,6 +670,7 @@ approx_distinct array array_contains array_distinct +array_except array_join array_max array_min diff --git a/ql/src/test/results/clientpositive/llap/udf_array_except.q.out b/ql/src/test/results/clientpositive/llap/udf_array_except.q.out new file mode 100644 index 000000000000..b2063a3b4736 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/udf_array_except.q.out @@ -0,0 +1,112 @@ +PREHOOK: query: DESCRIBE FUNCTION array_except +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION array_except +POSTHOOK: type: DESCFUNCTION +array_except(array, value) - Returns an array of the elements in array1 but not in array2. +PREHOOK: query: DESCRIBE FUNCTION EXTENDED array_except +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED array_except +POSTHOOK: type: DESCFUNCTION +array_except(array, value) - Returns an array of the elements in array1 but not in array2. 
+Example: + > SELECT array_except(array(1, 2, 3,4), array(2,3)) FROM src LIMIT 1; + [1,4] +Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDFArrayExcept +Function type:BUILTIN +PREHOOK: query: SELECT array_except(array(1, 2, 3, null,3,4),array(1, 3, null)) FROM src tablesample (1 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT array_except(array(1, 2, 3, null,3,4),array(1, 3, null)) FROM src tablesample (1 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +[2,4] +PREHOOK: query: SELECT array_except(array(),array()) FROM src tablesample (1 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT array_except(array(),array()) FROM src tablesample (1 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +NULL +PREHOOK: query: SELECT array_except(array(null),array(null)) FROM src tablesample (1 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT array_except(array(null),array(null)) FROM src tablesample (1 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +[] +PREHOOK: query: SELECT array_except(array(1.12, 2.23, 3.34, null,1.11,1.12,2.9),array(1.12,3.34,1.11,1.12)) FROM src tablesample (1 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT array_except(array(1.12, 2.23, 3.34, null,1.11,1.12,2.9),array(1.12,3.34,1.11,1.12)) FROM src tablesample (1 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +[2.23,null,2.9] +PREHOOK: query: SELECT array_except(array(1.1234567890, 2.234567890, 3.34567890, null, 3.3456789, 2.234567,1.1234567890),array(1.1234567890, 3.34567890, null,2.234567)) FROM src tablesample (1 rows) 
+PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT array_except(array(1.1234567890, 2.234567890, 3.34567890, null, 3.3456789, 2.234567,1.1234567890),array(1.1234567890, 3.34567890, null,2.234567)) FROM src tablesample (1 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +[2.23456789] +PREHOOK: query: SELECT array_except(array(11234567890, 2234567890, 334567890, null, 11234567890, 2234567890, 334567890, null),array(11234567890, 2234567890, 334567890)) FROM src tablesample (1 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT array_except(array(11234567890, 2234567890, 334567890, null, 11234567890, 2234567890, 334567890, null),array(11234567890, 2234567890, 334567890)) FROM src tablesample (1 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +[null] +PREHOOK: query: SELECT array_except(array(array("a","b","c","d"),array("a","b","c","d"),array("a","b","c","d","e"),null,array("e","a","b","c","d")),array(array("a","b","c","d"),array("a","b","c","d"),array("a","b","c","d","e"),null)) FROM src tablesample (1 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT array_except(array(array("a","b","c","d"),array("a","b","c","d"),array("a","b","c","d","e"),null,array("e","a","b","c","d")),array(array("a","b","c","d"),array("a","b","c","d"),array("a","b","c","d","e"),null)) FROM src tablesample (1 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +[["e","a","b","c","d"]] +PREHOOK: query: create external table test_null_array (id int, value Array) ROW FORMAT DELIMITED +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +#### A masked pattern was here #### +PREHOOK: Output: database:default +PREHOOK: Output: default@test_null_array 
+POSTHOOK: query: create external table test_null_array (id int, value Array) ROW FORMAT DELIMITED +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_null_array +PREHOOK: query: select value from test_null_array +PREHOOK: type: QUERY +PREHOOK: Input: default@test_null_array +#### A masked pattern was here #### +POSTHOOK: query: select value from test_null_array +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_null_array +#### A masked pattern was here #### +["NULL"] +["null","null"] +[] +PREHOOK: query: select array_except(value,value) from test_null_array +PREHOOK: type: QUERY +PREHOOK: Input: default@test_null_array +#### A masked pattern was here #### +POSTHOOK: query: select array_except(value,value) from test_null_array +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_null_array +#### A masked pattern was here #### +NULL +[] +[] From e16790165f87768419b4f1fbcb18b30fa67f0b5b Mon Sep 17 00:00:00 2001 From: Taraka Rama Rao Lethavadla Date: Thu, 2 Mar 2023 08:53:25 +0000 Subject: [PATCH 3/7] HIVE-27112 - implement array_except UDF in Hive --- .../ql/udf/generic/GenericUDFArrayExcept.java | 4 +-- .../generic/TestGenericUDFArrayExcept.java | 20 +++++++++++--- .../queries/clientpositive/udf_array_except.q | 4 +++ .../llap/udf_array_except.q.out | 26 +++++++++++++++++-- 4 files changed, 46 insertions(+), 8 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFArrayExcept.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFArrayExcept.java index 3bd7ecac9779..f0f2152056d6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFArrayExcept.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFArrayExcept.java @@ -29,9 +29,9 @@ /** * GenericUDFArrayExcept */ -@Description(name = "array_except", value = "_FUNC_(array, value) - Returns an array of the elements in 
array1 but not in array2.", extended = +@Description(name = "array_except", value = "_FUNC_(array1, array2) - Returns an array of the elements in array1 but not in array2.", extended = "Example:\n" + " > SELECT _FUNC_(array(1, 2, 3,4), array(2,3)) FROM src LIMIT 1;\n" - + " [1,4]") @NDV(maxNdv = 2) public class GenericUDFArrayExcept extends AbstractGenericUDFArrayBase { + + " [1,4]") public class GenericUDFArrayExcept extends AbstractGenericUDFArrayBase { static final int ARRAY2_IDX = 1; private static final String FUNC_NAME = "ARRAY_EXCEPT"; diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFArrayExcept.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFArrayExcept.java index 63011832b053..efb5e716afbb 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFArrayExcept.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFArrayExcept.java @@ -55,12 +55,10 @@ public void testPrimitive() throws HiveException { Object i2 = new IntWritable(2); Object i3 = new IntWritable(4); Object i4 = new IntWritable(5); - Object i5 = new IntWritable(1); Object i6 = new IntWritable(3); Object i7 = new IntWritable(2); Object i8 = new IntWritable(9); - List inputList = new ArrayList<>(); inputList.add(i1); inputList.add(i2); @@ -68,11 +66,11 @@ public void testPrimitive() throws HiveException { inputList.add(i4); runAndVerify(inputList, asList(i5, i6, i7, i8), asList(i3,i4)); + i1 = new FloatWritable(3.3f); i2 = new FloatWritable(1.1f); i3 = new FloatWritable(4.3f); i4 = new FloatWritable(2.22f); - i5 = new FloatWritable(3.3f); i6 = new FloatWritable(1.1f); i7 = new FloatWritable(2.28f); @@ -83,7 +81,21 @@ public void testPrimitive() throws HiveException { inputListf.add(i3); inputListf.add(i4); - runAndVerify(inputListf, asList(i5, i6, i7, i8), asList(i3, i4)); + runAndVerify(new ArrayList<>(inputListf), asList(i5, i6, i7, i8), asList(i3, i4)); + + runAndVerify(new 
ArrayList<>(inputListf),inputList,asList(i1,i2,i3,i4)); // Int & float arrays + + Object s1 = new Text("1"); + Object s2 = new Text("2"); + Object s3 = new Text("4"); + Object s4 = new Text("5"); + List inputLists = new ArrayList<>(); + inputLists.add(s1); + inputLists.add(s2); + inputLists.add(s3); + inputLists.add(s4); + + runAndVerify(new ArrayList<>(inputListf),inputLists,asList(i1,i2,i3,i4)); // float and string arrays } @Test diff --git a/ql/src/test/queries/clientpositive/udf_array_except.q b/ql/src/test/queries/clientpositive/udf_array_except.q index 345898355f8d..e54a145fd899 100644 --- a/ql/src/test/queries/clientpositive/udf_array_except.q +++ b/ql/src/test/queries/clientpositive/udf_array_except.q @@ -35,4 +35,8 @@ select value from test_null_array; select array_except(value,value) from test_null_array; +select value, array_except(value,value) from test_null_array; + +SELECT array_except(array(1, 2, 3, null,3,4),value) from test_null_array; + dfs -rm -r ${system:test.tmp.dir}/test_null_array; \ No newline at end of file diff --git a/ql/src/test/results/clientpositive/llap/udf_array_except.q.out b/ql/src/test/results/clientpositive/llap/udf_array_except.q.out index b2063a3b4736..f6c0fb866fde 100644 --- a/ql/src/test/results/clientpositive/llap/udf_array_except.q.out +++ b/ql/src/test/results/clientpositive/llap/udf_array_except.q.out @@ -2,12 +2,12 @@ PREHOOK: query: DESCRIBE FUNCTION array_except PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESCRIBE FUNCTION array_except POSTHOOK: type: DESCFUNCTION -array_except(array, value) - Returns an array of the elements in array1 but not in array2. +array_except(array1, array2) - Returns an array of the elements in array1 but not in array2. PREHOOK: query: DESCRIBE FUNCTION EXTENDED array_except PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESCRIBE FUNCTION EXTENDED array_except POSTHOOK: type: DESCFUNCTION -array_except(array, value) - Returns an array of the elements in array1 but not in array2. 
+array_except(array1, array2) - Returns an array of the elements in array1 but not in array2. Example: > SELECT array_except(array(1, 2, 3,4), array(2,3)) FROM src LIMIT 1; [1,4] @@ -110,3 +110,25 @@ POSTHOOK: Input: default@test_null_array NULL [] [] +PREHOOK: query: select value, array_except(value,value) from test_null_array +PREHOOK: type: QUERY +PREHOOK: Input: default@test_null_array +#### A masked pattern was here #### +POSTHOOK: query: select value, array_except(value,value) from test_null_array +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_null_array +#### A masked pattern was here #### +[] NULL +[] [] +[] [] +PREHOOK: query: SELECT array_except(array(1, 2, 3, null,3,4),value) from test_null_array +PREHOOK: type: QUERY +PREHOOK: Input: default@test_null_array +#### A masked pattern was here #### +POSTHOOK: query: SELECT array_except(array(1, 2, 3, null,3,4),value) from test_null_array +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_null_array +#### A masked pattern was here #### +[1,2,3,null,4] +[1,2,3,null,4] +[1,2,3,null,4] From 75ebbf661cec87f1ee506d60511894375c5ae4c0 Mon Sep 17 00:00:00 2001 From: Taraka Rama Rao Lethavadla Date: Wed, 10 May 2023 13:06:36 +0530 Subject: [PATCH 4/7] HIVE-27112 - implement array_except UDF in Hive --- .../queries/clientpositive/udf_array_except.q | 14 ++--- .../llap/udf_array_except.q.out | 56 +++++++++---------- 2 files changed, 35 insertions(+), 35 deletions(-) diff --git a/ql/src/test/queries/clientpositive/udf_array_except.q b/ql/src/test/queries/clientpositive/udf_array_except.q index e54a145fd899..2187b7f04f22 100644 --- a/ql/src/test/queries/clientpositive/udf_array_except.q +++ b/ql/src/test/queries/clientpositive/udf_array_except.q @@ -8,19 +8,19 @@ DESCRIBE FUNCTION array_except; DESCRIBE FUNCTION EXTENDED array_except; -- evalutes function for array of primitives -SELECT array_except(array(1, 2, 3, null,3,4),array(1, 3, null)) FROM src tablesample (1 rows); +SELECT array_except(array(1, 2, 3, 
null,3,4),array(1, 3, null)); -SELECT array_except(array(),array()) FROM src tablesample (1 rows); +SELECT array_except(array(),array()); -SELECT array_except(array(null),array(null)) FROM src tablesample (1 rows); +SELECT array_except(array(null),array(null)); -SELECT array_except(array(1.12, 2.23, 3.34, null,1.11,1.12,2.9),array(1.12,3.34,1.11,1.12)) FROM src tablesample (1 rows); +SELECT array_except(array(1.12, 2.23, 3.34, null,1.11,1.12,2.9),array(1.12,3.34,1.11,1.12)); -SELECT array_except(array(1.1234567890, 2.234567890, 3.34567890, null, 3.3456789, 2.234567,1.1234567890),array(1.1234567890, 3.34567890, null,2.234567)) FROM src tablesample (1 rows); +SELECT array_except(array(1.1234567890, 2.234567890, 3.34567890, null, 3.3456789, 2.234567,1.1234567890),array(1.1234567890, 3.34567890, null,2.234567)); -SELECT array_except(array(11234567890, 2234567890, 334567890, null, 11234567890, 2234567890, 334567890, null),array(11234567890, 2234567890, 334567890)) FROM src tablesample (1 rows); +SELECT array_except(array(11234567890, 2234567890, 334567890, null, 11234567890, 2234567890, 334567890, null),array(11234567890, 2234567890, 334567890)); -SELECT array_except(array(array("a","b","c","d"),array("a","b","c","d"),array("a","b","c","d","e"),null,array("e","a","b","c","d")),array(array("a","b","c","d"),array("a","b","c","d"),array("a","b","c","d","e"),null)) FROM src tablesample (1 rows); +SELECT array_except(array(array("a","b","c","d"),array("a","b","c","d"),array("a","b","c","d","e"),null,array("e","a","b","c","d")),array(array("a","b","c","d"),array("a","b","c","d"),array("a","b","c","d","e"),null)); # handle null array cases diff --git a/ql/src/test/results/clientpositive/llap/udf_array_except.q.out b/ql/src/test/results/clientpositive/llap/udf_array_except.q.out index f6c0fb866fde..88783bba0010 100644 --- a/ql/src/test/results/clientpositive/llap/udf_array_except.q.out +++ b/ql/src/test/results/clientpositive/llap/udf_array_except.q.out @@ -13,67 +13,67 @@ 
Example: [1,4] Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDFArrayExcept Function type:BUILTIN -PREHOOK: query: SELECT array_except(array(1, 2, 3, null,3,4),array(1, 3, null)) FROM src tablesample (1 rows) +PREHOOK: query: SELECT array_except(array(1, 2, 3, null,3,4),array(1, 3, null)) PREHOOK: type: QUERY -PREHOOK: Input: default@src +PREHOOK: Input: _dummy_database@_dummy_table #### A masked pattern was here #### -POSTHOOK: query: SELECT array_except(array(1, 2, 3, null,3,4),array(1, 3, null)) FROM src tablesample (1 rows) +POSTHOOK: query: SELECT array_except(array(1, 2, 3, null,3,4),array(1, 3, null)) POSTHOOK: type: QUERY -POSTHOOK: Input: default@src +POSTHOOK: Input: _dummy_database@_dummy_table #### A masked pattern was here #### [2,4] -PREHOOK: query: SELECT array_except(array(),array()) FROM src tablesample (1 rows) +PREHOOK: query: SELECT array_except(array(),array()) PREHOOK: type: QUERY -PREHOOK: Input: default@src +PREHOOK: Input: _dummy_database@_dummy_table #### A masked pattern was here #### -POSTHOOK: query: SELECT array_except(array(),array()) FROM src tablesample (1 rows) +POSTHOOK: query: SELECT array_except(array(),array()) POSTHOOK: type: QUERY -POSTHOOK: Input: default@src +POSTHOOK: Input: _dummy_database@_dummy_table #### A masked pattern was here #### NULL -PREHOOK: query: SELECT array_except(array(null),array(null)) FROM src tablesample (1 rows) +PREHOOK: query: SELECT array_except(array(null),array(null)) PREHOOK: type: QUERY -PREHOOK: Input: default@src +PREHOOK: Input: _dummy_database@_dummy_table #### A masked pattern was here #### -POSTHOOK: query: SELECT array_except(array(null),array(null)) FROM src tablesample (1 rows) +POSTHOOK: query: SELECT array_except(array(null),array(null)) POSTHOOK: type: QUERY -POSTHOOK: Input: default@src +POSTHOOK: Input: _dummy_database@_dummy_table #### A masked pattern was here #### [] -PREHOOK: query: SELECT array_except(array(1.12, 2.23, 3.34, 
null,1.11,1.12,2.9),array(1.12,3.34,1.11,1.12)) FROM src tablesample (1 rows) +PREHOOK: query: SELECT array_except(array(1.12, 2.23, 3.34, null,1.11,1.12,2.9),array(1.12,3.34,1.11,1.12)) PREHOOK: type: QUERY -PREHOOK: Input: default@src +PREHOOK: Input: _dummy_database@_dummy_table #### A masked pattern was here #### -POSTHOOK: query: SELECT array_except(array(1.12, 2.23, 3.34, null,1.11,1.12,2.9),array(1.12,3.34,1.11,1.12)) FROM src tablesample (1 rows) +POSTHOOK: query: SELECT array_except(array(1.12, 2.23, 3.34, null,1.11,1.12,2.9),array(1.12,3.34,1.11,1.12)) POSTHOOK: type: QUERY -POSTHOOK: Input: default@src +POSTHOOK: Input: _dummy_database@_dummy_table #### A masked pattern was here #### [2.23,null,2.9] -PREHOOK: query: SELECT array_except(array(1.1234567890, 2.234567890, 3.34567890, null, 3.3456789, 2.234567,1.1234567890),array(1.1234567890, 3.34567890, null,2.234567)) FROM src tablesample (1 rows) +PREHOOK: query: SELECT array_except(array(1.1234567890, 2.234567890, 3.34567890, null, 3.3456789, 2.234567,1.1234567890),array(1.1234567890, 3.34567890, null,2.234567)) PREHOOK: type: QUERY -PREHOOK: Input: default@src +PREHOOK: Input: _dummy_database@_dummy_table #### A masked pattern was here #### -POSTHOOK: query: SELECT array_except(array(1.1234567890, 2.234567890, 3.34567890, null, 3.3456789, 2.234567,1.1234567890),array(1.1234567890, 3.34567890, null,2.234567)) FROM src tablesample (1 rows) +POSTHOOK: query: SELECT array_except(array(1.1234567890, 2.234567890, 3.34567890, null, 3.3456789, 2.234567,1.1234567890),array(1.1234567890, 3.34567890, null,2.234567)) POSTHOOK: type: QUERY -POSTHOOK: Input: default@src +POSTHOOK: Input: _dummy_database@_dummy_table #### A masked pattern was here #### [2.23456789] -PREHOOK: query: SELECT array_except(array(11234567890, 2234567890, 334567890, null, 11234567890, 2234567890, 334567890, null),array(11234567890, 2234567890, 334567890)) FROM src tablesample (1 rows) +PREHOOK: query: SELECT array_except(array(11234567890, 
2234567890, 334567890, null, 11234567890, 2234567890, 334567890, null),array(11234567890, 2234567890, 334567890)) PREHOOK: type: QUERY -PREHOOK: Input: default@src +PREHOOK: Input: _dummy_database@_dummy_table #### A masked pattern was here #### -POSTHOOK: query: SELECT array_except(array(11234567890, 2234567890, 334567890, null, 11234567890, 2234567890, 334567890, null),array(11234567890, 2234567890, 334567890)) FROM src tablesample (1 rows) +POSTHOOK: query: SELECT array_except(array(11234567890, 2234567890, 334567890, null, 11234567890, 2234567890, 334567890, null),array(11234567890, 2234567890, 334567890)) POSTHOOK: type: QUERY -POSTHOOK: Input: default@src +POSTHOOK: Input: _dummy_database@_dummy_table #### A masked pattern was here #### [null] -PREHOOK: query: SELECT array_except(array(array("a","b","c","d"),array("a","b","c","d"),array("a","b","c","d","e"),null,array("e","a","b","c","d")),array(array("a","b","c","d"),array("a","b","c","d"),array("a","b","c","d","e"),null)) FROM src tablesample (1 rows) +PREHOOK: query: SELECT array_except(array(array("a","b","c","d"),array("a","b","c","d"),array("a","b","c","d","e"),null,array("e","a","b","c","d")),array(array("a","b","c","d"),array("a","b","c","d"),array("a","b","c","d","e"),null)) PREHOOK: type: QUERY -PREHOOK: Input: default@src +PREHOOK: Input: _dummy_database@_dummy_table #### A masked pattern was here #### -POSTHOOK: query: SELECT array_except(array(array("a","b","c","d"),array("a","b","c","d"),array("a","b","c","d","e"),null,array("e","a","b","c","d")),array(array("a","b","c","d"),array("a","b","c","d"),array("a","b","c","d","e"),null)) FROM src tablesample (1 rows) +POSTHOOK: query: SELECT array_except(array(array("a","b","c","d"),array("a","b","c","d"),array("a","b","c","d","e"),null,array("e","a","b","c","d")),array(array("a","b","c","d"),array("a","b","c","d"),array("a","b","c","d","e"),null)) POSTHOOK: type: QUERY -POSTHOOK: Input: default@src +POSTHOOK: Input: _dummy_database@_dummy_table #### A 
masked pattern was here #### [["e","a","b","c","d"]] PREHOOK: query: create external table test_null_array (id int, value Array) ROW FORMAT DELIMITED From e7a3444f9df3dabea6230cd77c10229252980ce0 Mon Sep 17 00:00:00 2001 From: Taraka Rama Rao Lethavadla Date: Fri, 9 Jun 2023 21:31:24 +0530 Subject: [PATCH 5/7] HIVE-27112 - implement array_except UDF in Hive --- .../ql/udf/generic/GenericUDFArrayExcept.java | 29 +++++++-- .../generic/TestGenericUDFArrayExcept.java | 61 ++++++++++++------- .../queries/clientpositive/udf_array_except.q | 4 +- .../llap/udf_array_except.q.out | 28 ++++----- 4 files changed, 80 insertions(+), 42 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFArrayExcept.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFArrayExcept.java index f0f2152056d6..4d7fab09fc7a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFArrayExcept.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFArrayExcept.java @@ -19,10 +19,13 @@ import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import java.util.ArrayList; import java.util.List; import java.util.stream.Collectors; @@ -31,9 +34,14 @@ */ @Description(name = "array_except", value = "_FUNC_(array1, array2) - Returns an array of the elements in array1 but not in array2.", extended = "Example:\n" + " > SELECT _FUNC_(array(1, 2, 3,4), array(2,3)) FROM src LIMIT 1;\n" - + " [1,4]") public class GenericUDFArrayExcept extends AbstractGenericUDFArrayBase { + + " [1,4]") +public class GenericUDFArrayExcept extends 
AbstractGenericUDFArrayBase { static final int ARRAY2_IDX = 1; private static final String FUNC_NAME = "ARRAY_EXCEPT"; + static final String ERROR_NOT_COMPARABLE = "Input arrays are not comparable to use ARRAY_EXCEPT udf"; + private transient ListObjectInspector array2OI; + private transient ObjectInspector arrayElementOI; + private transient ObjectInspector array2ElementOI; public GenericUDFArrayExcept() { super(FUNC_NAME, 2, 2, ObjectInspector.Category.LIST); @@ -41,19 +49,32 @@ public GenericUDFArrayExcept() { @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { ObjectInspector defaultOI = super.initialize(arguments); + array2OI = (ListObjectInspector) arguments[ARRAY2_IDX]; checkArgCategory(arguments, ARRAY2_IDX, ObjectInspector.Category.LIST, FUNC_NAME, org.apache.hadoop.hive.serde.serdeConstants.LIST_TYPE_NAME); //Array1 is already getting validated in Parent class + arrayElementOI = arrayOI.getListElementObjectInspector(); + array2ElementOI = array2OI.getListElementObjectInspector(); + if (!ObjectInspectorUtils.compareTypes(arrayElementOI, array2ElementOI)) { // check if elements of arrays are comparable + throw new UDFArgumentTypeException(1, ERROR_NOT_COMPARABLE); + } return defaultOI; } @Override public Object evaluate(DeferredObject[] arguments) throws HiveException { Object array = arguments[ARRAY_IDX].get(); - if (array == null || arrayOI.getListLength(array) <= 0) { + Object array2 = arguments[ARRAY2_IDX].get(); + if (array == null) { + return null; + } + + if (array2 == null) { return null; } List retArray3 = ((ListObjectInspector) argumentOIs[ARRAY_IDX]).getList(array); - retArray3.removeAll(((ListObjectInspector) argumentOIs[ARRAY2_IDX]).getList(arguments[ARRAY2_IDX].get())); - return retArray3.stream().distinct().map(o -> converter.convert(o)).collect(Collectors.toList()); + List inputArrayCopy = new ArrayList<>(); + inputArrayCopy.addAll(retArray3); + 
inputArrayCopy.removeAll(((ListObjectInspector) argumentOIs[ARRAY2_IDX]).getList(arguments[ARRAY2_IDX].get())); + return inputArrayCopy.stream().distinct().map(o -> converter.convert(o)).collect(Collectors.toList()); } } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFArrayExcept.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFArrayExcept.java index efb5e716afbb..bd7dac741b83 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFArrayExcept.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFArrayExcept.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.udf.generic; import org.apache.hadoop.hive.common.type.Date; +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.io.DateWritableV2; import org.apache.hadoop.hive.serde2.io.DoubleWritable; @@ -43,13 +44,16 @@ public class TestGenericUDFArrayExcept { @Test public void testPrimitive() throws HiveException { - ObjectInspector[] inputOIs = { - ObjectInspectorFactory.getStandardListObjectInspector( - PrimitiveObjectInspectorFactory.writableIntObjectInspector), - ObjectInspectorFactory.getStandardListObjectInspector( - PrimitiveObjectInspectorFactory.writableIntObjectInspector) - }; - udf.initialize(inputOIs); + ObjectInspector intObjectInspector = ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableIntObjectInspector); + ObjectInspector floatObjectInspector = ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableFloatObjectInspector); + ObjectInspector doubleObjectInspector = ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); + ObjectInspector longObjectInspector = ObjectInspectorFactory.getStandardListObjectInspector( + 
PrimitiveObjectInspectorFactory.writableLongObjectInspector); + ObjectInspector stringObjectInspector = ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableStringObjectInspector); Object i1 = new IntWritable(1); Object i2 = new IntWritable(2); @@ -65,6 +69,7 @@ public void testPrimitive() throws HiveException { inputList.add(i3); inputList.add(i4); + udf.initialize(new ObjectInspector[] { intObjectInspector, intObjectInspector }); runAndVerify(inputList, asList(i5, i6, i7, i8), asList(i3,i4)); i1 = new FloatWritable(3.3f); @@ -75,27 +80,41 @@ public void testPrimitive() throws HiveException { i6 = new FloatWritable(1.1f); i7 = new FloatWritable(2.28f); i8 = new FloatWritable(2.20f); - List inputListf = new ArrayList<>(); - inputListf.add(i1); - inputListf.add(i2); - inputListf.add(i3); - inputListf.add(i4); - - runAndVerify(new ArrayList<>(inputListf), asList(i5, i6, i7, i8), asList(i3, i4)); + List inputFloatList = new ArrayList<>(); + inputFloatList.add(i1); + inputFloatList.add(i2); + inputFloatList.add(i3); + inputFloatList.add(i4); - runAndVerify(new ArrayList<>(inputListf),inputList,asList(i1,i2,i3,i4)); // Int & float arrays + udf.initialize(new ObjectInspector[] { floatObjectInspector, floatObjectInspector }); + runAndVerify(new ArrayList<>(inputFloatList), asList(i5, i6, i7, i8), asList(i3, i4)); Object s1 = new Text("1"); Object s2 = new Text("2"); Object s3 = new Text("4"); Object s4 = new Text("5"); - List inputLists = new ArrayList<>(); - inputLists.add(s1); - inputLists.add(s2); - inputLists.add(s3); - inputLists.add(s4); + List inputStringList = new ArrayList<>(); + inputStringList.add(s1); + inputStringList.add(s2); + inputStringList.add(s3); + inputStringList.add(s4); - runAndVerify(new ArrayList<>(inputListf),inputLists,asList(i1,i2,i3,i4)); // float and string arrays + udf.initialize(new ObjectInspector[] { stringObjectInspector, stringObjectInspector }); + 
runAndVerify(inputStringList,asList(s1,s3),asList(s2,s4)); + // Empty array output + runAndVerify(inputStringList,inputStringList,asList()); + runAndVerify(inputStringList,asList(),inputStringList); + // Empty input arrays + runAndVerify(asList(),asList(),asList()); + // Int & float arrays + UDFArgumentTypeException exception = Assert.assertThrows(UDFArgumentTypeException.class, () -> udf.initialize(new ObjectInspector[] { floatObjectInspector, intObjectInspector })); + Assert.assertEquals(GenericUDFArrayExcept.ERROR_NOT_COMPARABLE,exception.getMessage()); + // float and string arrays + exception = Assert.assertThrows(UDFArgumentTypeException.class, () -> udf.initialize(new ObjectInspector[] { floatObjectInspector, stringObjectInspector })); + Assert.assertEquals(GenericUDFArrayExcept.ERROR_NOT_COMPARABLE,exception.getMessage()); + // long and double arrays + exception = Assert.assertThrows(UDFArgumentTypeException.class, () -> udf.initialize(new ObjectInspector[] { longObjectInspector, doubleObjectInspector })); + Assert.assertEquals(GenericUDFArrayExcept.ERROR_NOT_COMPARABLE,exception.getMessage()); } @Test diff --git a/ql/src/test/queries/clientpositive/udf_array_except.q b/ql/src/test/queries/clientpositive/udf_array_except.q index 2187b7f04f22..ec5cfd2c1c26 100644 --- a/ql/src/test/queries/clientpositive/udf_array_except.q +++ b/ql/src/test/queries/clientpositive/udf_array_except.q @@ -16,6 +16,8 @@ SELECT array_except(array(null),array(null)); SELECT array_except(array(1.12, 2.23, 3.34, null,1.11,1.12,2.9),array(1.12,3.34,1.11,1.12)); +SELECT array(1,2,3),array_except(array(1, 2, 3),array(1,3,4)); + SELECT array_except(array(1.1234567890, 2.234567890, 3.34567890, null, 3.3456789, 2.234567,1.1234567890),array(1.1234567890, 3.34567890, null,2.234567)); SELECT array_except(array(11234567890, 2234567890, 334567890, null, 11234567890, 2234567890, 334567890, null),array(11234567890, 2234567890, 334567890)); @@ -37,6 +39,4 @@ select array_except(value,value) from 
test_null_array; select value, array_except(value,value) from test_null_array; -SELECT array_except(array(1, 2, 3, null,3,4),value) from test_null_array; - dfs -rm -r ${system:test.tmp.dir}/test_null_array; \ No newline at end of file diff --git a/ql/src/test/results/clientpositive/llap/udf_array_except.q.out b/ql/src/test/results/clientpositive/llap/udf_array_except.q.out index 88783bba0010..8270af61b07a 100644 --- a/ql/src/test/results/clientpositive/llap/udf_array_except.q.out +++ b/ql/src/test/results/clientpositive/llap/udf_array_except.q.out @@ -30,7 +30,7 @@ POSTHOOK: query: SELECT array_except(array(),array()) POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table #### A masked pattern was here #### -NULL +[] PREHOOK: query: SELECT array_except(array(null),array(null)) PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table @@ -49,6 +49,15 @@ POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table #### A masked pattern was here #### [2.23,null,2.9] +PREHOOK: query: SELECT array(1,2,3),array_except(array(1, 2, 3),array(1,3,4)) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: SELECT array(1,2,3),array_except(array(1, 2, 3),array(1,3,4)) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +[1,2,3] [2] PREHOOK: query: SELECT array_except(array(1.1234567890, 2.234567890, 3.34567890, null, 3.3456789, 2.234567,1.1234567890),array(1.1234567890, 3.34567890, null,2.234567)) PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table @@ -107,7 +116,7 @@ POSTHOOK: query: select array_except(value,value) from test_null_array POSTHOOK: type: QUERY POSTHOOK: Input: default@test_null_array #### A masked pattern was here #### -NULL +[] [] [] PREHOOK: query: select value, array_except(value,value) from test_null_array @@ -118,17 +127,6 @@ POSTHOOK: query: select value, array_except(value,value) from 
test_null_array POSTHOOK: type: QUERY POSTHOOK: Input: default@test_null_array #### A masked pattern was here #### -[] NULL +["NULL"] [] +["null","null"] [] [] [] -[] [] -PREHOOK: query: SELECT array_except(array(1, 2, 3, null,3,4),value) from test_null_array -PREHOOK: type: QUERY -PREHOOK: Input: default@test_null_array -#### A masked pattern was here #### -POSTHOOK: query: SELECT array_except(array(1, 2, 3, null,3,4),value) from test_null_array -POSTHOOK: type: QUERY -POSTHOOK: Input: default@test_null_array -#### A masked pattern was here #### -[1,2,3,null,4] -[1,2,3,null,4] -[1,2,3,null,4] From f0b9d0e1a0cb3fc9acebcf4a8e98e3960067c22e Mon Sep 17 00:00:00 2001 From: tarak271 Date: Mon, 12 Jun 2023 14:09:27 +0530 Subject: [PATCH 6/7] Update show_functions.q.out --- ql/src/test/results/clientpositive/llap/show_functions.q.out | 1 + 1 file changed, 1 insertion(+) diff --git a/ql/src/test/results/clientpositive/llap/show_functions.q.out b/ql/src/test/results/clientpositive/llap/show_functions.q.out index 5475988e12ab..d3e74dde3b9b 100644 --- a/ql/src/test/results/clientpositive/llap/show_functions.q.out +++ b/ql/src/test/results/clientpositive/llap/show_functions.q.out @@ -49,6 +49,7 @@ array array_contains array_distinct array_except +array_intersect array_join array_max array_min From 2464f3bcc2641af92b05be17e761c1c995a890ea Mon Sep 17 00:00:00 2001 From: Taraka Rama Rao Lethavadla Date: Mon, 12 Jun 2023 16:52:55 +0530 Subject: [PATCH 7/7] HIVE-27112 - implement array_except UDF in Hive --- .../hadoop/hive/ql/udf/generic/GenericUDFArrayExcept.java | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFArrayExcept.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFArrayExcept.java index 4d7fab09fc7a..19d06d7c6bee 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFArrayExcept.java +++ 
b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFArrayExcept.java @@ -39,9 +39,6 @@ public class GenericUDFArrayExcept extends AbstractGenericUDFArrayBase { static final int ARRAY2_IDX = 1; private static final String FUNC_NAME = "ARRAY_EXCEPT"; static final String ERROR_NOT_COMPARABLE = "Input arrays are not comparable to use ARRAY_EXCEPT udf"; - private transient ListObjectInspector array2OI; - private transient ObjectInspector arrayElementOI; - private transient ObjectInspector array2ElementOI; public GenericUDFArrayExcept() { super(FUNC_NAME, 2, 2, ObjectInspector.Category.LIST); @@ -49,12 +46,9 @@ public GenericUDFArrayExcept() { @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { ObjectInspector defaultOI = super.initialize(arguments); - array2OI = (ListObjectInspector) arguments[ARRAY2_IDX]; checkArgCategory(arguments, ARRAY2_IDX, ObjectInspector.Category.LIST, FUNC_NAME, org.apache.hadoop.hive.serde.serdeConstants.LIST_TYPE_NAME); //Array1 is already getting validated in Parent class - arrayElementOI = arrayOI.getListElementObjectInspector(); - array2ElementOI = array2OI.getListElementObjectInspector(); - if (!ObjectInspectorUtils.compareTypes(arrayElementOI, array2ElementOI)) { // check if elements of arrays are comparable + if (!ObjectInspectorUtils.compareTypes(arrayOI.getListElementObjectInspector(), ((ListObjectInspector) arguments[ARRAY2_IDX]).getListElementObjectInspector())) { // check if elements of arrays are comparable throw new UDFArgumentTypeException(1, ERROR_NOT_COMPARABLE); } return defaultOI;