Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -603,6 +603,7 @@ public final class FunctionRegistry {
system.registerGenericUDF("array_min", GenericUDFArrayMin.class);
system.registerGenericUDF("array_max", GenericUDFArrayMax.class);
system.registerGenericUDF("array_distinct", GenericUDFArrayDistinct.class);
system.registerGenericUDF("array_join", GenericUDFArrayJoin.class);
system.registerGenericUDF("array_slice", GenericUDFArraySlice.class);
system.registerGenericUDF("deserialize", GenericUDFDeserialize.class);
system.registerGenericUDF("sentences", GenericUDFSentences.class);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.udf.generic;

import com.google.common.base.Joiner;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.Text;

import java.util.List;

/**
 * GenericUDFArrayJoin.
 *
 * <p>Implements the {@code array_join(array, delimiter[, replaceNull])} SQL function: the
 * elements of the array are converted with {@code toString()} and concatenated with the given
 * delimiter. When a non-null {@code replaceNull} argument is supplied, null elements are rendered
 * as that string; otherwise null elements are skipped.
 *
 * <p>The result is SQL NULL when the array's reported length is not positive (for example an
 * empty array) or when the delimiter itself is NULL.
 */
@Description(name = "array_join",
    value = "_FUNC_(array, delimiter, replaceNull) - concatenate the elements of an array with a specified delimiter",
    extended = "Example:\n"
        + " > SELECT _FUNC_(array(1, 2, 3,4), ',') FROM src LIMIT 1;\n"
        + " 1,2,3,4\n"
        + " > SELECT _FUNC_(array(1, 2, NULL, 4), ',',':') FROM src LIMIT 1;\n"
        + " 1,2,:,4")
public class GenericUDFArrayJoin extends AbstractGenericUDFArrayBase {
  /** Position of the delimiter argument. */
  private static final int SEPARATOR_IDX = 1;
  /** Position of the optional null-replacement argument. */
  private static final int REPLACE_NULL_IDX = 2;
  /** Output holder; the same instance is overwritten and returned by every evaluate call. */
  private final Text result = new Text();

  public GenericUDFArrayJoin() {
    // NOTE(review): 2 and 3 are presumably the minimum and maximum argument counts enforced by
    // the base class - confirm against AbstractGenericUDFArrayBase.
    super("ARRAY_JOIN", 2, 3, ObjectInspector.Category.PRIMITIVE);
  }

  /**
   * Delegates argument handling to the base class and declares a writable string result.
   *
   * @param arguments object inspectors of the call-site arguments
   * @return the writable string object inspector
   * @throws UDFArgumentException if the base class rejects the arguments
   */
  @Override
  public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
    super.initialize(arguments);
    return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
  }

  /**
   * Joins the elements of the array argument into a single string.
   *
   * @param arguments the array, the delimiter and, optionally, the replacement for null elements
   * @return the shared {@link Text} holding the joined string, or {@code null} when the array
   *     length is not positive or the delimiter is null
   * @throws HiveException if a deferred argument cannot be evaluated
   */
  @Override
  public Object evaluate(DeferredObject[] arguments) throws HiveException {
    Object array = arguments[ARRAY_IDX].get();
    if (arrayOI.getListLength(array) <= 0) {
      return null;
    }

    // A NULL delimiter used to fail with a NullPointerException; yield SQL NULL instead.
    Object separator = arguments[SEPARATOR_IDX].get();
    if (separator == null) {
      return null;
    }

    // Evaluate the optional deferred argument only once.
    Object replacement = arguments.length > REPLACE_NULL_IDX ? arguments[REPLACE_NULL_IDX].get() : null;

    Joiner joiner = Joiner.on(separator.toString());
    // Joiner rejects null elements unless told to substitute or skip them.
    joiner = replacement != null ? joiner.useForNull(replacement.toString()) : joiner.skipNulls();

    List<?> elements = ((ListObjectInspector) argumentOIs[ARRAY_IDX]).getList(array);
    result.set(joiner.join(elements));
    return result;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.hive.ql.udf.generic;

import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.junit.Assert;
import org.junit.Test;

import java.util.List;

import static java.util.Arrays.asList;

/**
 * Unit test for {@link GenericUDFArrayJoin}, driving the UDF directly with writable values.
 */
public class TestGenericUDFArrayJoin {
  private final GenericUDFArrayJoin udf = new GenericUDFArrayJoin();

  /**
   * Joins lists of int, float and text writables, with and without a null replacement.
   */
  @Test
  public void testPrimitive() throws HiveException {
    ObjectInspector intListOI = ObjectInspectorFactory.getStandardListObjectInspector(
        PrimitiveObjectInspectorFactory.writableIntObjectInspector);
    ObjectInspector stringOI = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
    udf.initialize(new ObjectInspector[] { intListOI, stringOI, stringOI });

    // Integers, no null elements, no replacement.
    IntWritable int1 = new IntWritable(3);
    IntWritable int2 = new IntWritable(1);
    IntWritable int3 = new IntWritable(2);
    IntWritable int4 = new IntWritable(1);
    String expectedInts = int1 + "," + int2 + "," + int3 + "," + int4;
    runAndVerify(asList(int1, int2, int3, int4), ",", null, expectedInts);

    // Floats with null elements that are rendered as the replacement string.
    FloatWritable float1 = new FloatWritable(3.3f);
    FloatWritable float2 = new FloatWritable(1.1f);
    FloatWritable float3 = new FloatWritable(3.3f);
    FloatWritable float4 = new FloatWritable(2.20f);
    String expectedFloats = float1 + ",:," + float2 + "," + float3 + ",:," + float4;
    runAndVerify(asList(float1, null, float2, float3, null, float4), ",", ":", expectedFloats);

    // Text with null elements and no replacement: the nulls are skipped.
    Text text1 = new Text("aa1");
    Text text2 = new Text("aa2");
    Text text3 = new Text("aa3");
    Text text4 = new Text("aa4");
    String expectedTexts = text1 + ":" + text2 + ":" + text3 + ":" + text4;
    runAndVerify(asList(text1, null, text2, text3, null, text4), ":", null, expectedTexts);
  }

  /**
   * Evaluates the UDF on the given elements and compares the joined string with the expectation.
   */
  private void runAndVerify(List<?> elements, String separator, String replaceNull, String expected)
      throws HiveException {
    GenericUDF.DeferredJavaObject[] args = new GenericUDF.DeferredJavaObject[] {
        new GenericUDF.DeferredJavaObject(elements),
        new GenericUDF.DeferredJavaObject(textOrNull(separator)),
        new GenericUDF.DeferredJavaObject(textOrNull(replaceNull))
    };
    Object output = udf.evaluate(args);
    Assert.assertEquals("Not equal", expected, ((Text) output).toString());
  }

  /** Wraps a string in a {@link Text}, passing {@code null} through unchanged. */
  private static Text textOrNull(String value) {
    if (value == null) {
      return null;
    }
    return new Text(value);
  }
}

40 changes: 40 additions & 0 deletions ql/src/test/queries/clientpositive/udf_array_join.q
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
--! qt:dataset:src

-- SORT_QUERY_RESULTS

set hive.fetch.task.conversion=more;

-- print the short and the extended description registered for array_join
DESCRIBE FUNCTION array_join;
DESCRIBE FUNCTION EXTENDED array_join;

-- evaluates function for array of primitives
SELECT array_join(array(1, 2, 3, null,3,4),',') FROM src tablesample (1 rows);

-- empty array
SELECT array_join(array(),':') FROM src tablesample (1 rows);

-- array whose only element is null, without a null replacement
SELECT array_join(array(null),',') FROM src tablesample (1 rows);

-- the queries below pass a third argument that is used in place of null elements
SELECT array_join(array(1.12, 2.23, 3.34, null,1.11,1.12,2.9),',',':') FROM src tablesample (1 rows);

SELECT array_join(array(1.1234567890, 2.234567890, 3.34567890, null, 3.3456789, 2.234567,1.1234567890),',',':') FROM src tablesample (1 rows);

SELECT array_join(array(11234567890, 2234567890, 334567890, null, 11234567890, 2234567890, 334567890, null),',',':') FROM src tablesample (1 rows);

-- arrays of arrays, including a null inner array
SELECT array_join(array(array("a","b","c","d"),array("a","b","c","d"),array("a","b","c","d","e"),null,array("e","a","b","c","d")),',',':') FROM src tablesample (1 rows);

-- same as above, with a null element inside the last inner array
SELECT array_join(array(array("a","b","c","d"),array("a","b","c","d"),array("a","b","c","d","e"),null,array("e","a","b","c","d",null)),',',':') FROM src tablesample (1 rows);

# handle null array cases

dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/test_null_array;

dfs -copyFromLocal ../../data/files/test_null_array.csv ${system:test.tmp.dir}/test_null_array/;

create external table test_null_array (id int, value Array<String>) ROW FORMAT DELIMITED
FIELDS TERMINATED BY ':' collection items terminated by ',' location '${system:test.tmp.dir}/test_null_array';

select value from test_null_array;

select array_join(value,',',':') from test_null_array;

dfs -rm -r ${system:test.tmp.dir}/test_null_array;
2 changes: 2 additions & 0 deletions ql/src/test/results/clientpositive/llap/show_functions.q.out
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ approx_distinct
array
array_contains
array_distinct
array_join
array_max
array_min
array_slice
Expand Down Expand Up @@ -668,6 +669,7 @@ approx_distinct
array
array_contains
array_distinct
array_join
array_max
array_min
array_slice
Expand Down
123 changes: 123 additions & 0 deletions ql/src/test/results/clientpositive/llap/udf_array_join.q.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
PREHOOK: query: DESCRIBE FUNCTION array_join
PREHOOK: type: DESCFUNCTION
POSTHOOK: query: DESCRIBE FUNCTION array_join
POSTHOOK: type: DESCFUNCTION
array_join(array, delimiter, replaceNull) - concatenate the elements of an array with a specified delimiter
PREHOOK: query: DESCRIBE FUNCTION EXTENDED array_join
PREHOOK: type: DESCFUNCTION
POSTHOOK: query: DESCRIBE FUNCTION EXTENDED array_join
POSTHOOK: type: DESCFUNCTION
array_join(array, delimiter, replaceNull) - concatenate the elements of an array with a specified delimiter
Example:
> SELECT array_join(array(1, 2, 3,4), ',') FROM src LIMIT 1;
1,2,3,4
> SELECT array_join(array(1, 2, NULL, 4), ',',':') FROM src LIMIT 1;
1,2,:,4
Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDFArrayJoin
Function type:BUILTIN
PREHOOK: query: SELECT array_join(array(1, 2, 3, null,3,4),',') FROM src tablesample (1 rows)
PREHOOK: type: QUERY
PREHOOK: Input: default@src
#### A masked pattern was here ####
POSTHOOK: query: SELECT array_join(array(1, 2, 3, null,3,4),',') FROM src tablesample (1 rows)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####
1,2,3,3,4
PREHOOK: query: SELECT array_join(array(),':') FROM src tablesample (1 rows)
PREHOOK: type: QUERY
PREHOOK: Input: default@src
#### A masked pattern was here ####
POSTHOOK: query: SELECT array_join(array(),':') FROM src tablesample (1 rows)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####
NULL
PREHOOK: query: SELECT array_join(array(null),',') FROM src tablesample (1 rows)
PREHOOK: type: QUERY
PREHOOK: Input: default@src
#### A masked pattern was here ####
POSTHOOK: query: SELECT array_join(array(null),',') FROM src tablesample (1 rows)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####

PREHOOK: query: SELECT array_join(array(1.12, 2.23, 3.34, null,1.11,1.12,2.9),',',':') FROM src tablesample (1 rows)
PREHOOK: type: QUERY
PREHOOK: Input: default@src
#### A masked pattern was here ####
POSTHOOK: query: SELECT array_join(array(1.12, 2.23, 3.34, null,1.11,1.12,2.9),',',':') FROM src tablesample (1 rows)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####
1.12,2.23,3.34,:,1.11,1.12,2.9
PREHOOK: query: SELECT array_join(array(1.1234567890, 2.234567890, 3.34567890, null, 3.3456789, 2.234567,1.1234567890),',',':') FROM src tablesample (1 rows)
PREHOOK: type: QUERY
PREHOOK: Input: default@src
#### A masked pattern was here ####
POSTHOOK: query: SELECT array_join(array(1.1234567890, 2.234567890, 3.34567890, null, 3.3456789, 2.234567,1.1234567890),',',':') FROM src tablesample (1 rows)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####
1.123456789,2.23456789,3.3456789,:,3.3456789,2.234567,1.123456789
PREHOOK: query: SELECT array_join(array(11234567890, 2234567890, 334567890, null, 11234567890, 2234567890, 334567890, null),',',':') FROM src tablesample (1 rows)
PREHOOK: type: QUERY
PREHOOK: Input: default@src
#### A masked pattern was here ####
POSTHOOK: query: SELECT array_join(array(11234567890, 2234567890, 334567890, null, 11234567890, 2234567890, 334567890, null),',',':') FROM src tablesample (1 rows)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####
11234567890,2234567890,334567890,:,11234567890,2234567890,334567890,:
PREHOOK: query: SELECT array_join(array(array("a","b","c","d"),array("a","b","c","d"),array("a","b","c","d","e"),null,array("e","a","b","c","d")),',',':') FROM src tablesample (1 rows)
PREHOOK: type: QUERY
PREHOOK: Input: default@src
#### A masked pattern was here ####
POSTHOOK: query: SELECT array_join(array(array("a","b","c","d"),array("a","b","c","d"),array("a","b","c","d","e"),null,array("e","a","b","c","d")),',',':') FROM src tablesample (1 rows)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####
[a, b, c, d],[a, b, c, d],[a, b, c, d, e],:,[e, a, b, c, d]
PREHOOK: query: SELECT array_join(array(array("a","b","c","d"),array("a","b","c","d"),array("a","b","c","d","e"),null,array("e","a","b","c","d",null)),',',':') FROM src tablesample (1 rows)
PREHOOK: type: QUERY
PREHOOK: Input: default@src
#### A masked pattern was here ####
POSTHOOK: query: SELECT array_join(array(array("a","b","c","d"),array("a","b","c","d"),array("a","b","c","d","e"),null,array("e","a","b","c","d",null)),',',':') FROM src tablesample (1 rows)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####
[a, b, c, d],[a, b, c, d],[a, b, c, d, e],:,[e, a, b, c, d, null]
PREHOOK: query: create external table test_null_array (id int, value Array<String>) ROW FORMAT DELIMITED
#### A masked pattern was here ####
PREHOOK: type: CREATETABLE
#### A masked pattern was here ####
PREHOOK: Output: database:default
PREHOOK: Output: default@test_null_array
POSTHOOK: query: create external table test_null_array (id int, value Array<String>) ROW FORMAT DELIMITED
#### A masked pattern was here ####
POSTHOOK: type: CREATETABLE
#### A masked pattern was here ####
POSTHOOK: Output: database:default
POSTHOOK: Output: default@test_null_array
PREHOOK: query: select value from test_null_array
PREHOOK: type: QUERY
PREHOOK: Input: default@test_null_array
#### A masked pattern was here ####
POSTHOOK: query: select value from test_null_array
POSTHOOK: type: QUERY
POSTHOOK: Input: default@test_null_array
#### A masked pattern was here ####
["NULL"]
["null","null"]
[]
PREHOOK: query: select array_join(value,',',':') from test_null_array
PREHOOK: type: QUERY
PREHOOK: Input: default@test_null_array
#### A masked pattern was here ####
POSTHOOK: query: select array_join(value,',',':') from test_null_array
POSTHOOK: type: QUERY
POSTHOOK: Input: default@test_null_array
#### A masked pattern was here ####
NULL
NULL
null,null