From fbc212d85880b11d5f0c3f634970d202fb2fbd67 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Thu, 13 Aug 2015 12:05:32 +0900 Subject: [PATCH 1/3] TAJO-1754 --- .../function/json/JsonArrayContains.java | 103 ++++++++++++++++++ .../engine/function/json/JsonArrayGet.java | 82 ++++++++++++++ .../engine/function/json/JsonArrayLength.java | 67 ++++++++++++ .../function/json/JsonExtractPathText.java | 12 +- .../function/json/ScalarJsonFunction.java | 20 ++++ .../engine/function/TestJsonFunctions.java | 31 ++++++ 6 files changed, 307 insertions(+), 8 deletions(-) create mode 100644 tajo-core/src/main/java/org/apache/tajo/engine/function/json/JsonArrayContains.java create mode 100644 tajo-core/src/main/java/org/apache/tajo/engine/function/json/JsonArrayGet.java create mode 100644 tajo-core/src/main/java/org/apache/tajo/engine/function/json/JsonArrayLength.java create mode 100644 tajo-core/src/main/java/org/apache/tajo/engine/function/json/ScalarJsonFunction.java diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/function/json/JsonArrayContains.java b/tajo-core/src/main/java/org/apache/tajo/engine/function/json/JsonArrayContains.java new file mode 100644 index 0000000000..c0287a9336 --- /dev/null +++ b/tajo-core/src/main/java/org/apache/tajo/engine/function/json/JsonArrayContains.java @@ -0,0 +1,103 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tajo.engine.function.json; + +import net.minidev.json.JSONArray; +import net.minidev.json.parser.ParseException; +import org.apache.tajo.catalog.Column; +import org.apache.tajo.common.TajoDataTypes; +import org.apache.tajo.common.TajoDataTypes.Type; +import org.apache.tajo.datum.AnyDatum; +import org.apache.tajo.datum.BooleanDatum; +import org.apache.tajo.datum.Datum; +import org.apache.tajo.datum.NullDatum; +import org.apache.tajo.engine.function.annotation.Description; +import org.apache.tajo.engine.function.annotation.ParamTypes; +import org.apache.tajo.storage.Tuple; + +@Description( + functionName = "json_array_contains", + description = "Determine if the given value exists in the JSON array", + detail = "Determine if the given value exists in the JSON array.", + example = "> SELECT json_array_contains('[100, 200, 300]', 100);\n" + + "t", + returnType = Type.BOOLEAN, + paramTypes = {@ParamTypes(paramTypes = {Type.TEXT, Type.ANY})} +) +public class JsonArrayContains extends ScalarJsonFunction { + private boolean isLong; + private boolean isDouble; + private boolean isFirst = true; + + public JsonArrayContains() { + super(new Column[]{ + new Column("json_array", TajoDataTypes.Type.TEXT), + new Column("index", TajoDataTypes.Type.ANY) + }); + } + + @Override + public Datum eval(Tuple params) { + if (params.isBlankOrNull(0) || params.isBlankOrNull(1)) { + return NullDatum.get(); + } + + try { + Object parsed = parser.parse(params.getBytes(0)); + if (parsed instanceof JSONArray) { + JSONArray array = (JSONArray) parsed; + Datum actualParam = ((AnyDatum)params.asDatum(1)).getActual(); + + if (isFirst) { + isFirst = false; + if (actualParam.type() == TajoDataTypes.Type.INT1 || + actualParam.type() == TajoDataTypes.Type.INT2 || + actualParam.type() == TajoDataTypes.Type.INT4 || + actualParam.type() == TajoDataTypes.Type.INT8) { + isLong = true; + } else if (actualParam.type() == TajoDataTypes.Type.FLOAT4 || + actualParam.type() == TajoDataTypes.Type.FLOAT8) { + isDouble = true; + } + } + + for (Object eachElem : array) { + if (isLong && eachElem instanceof Long) { + if (actualParam.asInt8() == (Long)eachElem) { + return BooleanDatum.TRUE; + } + } else if (isDouble && eachElem instanceof Double) { + if (actualParam.asFloat8() == (Double)eachElem) { + return BooleanDatum.TRUE; + } + } else { + if (actualParam.asChars().equals(eachElem.toString())) { + return BooleanDatum.TRUE; + } + } + } + return BooleanDatum.FALSE; + } else { + return NullDatum.get(); + } + } catch (ParseException e) { + return NullDatum.get(); + } + } +} diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/function/json/JsonArrayGet.java b/tajo-core/src/main/java/org/apache/tajo/engine/function/json/JsonArrayGet.java new file mode 100644 index 0000000000..d27a134662 --- /dev/null +++ b/tajo-core/src/main/java/org/apache/tajo/engine/function/json/JsonArrayGet.java @@ -0,0 +1,82 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tajo.engine.function.json; + +import net.minidev.json.JSONArray; +import net.minidev.json.parser.ParseException; +import org.apache.tajo.catalog.Column; +import org.apache.tajo.common.TajoDataTypes; +import org.apache.tajo.datum.Datum; +import org.apache.tajo.datum.DatumFactory; +import org.apache.tajo.datum.NullDatum; +import org.apache.tajo.engine.function.annotation.Description; +import org.apache.tajo.engine.function.annotation.ParamTypes; +import org.apache.tajo.storage.Tuple; + +@Description( + functionName = "json_array_get", + description = "Returns the element at the specified index into the JSON array", + detail = "Returns the element at the specified index into the JSON array.\n " + + "This function returns an element indexed from the end of an array with a negative index,\n " + + "and null if the element at the specified index doesn’t exist.", + example = "> SELECT json_array_get('[100, 200, 300]', 1);\n" + + "100", + returnType = TajoDataTypes.Type.TEXT, + paramTypes = {@ParamTypes(paramTypes = {TajoDataTypes.Type.TEXT, TajoDataTypes.Type.INT4})} +) +public class JsonArrayGet extends ScalarJsonFunction { + private int realIndex = Integer.MAX_VALUE; + + public JsonArrayGet() { + super(new Column[]{ + new Column("json_array", TajoDataTypes.Type.TEXT), + new Column("index", TajoDataTypes.Type.INT4) + }); + } + + @Override + public Datum eval(Tuple params) { + if (params.isBlankOrNull(0) || params.isBlankOrNull(1)) { + return NullDatum.get(); + } + + try { + Object parsed = parser.parse(params.getBytes(0)); + if (parsed instanceof JSONArray) { + JSONArray array = (JSONArray) parsed; + if (realIndex == Integer.MAX_VALUE) { + int givenIndex = params.getInt4(1); + // Zero and positive given index points out the element from the left side, + // while negative given index points out the element from the right side. + realIndex = givenIndex < 0 ? array.size() + givenIndex : givenIndex; + } + + if (realIndex >= array.size() || realIndex < 0) { + return NullDatum.get(); + } else { + return DatumFactory.createText(array.get(realIndex).toString()); + } + } else { + return NullDatum.get(); + } + } catch (ParseException e) { + return NullDatum.get(); + } + } +} diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/function/json/JsonArrayLength.java b/tajo-core/src/main/java/org/apache/tajo/engine/function/json/JsonArrayLength.java new file mode 100644 index 0000000000..ff0a05a500 --- /dev/null +++ b/tajo-core/src/main/java/org/apache/tajo/engine/function/json/JsonArrayLength.java @@ -0,0 +1,67 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tajo.engine.function.json; + +import net.minidev.json.JSONArray; +import net.minidev.json.parser.ParseException; +import org.apache.tajo.catalog.Column; +import org.apache.tajo.common.TajoDataTypes; +import org.apache.tajo.common.TajoDataTypes.Type; +import org.apache.tajo.datum.Datum; +import org.apache.tajo.datum.DatumFactory; +import org.apache.tajo.datum.NullDatum; +import org.apache.tajo.engine.function.annotation.Description; +import org.apache.tajo.engine.function.annotation.ParamTypes; +import org.apache.tajo.storage.Tuple; + +@Description( + functionName = "json_array_length", + description = "Returns the length of json array", + detail = "Returns the length of json array.", + example = "> SELECT json_array_length('[100, 200, 300]');\n" + + "3", + returnType = Type.INT8, + paramTypes = {@ParamTypes(paramTypes = {Type.TEXT})} +) +public class JsonArrayLength extends ScalarJsonFunction { + + public JsonArrayLength() { + super(new Column[]{ + new Column("string", TajoDataTypes.Type.TEXT), + }); + } + + @Override + public Datum eval(Tuple params) { + if (params.isBlankOrNull(0)) { + return NullDatum.get(); + } + try { + Object parsed = parser.parse(params.getBytes(0)); + if (parsed instanceof JSONArray) { + JSONArray array = (JSONArray) parsed; + return DatumFactory.createInt8(array.size()); + } else { + return NullDatum.get(); + } + } catch (ParseException e) { + return NullDatum.get(); + } + } +} diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/function/json/JsonExtractPathText.java b/tajo-core/src/main/java/org/apache/tajo/engine/function/json/JsonExtractPathText.java index 84c14c6d97..ced77890e5 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/function/json/JsonExtractPathText.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/function/json/JsonExtractPathText.java @@ -20,7 +20,6 @@ import com.jayway.jsonpath.JsonPath; import net.minidev.json.JSONObject; -import net.minidev.json.parser.JSONParser; import org.apache.tajo.catalog.Column; import org.apache.tajo.common.TajoDataTypes; import org.apache.tajo.datum.Datum; @@ -28,13 +27,12 @@ import org.apache.tajo.datum.NullDatum; import org.apache.tajo.engine.function.annotation.Description; import org.apache.tajo.engine.function.annotation.ParamTypes; -import org.apache.tajo.plan.function.GeneralFunction; import org.apache.tajo.storage.Tuple; /** * json_extract_path_text(string, string) - * Extracts JSON string from a JSON string based on json path specified, - * and returns JSON string pointed to by xPath. + * and returns JSON string pointed to by JSONPath. * * * Returns null if either argument is null. @@ -45,16 +43,15 @@ */ @Description( functionName = "json_extract_path_text", - description = "Returns JSON string pointed to by xPath", + description = "Returns JSON string pointed to by JSONPath", detail = "Extracts JSON string from a JSON string based on json path specified,\n" - + "and returns JSON string pointed to by xPath.", + + "and returns JSON string pointed to by JSONPath.", example = "> SELECT json_extract_path_text('{\"sample\" : {\"name\" : \"tajo\"}}','$.sample.name');\n" + "tajo", returnType = TajoDataTypes.Type.TEXT, paramTypes = {@ParamTypes(paramTypes = {TajoDataTypes.Type.TEXT, TajoDataTypes.Type.TEXT})} ) -public class JsonExtractPathText extends GeneralFunction { - private JSONParser parser; +public class JsonExtractPathText extends ScalarJsonFunction { private JsonPath jsonPath; public JsonExtractPathText() { @@ -62,7 +59,6 @@ public JsonExtractPathText() { new Column("string", TajoDataTypes.Type.TEXT), new Column("string", TajoDataTypes.Type.TEXT), }); - parser = new JSONParser(JSONParser.MODE_JSON_SIMPLE | JSONParser.IGNORE_CONTROL_CHAR); } @Override diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/function/json/ScalarJsonFunction.java b/tajo-core/src/main/java/org/apache/tajo/engine/function/json/ScalarJsonFunction.java new file mode 100644 index 0000000000..5a6afe2b29 --- /dev/null +++ b/tajo-core/src/main/java/org/apache/tajo/engine/function/json/ScalarJsonFunction.java @@ -0,0 +1,20 @@ +package org.apache.tajo.engine.function.json; + +import net.minidev.json.parser.JSONParser; +import org.apache.tajo.OverridableConf; +import org.apache.tajo.catalog.Column; +import org.apache.tajo.plan.expr.FunctionEval; +import org.apache.tajo.plan.function.GeneralFunction; + +public abstract class ScalarJsonFunction extends GeneralFunction { + protected JSONParser parser; + + public ScalarJsonFunction(Column[] definedArgs) { + super(definedArgs); + } + + @Override + public void init(OverridableConf queryContext, FunctionEval.ParamType [] paramTypes) { + parser = new JSONParser(JSONParser.MODE_JSON_SIMPLE | JSONParser.IGNORE_CONTROL_CHAR); + } +} diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/function/TestJsonFunctions.java b/tajo-core/src/test/java/org/apache/tajo/engine/function/TestJsonFunctions.java index 57248a8d45..1792075278 100644 --- a/tajo-core/src/test/java/org/apache/tajo/engine/function/TestJsonFunctions.java +++ b/tajo-core/src/test/java/org/apache/tajo/engine/function/TestJsonFunctions.java @@ -25,11 +25,42 @@ public class TestJsonFunctions extends ExprTestBase { static final String JSON_DOCUMENT = "{\"map\" : {\"name\" : \"tajo\"}, \"array\" : [1,2,3]}"; + static final String JSON_ARRAY = "[100,200,300,400,500]"; + static final String JSON_COMPLEX_ARRAY = "[100, \"test\", \"2015-08-13 11:58:59\", 0.899999999999]"; + static final String JSON_EMPTY_ARRAY = "[]"; @Test public void testJsonExtractPathText() throws TajoException { testSimpleEval("select json_extract_path_text('" + JSON_DOCUMENT + "', '$.map.name') ", new String[]{"tajo"}); testSimpleEval("select json_extract_path_text('" + JSON_DOCUMENT + "', '$.array[1]') ", new String[]{"2"}); + } + + @Test + public void testJsonArrayGet() throws TajoException { + testSimpleEval("select json_array_get('" + JSON_ARRAY + "', 0)", new String[]{"100"}); + testSimpleEval("select json_array_get('" + JSON_ARRAY + "', 2)", new String[]{"300"}); + testSimpleEval("select json_array_get('" + JSON_ARRAY + "', -1)", new String[]{"500"}); + testSimpleEval("select json_array_get('" + JSON_ARRAY + "', -2)", new String[]{"400"}); + testSimpleEval("select json_array_get('" + JSON_ARRAY + "', 10)", new String[]{""}); + testSimpleEval("select json_array_get('" + JSON_ARRAY + "', -10)", new String[]{""}); + testSimpleEval("select json_array_get('" + JSON_EMPTY_ARRAY + "', 0)", new String[]{""}); + } + @Test + public void testJsonArrayContains() throws TajoException { + testSimpleEval("select json_array_contains('" + JSON_COMPLEX_ARRAY + "', 100)", new String[]{"t"}); + testSimpleEval("select json_array_contains('" + JSON_COMPLEX_ARRAY + "', 'test')", new String[]{"t"}); + testSimpleEval("select json_array_contains('" + JSON_COMPLEX_ARRAY + "', '2015-08-13 11:58:59'::timestamp)", + new String[]{"t"}); + testSimpleEval("select json_array_contains('" + JSON_COMPLEX_ARRAY + "', '2015-08-13 11:58:59'::date)", + new String[]{"f"}); + testSimpleEval("select json_array_contains('" + JSON_COMPLEX_ARRAY + "', 1000)", new String[]{"f"}); + testSimpleEval("select json_array_contains('" + JSON_COMPLEX_ARRAY + "', 0.899999999999)", new String[]{"t"}); + } + + @Test + public void testJsonArrayLength() throws TajoException { + testSimpleEval("select json_array_length('" + JSON_ARRAY + "')", new String[]{"5"}); + testSimpleEval("select json_array_length('" + JSON_EMPTY_ARRAY + "')", new String[]{"0"}); } } From 7382d4425403bca3f3151ffafbbe99024ea84c2e Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Thu, 13 Aug 2015 12:21:05 +0900 Subject: [PATCH 2/3] add missing license --- .../function/json/ScalarJsonFunction.java | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/function/json/ScalarJsonFunction.java b/tajo-core/src/main/java/org/apache/tajo/engine/function/json/ScalarJsonFunction.java index 5a6afe2b29..db95aaa988 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/function/json/ScalarJsonFunction.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/function/json/ScalarJsonFunction.java @@ -1,3 +1,21 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.tajo.engine.function.json; import net.minidev.json.parser.JSONParser; From a568fef42688fc6fff591bf52af13038f00e99ce Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Thu, 13 Aug 2015 12:53:59 +0900 Subject: [PATCH 3/3] Add a document --- .../engine/function/json/JsonArrayGet.java | 2 +- .../src/main/sphinx/functions/json_func.rst | 55 +++++++++++++++++-- 2 files changed, 51 insertions(+), 6 deletions(-) diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/function/json/JsonArrayGet.java b/tajo-core/src/main/java/org/apache/tajo/engine/function/json/JsonArrayGet.java index d27a134662..412337ec02 100644 --- a/tajo-core/src/main/java/org/apache/tajo/engine/function/json/JsonArrayGet.java +++ b/tajo-core/src/main/java/org/apache/tajo/engine/function/json/JsonArrayGet.java @@ -36,7 +36,7 @@ "This function returns an element indexed from the end of an array with a negative index,\n " + "and null if the element at the specified index doesn’t exist.", example = "> SELECT json_array_get('[100, 200, 300]', 1);\n" + - "100", + "200", returnType = TajoDataTypes.Type.TEXT, paramTypes = {@ParamTypes(paramTypes = {TajoDataTypes.Type.TEXT, TajoDataTypes.Type.INT4})} ) diff --git a/tajo-docs/src/main/sphinx/functions/json_func.rst b/tajo-docs/src/main/sphinx/functions/json_func.rst index d05cc1baa4..904be5b587 100644 --- a/tajo-docs/src/main/sphinx/functions/json_func.rst +++ b/tajo-docs/src/main/sphinx/functions/json_func.rst @@ -2,16 +2,61 @@ JSON Functions ******************************* -.. function:: json_extract_path_text (string json, string xpath) +.. function:: json_extract_path_text (string json, string json_path) - Extracts JSON string from a JSON string based on json path specified and returns JSON string pointed to by xPath + Extracts JSON string from a JSON string based on json path specified and returns JSON string pointed to by JSONPath. + Returns null if either argument is null. - :param string: - :param string: + :param json: JSON string + :param json_path: JSONpath :rtype: text :example: .. code-block:: sql - json_extract_path_text('{"test" : {"key" : "tajo"}}','$.test.key'); + select json_extract_path_text('{"test" : {"key" : "tajo"}}','$.test.key'); > tajo + +.. function:: json_array_get (string json_array, int index) + + Returns the element at the specified index into the JSON array. This function returns an element indexed from the end of an array with a negative index, and null if the element at the specified index doesn’t exist. + + :param json_array: String of a JSON array + :param index: index + :rtype: text + :example: + + .. code-block:: sql + + select json_array_get('[100, 200, 300]', 0); + > 100 + + select json_array_get('[100, 200, 300]', -2); + > 200 + +.. function:: json_array_contains (string json_array, any value) + + Determine if the given value exists in the JSON array. + + :param json_array: String of a JSON array + :param value: value of any type + :rtype: text + :example: + + .. code-block:: sql + + select json_array_contains('[100, 200, 300]', 100); + > t + +.. function:: json_array_length(string json_array) + + Returns the length of json array. + + :param json_array: String of a JSON array + :rtype: int8 + :example: + + .. code-block:: sql + + select json_array_length('[100, 200, 300]'); + > 3