From d44aa986aea8c6f893edb2fa5525b2ca8bdef3b4 Mon Sep 17 00:00:00 2001
From: "Charles S. Givre"
Date: Tue, 9 Jul 2024 11:01:04 -0400
Subject: [PATCH] DRILL-8501: Json Conversion UDF Not Respecting System JSON Options (#2921)
---
 .../expr/fn/impl/conv/JsonConvertFrom.java    | 291 ++++++++++++++++--
 .../store/json/TestJsonConversionUDF.java     | 223 ++++++++++++++
 .../drill/exec/store/json/TestJsonNanInf.java | 101 ++----
 .../test/resources/jsoninput/allTypes.csvh    |   2 +
 4 files changed, 516 insertions(+), 101 deletions(-)
 create mode 100644 exec/java-exec/src/test/java/org/apache/drill/exec/store/json/TestJsonConversionUDF.java
 create mode 100644 exec/java-exec/src/test/resources/jsoninput/allTypes.csvh

diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/conv/JsonConvertFrom.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/conv/JsonConvertFrom.java
index bbfc2829147..61e64a55dcc 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/conv/JsonConvertFrom.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/conv/JsonConvertFrom.java
@@ -19,24 +19,23 @@
 import io.netty.buffer.DrillBuf;
-
-import javax.inject.Inject;
-
 import org.apache.drill.exec.expr.DrillSimpleFunc;
 import org.apache.drill.exec.expr.annotations.FunctionTemplate;
 import org.apache.drill.exec.expr.annotations.FunctionTemplate.FunctionScope;
 import org.apache.drill.exec.expr.annotations.Output;
 import org.apache.drill.exec.expr.annotations.Param;
 import org.apache.drill.exec.expr.annotations.Workspace;
+import org.apache.drill.exec.expr.holders.BitHolder;
 import org.apache.drill.exec.expr.holders.NullableVarBinaryHolder;
 import org.apache.drill.exec.expr.holders.NullableVarCharHolder;
 import org.apache.drill.exec.expr.holders.VarBinaryHolder;
 import org.apache.drill.exec.expr.holders.VarCharHolder;
+import org.apache.drill.exec.server.options.OptionManager;
 import org.apache.drill.exec.vector.complex.writer.BaseWriter.ComplexWriter;
 
-public class JsonConvertFrom {
+import javax.inject.Inject;
 
-  static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(JsonConvertFrom.class);
+public class JsonConvertFrom {
 
   private JsonConvertFrom() {
   }
@@ -44,16 +43,75 @@ private JsonConvertFrom() {
   @FunctionTemplate(name = "convert_fromJSON", scope = FunctionScope.SIMPLE, isRandom = true)
   public static class ConvertFromJson implements DrillSimpleFunc {
 
-    @Param VarBinaryHolder in;
-    @Inject DrillBuf buffer;
-    @Workspace org.apache.drill.exec.vector.complex.fn.JsonReader jsonReader;
+    @Param
+    VarBinaryHolder in;
 
-    @Output ComplexWriter writer;
+    @Inject
+    DrillBuf buffer;
+
+    @Inject
+    OptionManager options;
+
+    @Workspace
+    org.apache.drill.exec.vector.complex.fn.JsonReader jsonReader;
+
+    @Output
+    ComplexWriter writer;
 
     @Override
     public void setup() {
+      boolean allTextMode = options.getBoolean(org.apache.drill.exec.ExecConstants.JSON_ALL_TEXT_MODE);
+      boolean readNumbersAsDouble = options.getBoolean(org.apache.drill.exec.ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE);
+
+      jsonReader = new org.apache.drill.exec.vector.complex.fn.JsonReader.Builder(buffer)
+          .defaultSchemaPathColumns()
+          .allTextMode(allTextMode)
+          .readNumbersAsDouble(readNumbersAsDouble)
+          .build();
+    }
+
+    @Override
+    public void eval() {
+      try {
+        jsonReader.setSource(in.start, in.end, in.buffer);
+        jsonReader.write(writer);
+        buffer = jsonReader.getWorkBuf();
+      } catch (Exception e) {
+        throw new org.apache.drill.common.exceptions.DrillRuntimeException("Error while converting from JSON. ", e);
+      }
+    }
+  }
+
+  @FunctionTemplate(name = "convert_fromJSON", scope = FunctionScope.SIMPLE, isRandom = true)
+  public static class ConvertFromJsonWithArgs implements DrillSimpleFunc {
+
+    @Param
+    VarBinaryHolder in;
+
+    @Param
+    BitHolder allTextModeHolder;
+
+    @Param
+    BitHolder readNumbersAsDoubleHolder;
+
+    @Inject
+    DrillBuf buffer;
+
+    @Workspace
+    org.apache.drill.exec.vector.complex.fn.JsonReader jsonReader;
+
+    @Output
+    ComplexWriter writer;
+
+    @Override
+    public void setup() {
+      boolean allTextMode = allTextModeHolder.value == 1;
+      boolean readNumbersAsDouble = readNumbersAsDoubleHolder.value == 1;
+
       jsonReader = new org.apache.drill.exec.vector.complex.fn.JsonReader.Builder(buffer)
           .defaultSchemaPathColumns()
+          .allTextMode(allTextMode)
+          .readNumbersAsDouble(readNumbersAsDouble)
           .build();
     }
@@ -69,19 +127,79 @@ public void eval() {
     }
   }
 
+  @FunctionTemplate(name = "convert_fromJSON", scope = FunctionScope.SIMPLE, isRandom = true)
   public static class ConvertFromJsonVarchar implements DrillSimpleFunc {
 
-    @Param VarCharHolder in;
-    @Inject DrillBuf buffer;
-    @Workspace org.apache.drill.exec.vector.complex.fn.JsonReader jsonReader;
+    @Param
+    VarCharHolder in;
 
-    @Output ComplexWriter writer;
+    @Inject
+    DrillBuf buffer;
+
+    @Inject
+    OptionManager options;
+
+    @Workspace
+    org.apache.drill.exec.vector.complex.fn.JsonReader jsonReader;
+
+    @Output
+    ComplexWriter writer;
 
     @Override
     public void setup() {
+      boolean allTextMode = options.getBoolean(org.apache.drill.exec.ExecConstants.JSON_ALL_TEXT_MODE);
+      boolean readNumbersAsDouble = options.getBoolean(org.apache.drill.exec.ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE);
+
       jsonReader = new org.apache.drill.exec.vector.complex.fn.JsonReader.Builder(buffer)
           .defaultSchemaPathColumns()
+          .allTextMode(allTextMode)
+          .readNumbersAsDouble(readNumbersAsDouble)
+          .build();
+    }
+
+    @Override
+    public void eval() {
+      try {
+        jsonReader.setSource(in.start, in.end, in.buffer);
+        jsonReader.write(writer);
+        buffer = jsonReader.getWorkBuf();
+      } catch (Exception e) {
+        throw new org.apache.drill.common.exceptions.DrillRuntimeException("Error while converting from JSON. ", e);
", e); + } + } + } + + @FunctionTemplate(name = "convert_fromJSON", scope = FunctionScope.SIMPLE, isRandom = true) + public static class ConvertFromJsonVarcharWithConfig implements DrillSimpleFunc { + + @Param + VarCharHolder in; + + @Param + BitHolder allTextModeHolder; + + @Param + BitHolder readNumbersAsDoubleHolder; + + @Inject + DrillBuf buffer; + + @Workspace + org.apache.drill.exec.vector.complex.fn.JsonReader jsonReader; + + @Output + ComplexWriter writer; + + @Override + public void setup() { + boolean allTextMode = allTextModeHolder.value == 1; + boolean readNumbersAsDouble = readNumbersAsDoubleHolder.value == 1; + + jsonReader = new org.apache.drill.exec.vector.complex.fn.JsonReader.Builder(buffer) + .defaultSchemaPathColumns() + .allTextMode(allTextMode) + .readNumbersAsDouble(readNumbersAsDouble) .build(); } @@ -100,16 +218,83 @@ public void eval() { @FunctionTemplate(name = "convert_fromJSON", scope = FunctionScope.SIMPLE, isRandom = true) public static class ConvertFromJsonNullableInput implements DrillSimpleFunc { - @Param NullableVarBinaryHolder in; - @Inject DrillBuf buffer; - @Workspace org.apache.drill.exec.vector.complex.fn.JsonReader jsonReader; + @Param + NullableVarBinaryHolder in; - @Output ComplexWriter writer; + @Inject + DrillBuf buffer; + + @Inject + OptionManager options; + + @Workspace + org.apache.drill.exec.vector.complex.fn.JsonReader jsonReader; + + @Output + ComplexWriter writer; + + @Override + public void setup() { + boolean allTextMode = options.getBoolean(org.apache.drill.exec.ExecConstants.JSON_ALL_TEXT_MODE); + boolean readNumbersAsDouble = options.getBoolean(org.apache.drill.exec.ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE); + + jsonReader = new org.apache.drill.exec.vector.complex.fn.JsonReader.Builder(buffer) + .defaultSchemaPathColumns() + .allTextMode(allTextMode) + .readNumbersAsDouble(readNumbersAsDouble) + .build(); + } + + @Override + public void eval() { + if (in.isSet == 0) { + // Return empty map + org.apache.drill.exec.vector.complex.writer.BaseWriter.MapWriter mapWriter = writer.rootAsMap(); + mapWriter.start(); + mapWriter.end(); + return; + } + + try { + jsonReader.setSource(in.start, in.end, in.buffer); + jsonReader.write(writer); + buffer = jsonReader.getWorkBuf(); + } catch (Exception e) { + throw new org.apache.drill.common.exceptions.DrillRuntimeException("Error while converting from JSON. 
", e); + } + } + } + + @FunctionTemplate(name = "convert_fromJSON", scope = FunctionScope.SIMPLE, isRandom = true) + public static class ConvertFromJsonNullableInputWithArgs implements DrillSimpleFunc { + + @Param + NullableVarBinaryHolder in; + + @Param + BitHolder allTextModeHolder; + + @Param + BitHolder readNumbersAsDoubleHolder; + + @Inject + DrillBuf buffer; + + @Workspace + org.apache.drill.exec.vector.complex.fn.JsonReader jsonReader; + + @Output + ComplexWriter writer; @Override public void setup() { + boolean allTextMode = allTextModeHolder.value == 1; + boolean readNumbersAsDouble = readNumbersAsDoubleHolder.value == 1; + jsonReader = new org.apache.drill.exec.vector.complex.fn.JsonReader.Builder(buffer) .defaultSchemaPathColumns() + .allTextMode(allTextMode) + .readNumbersAsDouble(readNumbersAsDouble) .build(); } @@ -136,16 +321,29 @@ public void eval() { @FunctionTemplate(name = "convert_fromJSON", scope = FunctionScope.SIMPLE, isRandom = true) public static class ConvertFromJsonVarcharNullableInput implements DrillSimpleFunc { - @Param NullableVarCharHolder in; - @Inject DrillBuf buffer; - @Workspace org.apache.drill.exec.vector.complex.fn.JsonReader jsonReader; + @Param + NullableVarCharHolder in; + + @Inject + DrillBuf buffer; + + @Inject + OptionManager options; + + @Workspace + org.apache.drill.exec.vector.complex.fn.JsonReader jsonReader; @Output ComplexWriter writer; @Override public void setup() { + boolean allTextMode = options.getBoolean(org.apache.drill.exec.ExecConstants.JSON_ALL_TEXT_MODE); + boolean readNumbersAsDouble = options.getBoolean(org.apache.drill.exec.ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE); + jsonReader = new org.apache.drill.exec.vector.complex.fn.JsonReader.Builder(buffer) .defaultSchemaPathColumns() + .allTextMode(allTextMode) + .readNumbersAsDouble(readNumbersAsDouble) .build(); } @@ -168,4 +366,57 @@ public void eval() { } } } + + @FunctionTemplate(name = "convert_fromJSON", scope = FunctionScope.SIMPLE, isRandom = true) + public static class ConvertFromJsonVarcharNullableInputWithConfigs implements DrillSimpleFunc { + + @Param + NullableVarCharHolder in; + + @Param + BitHolder allTextModeHolder; + + @Param + BitHolder readNumbersAsDoubleHolder; + + @Inject + DrillBuf buffer; + + @Workspace + org.apache.drill.exec.vector.complex.fn.JsonReader jsonReader; + + @Output ComplexWriter writer; + + @Override + public void setup() { + boolean allTextMode = allTextModeHolder.value == 1; + boolean readNumbersAsDouble = readNumbersAsDoubleHolder.value == 1; + + jsonReader = new org.apache.drill.exec.vector.complex.fn.JsonReader.Builder(buffer) + .defaultSchemaPathColumns() + .allTextMode(allTextMode) + .readNumbersAsDouble(readNumbersAsDouble) + .build(); + } + + @Override + public void eval() { + if (in.isSet == 0) { + // Return empty map + org.apache.drill.exec.vector.complex.writer.BaseWriter.MapWriter mapWriter = writer.rootAsMap(); + mapWriter.start(); + mapWriter.end(); + return; + } + + try { + jsonReader.setSource(in.start, in.end, in.buffer); + jsonReader.write(writer); + buffer = jsonReader.getWorkBuf(); + } catch (Exception e) { + throw new org.apache.drill.common.exceptions.DrillRuntimeException("Error while converting from JSON. 
", e); + } + } + } + } diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/json/TestJsonConversionUDF.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/json/TestJsonConversionUDF.java new file mode 100644 index 00000000000..d6fa857a390 --- /dev/null +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/json/TestJsonConversionUDF.java @@ -0,0 +1,223 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.drill.exec.store.json; + +import org.apache.commons.io.FileUtils; +import org.apache.drill.common.expression.SchemaPath; +import org.apache.drill.exec.ExecConstants; +import org.apache.drill.exec.record.RecordBatchLoader; +import org.apache.drill.exec.record.VectorWrapper; +import org.apache.drill.exec.rpc.user.QueryDataBatch; +import org.apache.drill.exec.vector.VarCharVector; +import org.apache.drill.test.BaseTestQuery; +import org.junit.Test; + +import java.io.File; +import java.nio.charset.Charset; +import java.util.List; + +import static org.apache.drill.test.TestBuilder.mapOf; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; + +public class TestJsonConversionUDF extends BaseTestQuery { + + @Test + public void doTestConvertFromJsonFunction() throws Exception { + String table = "nan_test.csv"; + File file = new File(dirTestWatcher.getRootDir(), table); + String csv = "col_0, {\"nan_col\":NaN}"; + try { + FileUtils.writeStringToFile(file, csv, Charset.defaultCharset()); + testBuilder() + .sqlQuery(String.format("select convert_fromJSON(columns[1]) as col from dfs.`%s`", table)) + .unOrdered() + .baselineColumns("col") + .baselineValues(mapOf("nan_col", Double.NaN)) + .go(); + } finally { + FileUtils.deleteQuietly(file); + } + } + + @Test + public void doTestConvertToJsonFunction() throws Exception { + String table = "nan_test.csv"; + File file = new File(dirTestWatcher.getRootDir(), table); + String csv = "col_0, {\"nan_col\":NaN}"; + String query = String.format("select string_binary(convert_toJSON(convert_fromJSON(columns[1]))) as col " + + "from dfs.`%s` where columns[0]='col_0'", table); + try { + FileUtils.writeStringToFile(file, csv, Charset.defaultCharset()); + List results = testSqlWithResults(query); + RecordBatchLoader batchLoader = new RecordBatchLoader(getAllocator()); + assertEquals("Query result must contain 1 row", 1, results.size()); + QueryDataBatch batch = results.get(0); + + batchLoader.load(batch.getHeader().getDef(), batch.getData()); + VectorWrapper vw = batchLoader.getValueAccessorById(VarCharVector.class, batchLoader.getValueVectorId(SchemaPath.getCompoundPath("col")).getFieldIds()); + // ensuring that `NaN` token ARE NOT enclosed with double quotes + String resultJson = 
vw.getValueVector().getAccessor().getObject(0).toString(); + int nanIndex = resultJson.indexOf("NaN"); + assertNotEquals("`NaN` must not be enclosed with \"\" ", '"', resultJson.charAt(nanIndex - 1)); + assertNotEquals("`NaN` must not be enclosed with \"\" ", '"', resultJson.charAt(nanIndex + "NaN".length())); + batch.release(); + batchLoader.clear(); + } finally { + FileUtils.deleteQuietly(file); + } + } + + @Test + public void testAllTextMode() throws Exception { + alterSession(ExecConstants.JSON_ALL_TEXT_MODE, true); + alterSession(ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE, false); + + String sql = "SELECT \n" + + "typeof(jsonMap['bi']) AS bi, \n" + + "typeof(jsonMap['fl']) AS fl, \n" + + "typeof(jsonMap['st']) AS st, \n" + + "typeof(jsonMap['mp']) AS mp, \n" + + "typeof(jsonMap['ar']) AS ar, \n" + + "typeof(jsonMap['nu']) AS nu\n" + + "FROM(\n" + + "SELECT convert_fromJSON(col1) AS jsonMap, \n" + + "col2 \n" + + "FROM cp.`jsoninput/allTypes.csvh`\n" + + ")"; + + testBuilder() + .sqlQuery(sql) + .unOrdered() + .baselineColumns("bi", "fl", "st", "mp", "ar", "nu") + .baselineValues("VARCHAR", "VARCHAR", "VARCHAR", "MAP", "VARCHAR", "NULL") + .go(); + + alterSession(ExecConstants.JSON_ALL_TEXT_MODE, false); + + testBuilder() + .sqlQuery(sql) + .unOrdered() + .baselineColumns("bi", "fl", "st", "mp", "ar", "nu") + .baselineValues("BIGINT", "FLOAT8", "VARCHAR", "MAP", "BIGINT", "NULL") + .go(); + + resetSessionOption(ExecConstants.JSON_ALL_TEXT_MODE); + resetSessionOption(ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE); + } + + @Test + public void testAllTextModeFromArgs() throws Exception { + // Set the system options to make sure that the UDF is using the provided options rather than + // the system options. + alterSession(ExecConstants.JSON_ALL_TEXT_MODE, false); + alterSession(ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE, true); + + String sql = "SELECT \n" + + "typeof(jsonMap['bi']) AS bi, \n" + + "typeof(jsonMap['fl']) AS fl, \n" + + "typeof(jsonMap['st']) AS st, \n" + + "typeof(jsonMap['mp']) AS mp, \n" + + "typeof(jsonMap['ar']) AS ar, \n" + + "typeof(jsonMap['nu']) AS nu\n" + + "FROM(\n" + + "SELECT convert_fromJSON(col1, true, false) AS jsonMap, \n" + + "col2 \n" + + "FROM cp.`jsoninput/allTypes.csvh`\n" + + ")"; + + testBuilder() + .sqlQuery(sql) + .unOrdered() + .baselineColumns("bi", "fl", "st", "mp", "ar", "nu") + .baselineValues("VARCHAR", "VARCHAR", "VARCHAR", "MAP", "VARCHAR", "NULL") + .go(); + + resetSessionOption(ExecConstants.JSON_ALL_TEXT_MODE); + resetSessionOption(ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE); + } + + + @Test + public void testReadNumbersAsDouble() throws Exception { + alterSession(ExecConstants.JSON_ALL_TEXT_MODE, false); + alterSession(ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE, true); + + String sql = "SELECT \n" + + "typeof(jsonMap['bi']) AS bi, \n" + + "typeof(jsonMap['fl']) AS fl, \n" + + "typeof(jsonMap['st']) AS st, \n" + + "typeof(jsonMap['mp']) AS mp, \n" + + "typeof(jsonMap['ar']) AS ar, \n" + + "typeof(jsonMap['nu']) AS nu\n" + + "FROM(\n" + + "SELECT convert_fromJSON(col1) AS jsonMap, \n" + + "col2 \n" + + "FROM cp.`jsoninput/allTypes.csvh`\n" + + ")"; + + testBuilder() + .sqlQuery(sql) + .unOrdered() + .baselineColumns("bi", "fl", "st", "mp", "ar", "nu") + .baselineValues("FLOAT8", "FLOAT8", "VARCHAR", "MAP", "FLOAT8", "NULL") + .go(); + + alterSession(ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE, true); + testBuilder() + .sqlQuery(sql) + .unOrdered() + .baselineColumns("bi", "fl", "st", "mp", "ar", "nu") + .baselineValues("FLOAT8", "FLOAT8", "VARCHAR", 
"MAP", "FLOAT8", "NULL") + .go(); + + resetSessionOption(ExecConstants.JSON_ALL_TEXT_MODE); + resetSessionOption(ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE); + } + + @Test + public void testReadNumbersAsDoubleFromArgs() throws Exception { + // Set the system options to make sure that the UDF is using the provided options rather than + // the system options. + alterSession(ExecConstants.JSON_ALL_TEXT_MODE, true); + alterSession(ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE, true); + String sql = "SELECT \n" + + "typeof(jsonMap['bi']) AS bi, \n" + + "typeof(jsonMap['fl']) AS fl, \n" + + "typeof(jsonMap['st']) AS st, \n" + + "typeof(jsonMap['mp']) AS mp, \n" + + "typeof(jsonMap['ar']) AS ar, \n" + + "typeof(jsonMap['nu']) AS nu\n" + + "FROM(\n" + + "SELECT convert_fromJSON(col1, false, true) AS jsonMap, \n" + + "col2 \n" + + "FROM cp.`jsoninput/allTypes.csvh`\n" + + ")"; + + testBuilder() + .sqlQuery(sql) + .unOrdered() + .baselineColumns("bi", "fl", "st", "mp", "ar", "nu") + .baselineValues("FLOAT8", "FLOAT8", "VARCHAR", "MAP", "FLOAT8", "NULL") + .go(); + + resetSessionOption(ExecConstants.JSON_ALL_TEXT_MODE); + resetSessionOption(ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE); + } +} diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/json/TestJsonNanInf.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/json/TestJsonNanInf.java index e556ec16ea1..ae4e2f8be72 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/json/TestJsonNanInf.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/json/TestJsonNanInf.java @@ -17,31 +17,23 @@ */ package org.apache.drill.exec.store.json; -import static org.apache.drill.test.TestBuilder.mapOf; -import static org.hamcrest.CoreMatchers.containsString; -import static org.junit.Assert.assertEquals; -import static org.hamcrest.MatcherAssert.assertThat; -import static org.junit.Assert.assertNotEquals; -import static org.junit.Assert.fail; - -import java.io.File; -import java.util.List; - import org.apache.commons.io.FileUtils; import org.apache.drill.common.exceptions.UserRemoteException; -import org.apache.drill.common.expression.SchemaPath; import org.apache.drill.exec.ExecConstants; import org.apache.drill.exec.physical.impl.join.JoinTestBase; -import org.apache.drill.exec.record.RecordBatchLoader; -import org.apache.drill.exec.record.VectorWrapper; -import org.apache.drill.exec.rpc.user.QueryDataBatch; -import org.apache.drill.exec.vector.VarCharVector; import org.apache.drill.exec.store.json.TestJsonReader.TestWrapper; import org.apache.drill.test.BaseTestQuery; import org.junit.Ignore; import org.junit.Test; -// TODO: Split or rename: this tests mor than NanInf +import java.io.File; +import java.nio.charset.Charset; + +import static org.hamcrest.CoreMatchers.containsString; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.fail; + public class TestJsonNanInf extends BaseTestQuery { public void runBoth(TestWrapper wrapper) throws Exception { @@ -66,7 +58,7 @@ private void doTestNanInfSelect() throws Exception { String json = "{\"nan_col\":NaN, \"inf_col\":Infinity}"; String query = String.format("select * from dfs.`%s`",table); try { - FileUtils.writeStringToFile(file, json); + FileUtils.writeStringToFile(file, json, Charset.defaultCharset()); testBuilder() .sqlQuery(query) .unOrdered() @@ -91,7 +83,7 @@ private void doTestExcludePositiveInfinity() throws Exception { "{\"nan_col\":5.0, \"inf_col\":5.0}]"; String query = 
String.format("select inf_col from dfs.`%s` where inf_col <> cast('Infinity' as double)",table); try { - FileUtils.writeStringToFile(file, json); + FileUtils.writeStringToFile(file, json, Charset.defaultCharset()); testBuilder() .sqlQuery(query) .unOrdered() @@ -116,7 +108,7 @@ private void doTestExcludeNegativeInfinity() throws Exception { "{\"nan_col\":5.0, \"inf_col\":5.0}]"; String query = String.format("select inf_col from dfs.`%s` where inf_col <> cast('-Infinity' as double)",table); try { - FileUtils.writeStringToFile(file, json); + FileUtils.writeStringToFile(file, json, Charset.defaultCharset()); testBuilder() .sqlQuery(query) .unOrdered() @@ -141,7 +133,7 @@ private void doTestIncludePositiveInfinity() throws Exception { "{\"nan_col\":5.0, \"inf_col\":5.0}]"; String query = String.format("select inf_col from dfs.`%s` where inf_col = cast('Infinity' as double)",table); try { - FileUtils.writeStringToFile(file, json); + FileUtils.writeStringToFile(file, json, Charset.defaultCharset()); testBuilder() .sqlQuery(query) .unOrdered() @@ -166,7 +158,7 @@ private void doTestExcludeNan() throws Exception { "{\"nan_col\":5.0, \"inf_col\":5.0}]"; String query = String.format("select nan_col from dfs.`%s` where cast(nan_col as varchar) <> 'NaN'",table); try { - FileUtils.writeStringToFile(file, json); + FileUtils.writeStringToFile(file, json, Charset.defaultCharset()); testBuilder() .sqlQuery(query) .unOrdered() @@ -190,7 +182,7 @@ private void doTestIncludeNan() throws Exception { "{\"nan_col\":5.0, \"inf_col\":5.0}]"; String query = String.format("select nan_col from dfs.`%s` where cast(nan_col as varchar) = 'NaN'",table); try { - FileUtils.writeStringToFile(file, json); + FileUtils.writeStringToFile(file, json, Charset.defaultCharset()); testBuilder() .sqlQuery(query) .unOrdered() @@ -213,7 +205,7 @@ private void doTestNanInfFailure() throws Exception { test("alter session set `%s` = false", ExecConstants.JSON_READER_NAN_INF_NUMBERS); String json = "{\"nan_col\":NaN, \"inf_col\":Infinity}"; try { - FileUtils.writeStringToFile(file, json); + FileUtils.writeStringToFile(file, json, Charset.defaultCharset()); test("select * from dfs.`%s`;", table); fail(); } catch (UserRemoteException e) { @@ -235,7 +227,7 @@ private void doTestCreateTableNanInf() throws Exception { String json = "{\"nan_col\":NaN, \"inf_col\":Infinity}"; String newTable = "ctas_test"; try { - FileUtils.writeStringToFile(file, json); + FileUtils.writeStringToFile(file, json, Charset.defaultCharset()); test("alter session set `store.format`='json'"); test("create table dfs.`%s` as select * from dfs.`%s`;", newTable, table); @@ -254,28 +246,6 @@ private void doTestCreateTableNanInf() throws Exception { } } - @Test - public void testConvertFromJsonFunction() throws Exception { - runBoth(this::doTestConvertFromJsonFunction); - } - - private void doTestConvertFromJsonFunction() throws Exception { - String table = "nan_test.csv"; - File file = new File(dirTestWatcher.getRootDir(), table); - String csv = "col_0, {\"nan_col\":NaN}"; - try { - FileUtils.writeStringToFile(file, csv); - testBuilder() - .sqlQuery(String.format("select convert_fromJSON(columns[1]) as col from dfs.`%s`", table)) - .unOrdered() - .baselineColumns("col") - .baselineValues(mapOf("nan_col", Double.NaN)) - .go(); - } finally { - FileUtils.deleteQuietly(file); - } - } - @Test public void testLargeStringBinary() throws Exception { runBoth(() -> doTestLargeStringBinary()); @@ -291,37 +261,6 @@ private void doTestLargeStringBinary() throws Exception { test("select 
   }
 
-  @Test
-  public void testConvertToJsonFunction() throws Exception {
-    runBoth(() -> doTestConvertToJsonFunction());
-  }
-
-  private void doTestConvertToJsonFunction() throws Exception {
-    String table = "nan_test.csv";
-    File file = new File(dirTestWatcher.getRootDir(), table);
-    String csv = "col_0, {\"nan_col\":NaN}";
-    String query = String.format("select string_binary(convert_toJSON(convert_fromJSON(columns[1]))) as col " +
-        "from dfs.`%s` where columns[0]='col_0'", table);
-    try {
-      FileUtils.writeStringToFile(file, csv);
-      List<QueryDataBatch> results = testSqlWithResults(query);
-      RecordBatchLoader batchLoader = new RecordBatchLoader(getAllocator());
-      assertEquals("Query result must contain 1 row", 1, results.size());
-      QueryDataBatch batch = results.get(0);
-
-      batchLoader.load(batch.getHeader().getDef(), batch.getData());
-      VectorWrapper<?> vw = batchLoader.getValueAccessorById(VarCharVector.class, batchLoader.getValueVectorId(SchemaPath.getCompoundPath("col")).getFieldIds());
-      // ensuring that `NaN` token ARE NOT enclosed with double quotes
-      String resultJson = vw.getValueVector().getAccessor().getObject(0).toString();
-      int nanIndex = resultJson.indexOf("NaN");
-      assertNotEquals("`NaN` must not be enclosed with \"\" ", '"', resultJson.charAt(nanIndex - 1));
-      assertNotEquals("`NaN` must not be enclosed with \"\" ", '"', resultJson.charAt(nanIndex + "NaN".length()));
-      batch.release();
-      batchLoader.clear();
-    } finally {
-      FileUtils.deleteQuietly(file);
-    }
-  }
 
   @Test
   @Ignore("DRILL-6018")
@@ -350,7 +289,7 @@ private void doTestOrderByWithNaN() throws Exception {
     File file = new File(dirTestWatcher.getRootDir(), table_name);
 
     try {
-      FileUtils.writeStringToFile(file, json);
+      FileUtils.writeStringToFile(file, json, Charset.defaultCharset());
       test("alter session set `%s` = true", ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE);
       testBuilder()
         .sqlQuery(query)
@@ -391,7 +330,7 @@ private void doTestNestedLoopJoinWithNaN() throws Exception {
     File file = new File(dirTestWatcher.getRootDir(), table_name);
 
     try {
-      FileUtils.writeStringToFile(file, json);
+      FileUtils.writeStringToFile(file, json, Charset.defaultCharset());
      test("alter session set `%s` = true", ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE);
       testBuilder()
         .sqlQuery(query)
@@ -426,7 +365,7 @@ private void doTestHashJoinWithNaN() throws Exception {
     File file = new File(dirTestWatcher.getRootDir(), table_name);
 
     try {
-      FileUtils.writeStringToFile(file, json);
+      FileUtils.writeStringToFile(file, json, Charset.defaultCharset());
       test("alter session set `%s` = true", ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE);
       testBuilder()
         .sqlQuery(query)
@@ -459,7 +398,7 @@ private void doTestMergeJoinWithNaN() throws Exception {
     File file = new File(dirTestWatcher.getRootDir(), table_name);
 
     try {
-      FileUtils.writeStringToFile(file, json);
+      FileUtils.writeStringToFile(file, json, Charset.defaultCharset());
       test("alter session set `%s` = true", ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE);
       testBuilder()
         .sqlQuery(query)
diff --git a/exec/java-exec/src/test/resources/jsoninput/allTypes.csvh b/exec/java-exec/src/test/resources/jsoninput/allTypes.csvh
new file mode 100644
index 00000000000..99a6731b18c
--- /dev/null
+++ b/exec/java-exec/src/test/resources/jsoninput/allTypes.csvh
@@ -0,0 +1,2 @@
+col1,col2
+"{bi: 123,fl: 123.4,st: ""foo"",mp: { a: 10, b: ""bar"" },ar: [ 10, 20 ],nu: null}", 2
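
Reviewer note (not part of the patch): a minimal usage sketch of the two call forms this change supports, as exercised by the tests above. The table and column names below are hypothetical; the option names are the real Drill session options read by the single-argument form.

    -- Single-argument form: honors the system/session JSON options
    ALTER SESSION SET `store.json.all_text_mode` = true;
    SELECT convert_fromJSON(json_col) FROM dfs.`my_table`;

    -- Three-argument form: per-call settings override the system options
    -- (second argument = allTextMode, third = readNumbersAsDouble)
    SELECT convert_fromJSON(json_col, true, false) FROM dfs.`my_table`;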