From 90386f64d0043601e471b860f709d6404086da3a Mon Sep 17 00:00:00 2001
From: Indhumathi27
Date: Mon, 30 Jul 2018 14:18:44 +0530
Subject: [PATCH] Fix Dictionary_Include for ComplexDataType

---
 .../src/test/resources/nontransactional1.csv  |  2 ++
 .../complexType/TestComplexDataType.scala     |  7 +++++
 .../TestNonTransactionalCarbonTable.scala     | 30 ++++++++++++++++++
 .../processing/datatypes/ArrayDataType.java   | 24 ++++++++++++--
 .../processing/datatypes/StructDataType.java  | 31 ++++++++++++++-----
 .../converter/impl/FieldEncoderFactory.java   |  6 ++--
 .../processing/loading/model/LoadOption.java  |  4 ++-
 7 files changed, 91 insertions(+), 13 deletions(-)
 create mode 100644 integration/spark-common-test/src/test/resources/nontransactional1.csv

diff --git a/integration/spark-common-test/src/test/resources/nontransactional1.csv b/integration/spark-common-test/src/test/resources/nontransactional1.csv
new file mode 100644
index 00000000000..ac9ec5470c5
--- /dev/null
+++ b/integration/spark-common-test/src/test/resources/nontransactional1.csv
@@ -0,0 +1,2 @@
+arvind, 33, 6.2
+bill, 35, 7.3
\ No newline at end of file
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala
index 2b3cfc066f6..01f1085bf74 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/complexType/TestComplexDataType.scala
@@ -971,6 +971,13 @@ class TestComplexDataType extends QueryTest with BeforeAndAfterAll {
       "('dictionary_include'='b')")
     sql("insert into test values(1,2) ")
     checkAnswer(sql("select b[0] from test"),Seq(Row(2)))
+    sql("DROP TABLE IF EXISTS test")
+    sql(
+      "create table test(intval array<array<int>>,str array<array<string>>, bool " +
+      "array<array<boolean>>, sint array<array<short>>, big array<array<bigint>>) stored by " +
+      "'carbondata' tblproperties('dictionary_include'='bool,sint,big')")
+    sql("insert into test values(1,'ab',true,22,33)")
+    checkExistence(sql("select * from test"), true, "33")
   }

   test("date with struct and array") {
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
index 62c3df6ac92..a4338ed3c17 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala
@@ -52,6 +52,7 @@
 import org.apache.carbondata.core.datastore.page.encoding.DefaultEncodingFactory
 import org.apache.carbondata.core.metadata.ColumnarFormatVersion
 import org.apache.carbondata.core.metadata.datatype.DataTypes
 import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil, DataFileFooterConverterV3}
+import org.apache.carbondata.processing.loading.exception.CarbonDataLoadingException
 import org.apache.carbondata.sdk.file._

@@ -350,6 +351,35 @@ class TestNonTransactionalCarbonTable extends QueryTest with BeforeAndAfterAll {
     cleanTestData()
   }
+
+  test("test csv fileheader for non-transactional table") {
+    FileUtils.deleteDirectory(new File(writerPath))
+    buildTestDataWithSameUUID(3, false, null, List("name"))
+    assert(new File(writerPath).exists())
+
+    sql("DROP TABLE IF EXISTS sdkOutputTable")
+
+    sql(
+      s"""CREATE EXTERNAL TABLE sdkOutputTable STORED BY 'carbondata' LOCATION
+         |'$writerPath' """.stripMargin)
+
+    checkAnswer(sql("SELECT name,name FROM sdkOutputTable"), Seq(
+      Row("robot0", "robot0"),
+      Row("robot1", "robot1"),
+      Row("robot2", "robot2")))
+    // load a csv file whose header does not match the table schema
+    var exception = intercept[CarbonDataLoadingException] {
+      sql(s"""load data inpath '$resourcesPath/nontransactional1.csv' into table sdkOutputTable""").show(200,false)
+    }
+    assert(exception.getMessage()
+      .contains("CSV header in input file is not proper. Column names in schema and csv header are not the same."))
+
+    sql("DROP TABLE sdkOutputTable")
+    // drop table should not delete the files
+    assert(new File(writerPath).exists())
+    cleanTestData()
+  }
+

   test("test count star with multiple loads files with same schema and UUID") {
     FileUtils.deleteDirectory(new File(writerPath))
     buildTestDataWithSameUUID(3, false, null, List("name"))
diff --git a/processing/src/main/java/org/apache/carbondata/processing/datatypes/ArrayDataType.java b/processing/src/main/java/org/apache/carbondata/processing/datatypes/ArrayDataType.java
index 60972e8ba51..0a1eba80a9f 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/datatypes/ArrayDataType.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/datatypes/ArrayDataType.java
@@ -62,6 +62,11 @@ public class ArrayDataType implements GenericDataType<ArrayObject> {
    */
   private int outputArrayIndex;
+  /**
+   * whether this complex column is dictionary encoded
+   */
+  private boolean isDictionaryColumn;
+
   /**
    * current data counter
    */
   private int dataCounter;
@@ -88,6 +93,21 @@ public ArrayDataType(String name, String parentname, String columnId) {
     this.columnId = columnId;
   }

+  /**
+   * constructor
+   * @param name column name
+   * @param parentname parent column name
+   * @param columnId column id
+   * @param isDictionaryColumn true if the column is dictionary encoded
+   */
+  public ArrayDataType(String name, String parentname, String columnId,
+      Boolean isDictionaryColumn) {
+    this.name = name;
+    this.parentname = parentname;
+    this.columnId = columnId;
+    this.isDictionaryColumn = isDictionaryColumn;
+  }
+
   /*
    * to add child dimensions
    */
@@ -153,7 +173,7 @@ public void setSurrogateIndex(int surrIndex) {
   }

   @Override public boolean getIsColumnDictionary() {
-    return true;
+    return isDictionaryColumn;
   }

   @Override public void writeByteArray(ArrayObject input, DataOutputStream dataOutputStream,
@@ -172,7 +192,7 @@ public void setSurrogateIndex(int surrIndex) {

   @Override public void fillCardinality(List<Integer> dimCardWithComplex) {
-    if (children.getIsColumnDictionary()) {
+    if (this.getIsColumnDictionary()) {
       dimCardWithComplex.add(0);
       children.fillCardinality(dimCardWithComplex);
     }
   }
diff --git a/processing/src/main/java/org/apache/carbondata/processing/datatypes/StructDataType.java b/processing/src/main/java/org/apache/carbondata/processing/datatypes/StructDataType.java
index af95de63a0e..31f2234562a 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/datatypes/StructDataType.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/datatypes/StructDataType.java
@@ -57,6 +57,12 @@ public class StructDataType implements GenericDataType<StructObject> {
    * output array index
    */
   private int outputArrayIndex;
+
+  /**
+   * whether this complex column is dictionary encoded
+   */
+  private boolean isDictionaryColumn;
+
   /**
    * data counter
    */
   private int dataCounter;
@@ -82,6 +88,21 @@ public StructDataType(String name, String parentname, String columnId) {
     this.columnId = columnId;
   }

+
+  /**
+   * constructor
+   * @param name column name
+   * @param parentname parent column name
+   * @param columnId column id
+   * @param isDictionaryColumn true if the column is dictionary encoded
+   */
+  public StructDataType(String name, String parentname, String columnId,
+      Boolean isDictionaryColumn) {
+    this.name = name;
+    this.parentname = parentname;
+    this.columnId = columnId;
+    this.isDictionaryColumn = isDictionaryColumn;
+  }
+
   /*
    * add child dimensions
    */
@@ -153,7 +174,7 @@ public void setSurrogateIndex(int surrIndex) {
   }

   @Override public boolean getIsColumnDictionary() {
-    return true;
+    return isDictionaryColumn;
   }

   @Override public void writeByteArray(StructObject input, DataOutputStream dataOutputStream,
@@ -178,13 +199,7 @@ public void setSurrogateIndex(int surrIndex) {

   @Override public void fillCardinality(List<Integer> dimCardWithComplex) {
-    boolean isDictionaryColumn = false;
-    for (GenericDataType child : children) {
-      if (child.getIsColumnDictionary()) {
-        isDictionaryColumn = true;
-      }
-    }
-    if (isDictionaryColumn) {
+    if (this.getIsColumnDictionary()) {
       dimCardWithComplex.add(0);
       for (int i = 0; i < children.size(); i++) {
         children.get(i).fillCardinality(dimCardWithComplex);
       }
diff --git a/processing/src/main/java/org/apache/carbondata/processing/loading/converter/impl/FieldEncoderFactory.java b/processing/src/main/java/org/apache/carbondata/processing/loading/converter/impl/FieldEncoderFactory.java
index 39c12a95ceb..e9d2b02e619 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/loading/converter/impl/FieldEncoderFactory.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/loading/converter/impl/FieldEncoderFactory.java
@@ -144,7 +144,8 @@ private static GenericDataType createComplexType(CarbonColumn carbonColumn, Stri
           ((CarbonDimension) carbonColumn).getListOfChildDimensions();
       // Create array parser with complex delimiter
       ArrayDataType arrayDataType =
-          new ArrayDataType(carbonColumn.getColName(), parentName, carbonColumn.getColumnId());
+          new ArrayDataType(carbonColumn.getColName(), parentName, carbonColumn.getColumnId(),
+              carbonColumn.hasEncoding(Encoding.DICTIONARY));
       for (CarbonDimension dimension : listOfChildDimensions) {
         arrayDataType.addChildren(
             createComplexType(dimension, carbonColumn.getColName(), absoluteTableIdentifier,
@@ -156,7 +157,8 @@ private static GenericDataType createComplexType(CarbonColumn carbonColumn, Stri
           ((CarbonDimension) carbonColumn).getListOfChildDimensions();
       // Create struct parser with complex delimiter
       StructDataType structDataType =
-          new StructDataType(carbonColumn.getColName(), parentName, carbonColumn.getColumnId());
+          new StructDataType(carbonColumn.getColName(), parentName, carbonColumn.getColumnId(),
+              carbonColumn.hasEncoding(Encoding.DICTIONARY));
       for (CarbonDimension dimension : dimensions) {
         structDataType.addChildren(
             createComplexType(dimension, carbonColumn.getColName(), absoluteTableIdentifier,
diff --git a/processing/src/main/java/org/apache/carbondata/processing/loading/model/LoadOption.java b/processing/src/main/java/org/apache/carbondata/processing/loading/model/LoadOption.java
index 97338167a08..98cd90d5e24 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/loading/model/LoadOption.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/loading/model/LoadOption.java
@@ -236,7 +236,9 @@ public static String[] getCsvHeaderColumns(
       }
     }

-    if (carbonLoadModel.isCarbonTransactionalTable() && !CarbonDataProcessorUtil
+    // In the SDK flow, hadoopConf will always be null,
+    // hence the CSV file header check is not required for a non-transactional table
+    if (hadoopConf != null && !CarbonDataProcessorUtil
         .isHeaderValid(carbonLoadModel.getTableName(), csvColumns,
             carbonLoadModel.getCarbonDataLoadSchema(), staticPartitionCols)) {
       if (csvFile == null) {
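
Illustration (not part of the patch): a rough sketch of what the change in getIsColumnDictionary()
and fillCardinality() means for a complex column, assuming only the ArrayDataType constructor,
addChildren() and fillCardinality(List<Integer>) shapes shown above; the column names and
column ids below are made up for the example.

    import java.util.ArrayList;
    import java.util.List;

    import org.apache.carbondata.processing.datatypes.ArrayDataType;

    public class DictionaryIncludeSketch {
      public static void main(String[] args) {
        // Complex column listed in dictionary_include: the new constructor records the flag.
        ArrayDataType dictArray = new ArrayDataType("intval", "", "id-1", true);
        // Same column shape, but not listed in dictionary_include.
        ArrayDataType noDictArray = new ArrayDataType("intval", "", "id-2", false);

        // fillCardinality recurses into the child for dictionary columns, so attach one.
        dictArray.addChildren(new ArrayDataType("intval.val", "intval", "id-3", false));

        List<Integer> dictCard = new ArrayList<>();
        List<Integer> noDictCard = new ArrayList<>();
        dictArray.fillCardinality(dictCard);     // reserves a cardinality slot: [0]
        noDictArray.fillCardinality(noDictCard); // reserves nothing: []

        System.out.println(dictCard + " " + noDictCard);
      }
    }

Before this patch, getIsColumnDictionary() always returned true, so a cardinality slot was
reserved even for complex columns that were not listed in dictionary_include.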