From 6cc7c31475682d73c0828556017429c6d49f284b Mon Sep 17 00:00:00 2001 From: Indhumathi27 Date: Wed, 21 Nov 2018 15:21:49 +0530 Subject: [PATCH] [CARBONDATA-3114]Remove Null Values in all types of columns for RangeFilters --- .../core/scan/filter/FilterUtil.java | 23 ++++++++++++++ .../RangeValueFilterExecuterImpl.java | 21 ++----------- ...RowLevelRangeGrtThanFiterExecuterImpl.java | 8 ++++- ...RangeGrtrThanEquaToFilterExecuterImpl.java | 8 ++++- ...lRangeLessThanEqualFilterExecuterImpl.java | 20 ++++--------- ...wLevelRangeLessThanFilterExecuterImpl.java | 20 ++++--------- .../src/test/resources/data_timestamp.csv | 10 +++++++ ...tampDataTypeDirectDictionaryTestCase.scala | 30 +++++++++++++++++++ 8 files changed, 89 insertions(+), 51 deletions(-) create mode 100644 integration/spark-common-test/src/test/resources/data_timestamp.csv diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java index 06672f52013..286f68f2454 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/FilterUtil.java @@ -52,6 +52,8 @@ import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk; import org.apache.carbondata.core.keygenerator.KeyGenException; import org.apache.carbondata.core.keygenerator.KeyGenerator; +import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator; +import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryKeyGeneratorFactory; import org.apache.carbondata.core.keygenerator.factory.KeyGeneratorFactory; import org.apache.carbondata.core.keygenerator.mdkey.MultiDimKeyVarLengthGenerator; import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier; @@ -2247,4 +2249,25 @@ public static int compareValues(byte[] filterValue, byte[] minMaxBytes, } } + /** + * This method is used to get default 
null values for a direct dictionary column + * @param currentBlockDimension + * @param segmentProperties + * @return + */ + public static byte[] getDefaultNullValue(CarbonDimension currentBlockDimension, + SegmentProperties segmentProperties) { + byte[] defaultValue = null; + DirectDictionaryGenerator directDictionaryGenerator = DirectDictionaryKeyGeneratorFactory + .getDirectDictionaryGenerator(currentBlockDimension.getDataType()); + int key = directDictionaryGenerator.generateDirectSurrogateKey(null); + if (currentBlockDimension.isSortColumn()) { + defaultValue = FilterUtil + .getMaskKey(key, currentBlockDimension, segmentProperties.getSortColumnsGenerator()); + } else { + defaultValue = ByteUtil.toXorBytes(key); + } + return defaultValue; + } + } diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java index e84e82da77f..bcae001f1fb 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java @@ -24,8 +24,6 @@ import org.apache.carbondata.core.datastore.block.SegmentProperties; import org.apache.carbondata.core.datastore.chunk.DimensionColumnPage; import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk; -import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator; -import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryKeyGeneratorFactory; import org.apache.carbondata.core.metadata.datatype.DataTypes; import org.apache.carbondata.core.metadata.encoder.Encoding; import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension; @@ -55,10 +53,6 @@ public class RangeValueFilterExecuterImpl implements FilterExecuter { private byte[][] filterRangesValues; private 
SegmentProperties segmentProperties; private boolean isDefaultValuePresentInFilter; - /** - * it has index at which given dimension is stored in file - */ - private int dimensionChunkIndex; /** * flag to check whether the filter dimension is present in current block list of dimensions. @@ -106,8 +100,6 @@ private void initDimensionChunkIndexes() { segmentProperties.getDimensionFromCurrentBlock(dimColEvaluatorInfo.getDimension()); if (null != dimensionFromCurrentBlock) { dimColEvaluatorInfo.setColumnIndex(dimensionFromCurrentBlock.getOrdinal()); - this.dimensionChunkIndex = segmentProperties.getDimensionOrdinalToChunkMapping() - .get(dimensionFromCurrentBlock.getOrdinal()); isDimensionPresentInCurrentBlock = true; } } @@ -656,17 +648,8 @@ private BitSet setFilterdIndexToBitSet(DimensionColumnPage dimensionColumnPage, } else { byte[] defaultValue = null; if (dimColEvaluatorInfo.getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY)) { - DirectDictionaryGenerator directDictionaryGenerator = DirectDictionaryKeyGeneratorFactory - .getDirectDictionaryGenerator(dimColEvaluatorInfo.getDimension().getDataType()); - int key = directDictionaryGenerator.generateDirectSurrogateKey(null); - CarbonDimension currentBlockDimension = - segmentProperties.getDimensions().get(dimensionChunkIndex); - if (currentBlockDimension.isSortColumn()) { - defaultValue = FilterUtil.getMaskKey(key, currentBlockDimension, - this.segmentProperties.getSortColumnsGenerator()); - } else { - defaultValue = ByteUtil.toXorBytes(key); - } + defaultValue = + FilterUtil.getDefaultNullValue(dimColEvaluatorInfo.getDimension(), segmentProperties); } else { if (dimColEvaluatorInfo.getDimension().getDataType() == DataTypes.STRING) { defaultValue = CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY; diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java 
b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java index 06dc3c4e1ab..d2c4b057617 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java @@ -450,10 +450,16 @@ private BitSet getFilteredIndexes(DimensionColumnPage dimensionColumnPage, byte[] defaultValue = null; if (dimColEvaluatorInfoList.get(0).getDimension().getDataType() == DataTypes.STRING) { defaultValue = CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY; + } else if (dimColEvaluatorInfoList.get(0).getDimension() + .hasEncoding(Encoding.DIRECT_DICTIONARY)) { + defaultValue = FilterUtil + .getDefaultNullValue(dimColEvaluatorInfoList.get(0).getDimension(), segmentProperties); } else if (!dimensionColumnPage.isAdaptiveEncoded()) { defaultValue = CarbonCommonConstants.EMPTY_BYTE_ARRAY; } - if (dimensionColumnPage.isNoDicitionaryColumn()) { + if (dimensionColumnPage.isNoDicitionaryColumn() || ( + dimColEvaluatorInfoList.get(0).getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY) + && dimColEvaluatorInfoList.get(0).getDimension().hasEncoding(Encoding.DICTIONARY))) { FilterUtil.removeNullValues(dimensionColumnPage, bitSet, defaultValue); } return bitSet; diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java index 02c587e400e..cf31033b823 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java @@ -454,10 +454,16 @@ private BitSet getFilteredIndexes(DimensionColumnPage dimensionColumnPage, 
byte[] defaultValue = null; if (dimColEvaluatorInfoList.get(0).getDimension().getDataType() == DataTypes.STRING) { defaultValue = CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY; + } else if (dimColEvaluatorInfoList.get(0).getDimension() + .hasEncoding(Encoding.DIRECT_DICTIONARY)) { + defaultValue = FilterUtil + .getDefaultNullValue(dimColEvaluatorInfoList.get(0).getDimension(), segmentProperties); } else if (!dimensionColumnPage.isAdaptiveEncoded()) { defaultValue = CarbonCommonConstants.EMPTY_BYTE_ARRAY; } - if (dimensionColumnPage.isNoDicitionaryColumn()) { + if (dimensionColumnPage.isNoDicitionaryColumn() || ( + dimColEvaluatorInfoList.get(0).getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY) + && dimColEvaluatorInfoList.get(0).getDimension().hasEncoding(Encoding.DICTIONARY))) { FilterUtil.removeNullValues(dimensionColumnPage, bitSet, defaultValue); } return bitSet; diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java index ac9661e23aa..8ea6e0d7554 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java @@ -26,8 +26,6 @@ import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk; import org.apache.carbondata.core.datastore.chunk.impl.MeasureRawColumnChunk; import org.apache.carbondata.core.datastore.page.ColumnPage; -import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator; -import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryKeyGeneratorFactory; import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier; import org.apache.carbondata.core.metadata.datatype.DataType; import 
org.apache.carbondata.core.metadata.datatype.DataTypes; @@ -426,18 +424,8 @@ private BitSet getFilteredIndexes(DimensionColumnPage dimensionColumnPage, int numerOfRows) { byte[] defaultValue = null; if (dimColEvaluatorInfoList.get(0).getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY)) { - DirectDictionaryGenerator directDictionaryGenerator = DirectDictionaryKeyGeneratorFactory - .getDirectDictionaryGenerator( - dimColEvaluatorInfoList.get(0).getDimension().getDataType()); - int key = directDictionaryGenerator.generateDirectSurrogateKey(null); - CarbonDimension currentBlockDimension = - segmentProperties.getDimensions().get(dimensionChunkIndex[0]); - if (currentBlockDimension.isSortColumn()) { - defaultValue = FilterUtil.getMaskKey(key, currentBlockDimension, - this.segmentProperties.getSortColumnsGenerator()); - } else { - defaultValue = ByteUtil.toXorBytes(key); - } + defaultValue = FilterUtil + .getDefaultNullValue(dimColEvaluatorInfoList.get(0).getDimension(), segmentProperties); } else if (dimColEvaluatorInfoList.get(0).getDimension().getDataType() != DataTypes.STRING) { defaultValue = CarbonCommonConstants.EMPTY_BYTE_ARRAY; } @@ -452,7 +440,9 @@ private BitSet getFilteredIndexes(DimensionColumnPage dimensionColumnPage, if (dimColEvaluatorInfoList.get(0).getDimension().getDataType() == DataTypes.STRING) { defaultValue = CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY; } - if (dimensionColumnPage.isNoDicitionaryColumn()) { + if (dimensionColumnPage.isNoDicitionaryColumn() || ( + dimColEvaluatorInfoList.get(0).getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY) + && dimColEvaluatorInfoList.get(0).getDimension().hasEncoding(Encoding.DICTIONARY))) { FilterUtil.removeNullValues(dimensionColumnPage, bitSet, defaultValue); } return bitSet; diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFilterExecuterImpl.java 
b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFilterExecuterImpl.java index 644cf028fbf..df1afc4f96b 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFilterExecuterImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFilterExecuterImpl.java @@ -26,8 +26,6 @@ import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk; import org.apache.carbondata.core.datastore.chunk.impl.MeasureRawColumnChunk; import org.apache.carbondata.core.datastore.page.ColumnPage; -import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator; -import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryKeyGeneratorFactory; import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier; import org.apache.carbondata.core.metadata.datatype.DataType; import org.apache.carbondata.core.metadata.datatype.DataTypes; @@ -422,18 +420,8 @@ private BitSet getFilteredIndexes(DimensionColumnPage dimensionColumnPage, int numerOfRows) { byte[] defaultValue = null; if (dimColEvaluatorInfoList.get(0).getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY)) { - DirectDictionaryGenerator directDictionaryGenerator = DirectDictionaryKeyGeneratorFactory - .getDirectDictionaryGenerator( - dimColEvaluatorInfoList.get(0).getDimension().getDataType()); - int key = directDictionaryGenerator.generateDirectSurrogateKey(null); - CarbonDimension currentBlockDimension = - segmentProperties.getDimensions().get(dimensionChunkIndex[0]); - if (currentBlockDimension.isSortColumn()) { - defaultValue = FilterUtil.getMaskKey(key, currentBlockDimension, - this.segmentProperties.getSortColumnsGenerator()); - } else { - defaultValue = ByteUtil.toXorBytes(key); - } + defaultValue = FilterUtil + .getDefaultNullValue(dimColEvaluatorInfoList.get(0).getDimension(), segmentProperties); } else if 
// Range filter on a timestamp column declared with dictionary_include and no_inverted_index.
// The CSV fixture contains empty and unparsable rows; with bad_records_action=force these are
// expected to load as nulls (TODO confirm against loader behaviour). The carbon result is
// compared with the same query run on a plain hive table loaded from the same file.
test("test timestamp with dictionary include and no_inverted index") {
  CarbonProperties.getInstance()
    .addProperty(CarbonCommonConstants.CARBON_PUSH_ROW_FILTERS_FOR_VECTOR, "true")
  try {
    sql("drop table if exists test_timestamp")
    sql("drop table if exists test_timestamp_hive")
    CarbonProperties.getInstance()
      .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy-MM-dd HH:mm:ss")
    sql(
      "create table test_timestamp(col timestamp) stored by 'carbondata' tblproperties" +
      "('no_inverted_index'='col','dictionary_include'='col')")
    val csvFilePath = s"$resourcesPath/data_timestamp.csv"
    sql(
      "load data inpath '" + csvFilePath +
      "' into table test_timestamp options('delimiter'='=','quotechar'=''," +
      "'bad_records_action'='force','fileheader'='col')")
    sql(
      "create table test_timestamp_hive(col timestamp) row format delimited fields terminated by " +
      "','")
    sql("load data inpath '" + csvFilePath + "' into table test_timestamp_hive ")
    checkAnswer(sql(
      "select col from test_timestamp where col not between '2014-01-01 18:00:00' and '0'"),
      sql("select col from test_timestamp_hive where col not between '2014-01-01 18:00:00' and " +
          "'0'"))
  } finally {
    // Restore the default even when a load or the assertion above throws, so the row-filter
    // push-down setting cannot leak into tests that run after this one.
    CarbonProperties.getInstance()
      .addProperty(CarbonCommonConstants.CARBON_PUSH_ROW_FILTERS_FOR_VECTOR,
        CarbonCommonConstants.CARBON_PUSH_ROW_FILTERS_FOR_VECTOR_DEFAULT)
  }
}