Skip to content

Commit

Permalink
[CARBONDATA-3114]Remove Null Values for a Dictionary_Include Timestam…
Browse files Browse the repository at this point in the history
…p column for Range Filters

Problem:
Null values are not removed for range filters when the filtered column is a timestamp column configured with both dictionary_include and no_inverted_index.
Solution:
Remove null values when evaluating range filters on such dictionary_include, no_inverted_index timestamp columns.

This closes #2937
  • Loading branch information
Indhumathi27 authored and ravipesala committed Nov 30, 2018
1 parent c364ba0 commit 5e096b8
Show file tree
Hide file tree
Showing 8 changed files with 89 additions and 51 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@
import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk;
import org.apache.carbondata.core.keygenerator.KeyGenException;
import org.apache.carbondata.core.keygenerator.KeyGenerator;
import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator;
import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryKeyGeneratorFactory;
import org.apache.carbondata.core.keygenerator.factory.KeyGeneratorFactory;
import org.apache.carbondata.core.keygenerator.mdkey.MultiDimKeyVarLengthGenerator;
import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
Expand Down Expand Up @@ -2247,4 +2249,25 @@ public static int compareValues(byte[] filterValue, byte[] minMaxBytes,
}
}

/**
 * Builds the byte form of the null member for a direct dictionary column.
 *
 * The surrogate key for null is obtained from the direct dictionary generator that
 * matches the dimension's data type, and is then converted to bytes in one of two ways
 * depending on whether the dimension is a sort column.
 *
 * @param currentBlockDimension dimension whose data type selects the direct dictionary
 *                              generator and whose sort-column flag selects the encoding
 * @param segmentProperties     source of the sort-columns key generator; only consulted
 *                              when the dimension is a sort column
 * @return the result of {@code FilterUtil.getMaskKey} for a sort column, otherwise the
 *         result of {@code ByteUtil.toXorBytes} applied to the null surrogate key
 */
public static byte[] getDefaultNullValue(CarbonDimension currentBlockDimension,
    SegmentProperties segmentProperties) {
  // Passing null asks the generator for the surrogate key that stands for a null member.
  int nullSurrogateKey = DirectDictionaryKeyGeneratorFactory
      .getDirectDictionaryGenerator(currentBlockDimension.getDataType())
      .generateDirectSurrogateKey(null);
  if (!currentBlockDimension.isSortColumn()) {
    return ByteUtil.toXorBytes(nullSurrogateKey);
  }
  return FilterUtil.getMaskKey(nullSurrogateKey, currentBlockDimension,
      segmentProperties.getSortColumnsGenerator());
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,6 @@
import org.apache.carbondata.core.datastore.block.SegmentProperties;
import org.apache.carbondata.core.datastore.chunk.DimensionColumnPage;
import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk;
import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator;
import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryKeyGeneratorFactory;
import org.apache.carbondata.core.metadata.datatype.DataTypes;
import org.apache.carbondata.core.metadata.encoder.Encoding;
import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension;
Expand Down Expand Up @@ -55,10 +53,6 @@ public class RangeValueFilterExecuterImpl implements FilterExecuter {
private byte[][] filterRangesValues;
private SegmentProperties segmentProperties;
private boolean isDefaultValuePresentInFilter;
/**
* it has index at which given dimension is stored in file
*/
private int dimensionChunkIndex;

/**
* flag to check whether the filter dimension is present in current block list of dimensions.
Expand Down Expand Up @@ -106,8 +100,6 @@ private void initDimensionChunkIndexes() {
segmentProperties.getDimensionFromCurrentBlock(dimColEvaluatorInfo.getDimension());
if (null != dimensionFromCurrentBlock) {
dimColEvaluatorInfo.setColumnIndex(dimensionFromCurrentBlock.getOrdinal());
this.dimensionChunkIndex = segmentProperties.getDimensionOrdinalToChunkMapping()
.get(dimensionFromCurrentBlock.getOrdinal());
isDimensionPresentInCurrentBlock = true;
}
}
Expand Down Expand Up @@ -656,17 +648,8 @@ private BitSet setFilterdIndexToBitSet(DimensionColumnPage dimensionColumnPage,
} else {
byte[] defaultValue = null;
if (dimColEvaluatorInfo.getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY)) {
DirectDictionaryGenerator directDictionaryGenerator = DirectDictionaryKeyGeneratorFactory
.getDirectDictionaryGenerator(dimColEvaluatorInfo.getDimension().getDataType());
int key = directDictionaryGenerator.generateDirectSurrogateKey(null);
CarbonDimension currentBlockDimension =
segmentProperties.getDimensions().get(dimensionChunkIndex);
if (currentBlockDimension.isSortColumn()) {
defaultValue = FilterUtil.getMaskKey(key, currentBlockDimension,
this.segmentProperties.getSortColumnsGenerator());
} else {
defaultValue = ByteUtil.toXorBytes(key);
}
defaultValue =
FilterUtil.getDefaultNullValue(dimColEvaluatorInfo.getDimension(), segmentProperties);
} else {
if (dimColEvaluatorInfo.getDimension().getDataType() == DataTypes.STRING) {
defaultValue = CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -450,10 +450,16 @@ private BitSet getFilteredIndexes(DimensionColumnPage dimensionColumnPage,
byte[] defaultValue = null;
if (dimColEvaluatorInfoList.get(0).getDimension().getDataType() == DataTypes.STRING) {
defaultValue = CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY;
} else if (dimColEvaluatorInfoList.get(0).getDimension()
.hasEncoding(Encoding.DIRECT_DICTIONARY)) {
defaultValue = FilterUtil
.getDefaultNullValue(dimColEvaluatorInfoList.get(0).getDimension(), segmentProperties);
} else if (!dimensionColumnPage.isAdaptiveEncoded()) {
defaultValue = CarbonCommonConstants.EMPTY_BYTE_ARRAY;
}
if (dimensionColumnPage.isNoDicitionaryColumn()) {
if (dimensionColumnPage.isNoDicitionaryColumn() || (
dimColEvaluatorInfoList.get(0).getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY)
&& dimColEvaluatorInfoList.get(0).getDimension().hasEncoding(Encoding.DICTIONARY))) {
FilterUtil.removeNullValues(dimensionColumnPage, bitSet, defaultValue);
}
return bitSet;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -454,10 +454,16 @@ private BitSet getFilteredIndexes(DimensionColumnPage dimensionColumnPage,
byte[] defaultValue = null;
if (dimColEvaluatorInfoList.get(0).getDimension().getDataType() == DataTypes.STRING) {
defaultValue = CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY;
} else if (dimColEvaluatorInfoList.get(0).getDimension()
.hasEncoding(Encoding.DIRECT_DICTIONARY)) {
defaultValue = FilterUtil
.getDefaultNullValue(dimColEvaluatorInfoList.get(0).getDimension(), segmentProperties);
} else if (!dimensionColumnPage.isAdaptiveEncoded()) {
defaultValue = CarbonCommonConstants.EMPTY_BYTE_ARRAY;
}
if (dimensionColumnPage.isNoDicitionaryColumn()) {
if (dimensionColumnPage.isNoDicitionaryColumn() || (
dimColEvaluatorInfoList.get(0).getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY)
&& dimColEvaluatorInfoList.get(0).getDimension().hasEncoding(Encoding.DICTIONARY))) {
FilterUtil.removeNullValues(dimensionColumnPage, bitSet, defaultValue);
}
return bitSet;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@
import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk;
import org.apache.carbondata.core.datastore.chunk.impl.MeasureRawColumnChunk;
import org.apache.carbondata.core.datastore.page.ColumnPage;
import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator;
import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryKeyGeneratorFactory;
import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
import org.apache.carbondata.core.metadata.datatype.DataType;
import org.apache.carbondata.core.metadata.datatype.DataTypes;
Expand Down Expand Up @@ -426,18 +424,8 @@ private BitSet getFilteredIndexes(DimensionColumnPage dimensionColumnPage,
int numerOfRows) {
byte[] defaultValue = null;
if (dimColEvaluatorInfoList.get(0).getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY)) {
DirectDictionaryGenerator directDictionaryGenerator = DirectDictionaryKeyGeneratorFactory
.getDirectDictionaryGenerator(
dimColEvaluatorInfoList.get(0).getDimension().getDataType());
int key = directDictionaryGenerator.generateDirectSurrogateKey(null);
CarbonDimension currentBlockDimension =
segmentProperties.getDimensions().get(dimensionChunkIndex[0]);
if (currentBlockDimension.isSortColumn()) {
defaultValue = FilterUtil.getMaskKey(key, currentBlockDimension,
this.segmentProperties.getSortColumnsGenerator());
} else {
defaultValue = ByteUtil.toXorBytes(key);
}
defaultValue = FilterUtil
.getDefaultNullValue(dimColEvaluatorInfoList.get(0).getDimension(), segmentProperties);
} else if (dimColEvaluatorInfoList.get(0).getDimension().getDataType() != DataTypes.STRING) {
defaultValue = CarbonCommonConstants.EMPTY_BYTE_ARRAY;
}
Expand All @@ -452,7 +440,9 @@ private BitSet getFilteredIndexes(DimensionColumnPage dimensionColumnPage,
if (dimColEvaluatorInfoList.get(0).getDimension().getDataType() == DataTypes.STRING) {
defaultValue = CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY;
}
if (dimensionColumnPage.isNoDicitionaryColumn()) {
if (dimensionColumnPage.isNoDicitionaryColumn() || (
dimColEvaluatorInfoList.get(0).getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY)
&& dimColEvaluatorInfoList.get(0).getDimension().hasEncoding(Encoding.DICTIONARY))) {
FilterUtil.removeNullValues(dimensionColumnPage, bitSet, defaultValue);
}
return bitSet;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@
import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk;
import org.apache.carbondata.core.datastore.chunk.impl.MeasureRawColumnChunk;
import org.apache.carbondata.core.datastore.page.ColumnPage;
import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator;
import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryKeyGeneratorFactory;
import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
import org.apache.carbondata.core.metadata.datatype.DataType;
import org.apache.carbondata.core.metadata.datatype.DataTypes;
Expand Down Expand Up @@ -422,18 +420,8 @@ private BitSet getFilteredIndexes(DimensionColumnPage dimensionColumnPage,
int numerOfRows) {
byte[] defaultValue = null;
if (dimColEvaluatorInfoList.get(0).getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY)) {
DirectDictionaryGenerator directDictionaryGenerator = DirectDictionaryKeyGeneratorFactory
.getDirectDictionaryGenerator(
dimColEvaluatorInfoList.get(0).getDimension().getDataType());
int key = directDictionaryGenerator.generateDirectSurrogateKey(null);
CarbonDimension currentBlockDimension =
segmentProperties.getDimensions().get(dimensionChunkIndex[0]);
if (currentBlockDimension.isSortColumn()) {
defaultValue = FilterUtil.getMaskKey(key, currentBlockDimension,
this.segmentProperties.getSortColumnsGenerator());
} else {
defaultValue = ByteUtil.toXorBytes(key);
}
defaultValue = FilterUtil
.getDefaultNullValue(dimColEvaluatorInfoList.get(0).getDimension(), segmentProperties);
} else if (dimColEvaluatorInfoList.get(0).getDimension().getDataType() != DataTypes.STRING) {
defaultValue = CarbonCommonConstants.EMPTY_BYTE_ARRAY;
}
Expand All @@ -448,7 +436,9 @@ private BitSet getFilteredIndexes(DimensionColumnPage dimensionColumnPage,
if (dimColEvaluatorInfoList.get(0).getDimension().getDataType() == DataTypes.STRING) {
defaultValue = CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY;
}
if (dimensionColumnPage.isNoDicitionaryColumn()) {
if (dimensionColumnPage.isNoDicitionaryColumn() || (
dimColEvaluatorInfoList.get(0).getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY)
&& dimColEvaluatorInfoList.get(0).getDimension().hasEncoding(Encoding.DICTIONARY))) {
FilterUtil.removeNullValues(dimensionColumnPage, bitSet, defaultValue);
}
return bitSet;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
col
2014-01-01 18:00:00
2014-01-02 18:00:00
2014-01-03 18:00:00

2014-01-03 18:00:00
0
2014-01-03 18:00:00

2014-01-03 18:00:00
Original file line number Diff line number Diff line change
Expand Up @@ -141,9 +141,39 @@ class TimestampDataTypeDirectDictionaryTest extends QueryTest with BeforeAndAfte
)
}

// Checks that a NOT BETWEEN range filter on a timestamp column declared with both
// dictionary_include and no_inverted_index returns the same rows from the carbon table
// as from an equivalent hive table loaded from the same CSV file.
test("test timestamp with dictionary include and no_inverted index") {
  // Enabled for this test only; reset in the finally block below so that a failing
  // statement or assertion cannot leave the setting switched on for later tests.
  CarbonProperties.getInstance()
    .addProperty(CarbonCommonConstants.CARBON_PUSH_ROW_FILTERS_FOR_VECTOR, "true")
  try {
    sql("drop table if exists test_timestamp")
    sql("drop table if exists test_timestamp_hive")
    CarbonProperties.getInstance()
      .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy-MM-dd HH:mm:ss")
    sql(
      "create table test_timestamp(col timestamp) stored by 'carbondata' tblproperties" +
      "('no_inverted_index'='col','dictionary_include'='col')")
    val csvFilePath = s"$resourcesPath/data_timestamp.csv"
    sql(
      "load data inpath '" + csvFilePath +
      "' into table test_timestamp options('delimiter'='=','quotechar'=''," +
      "'bad_records_action'='force','fileheader'='col')")
    sql(
      "create table test_timestamp_hive(col timestamp) row format delimited fields terminated by " +
      "','")
    sql("load data inpath '" + csvFilePath + "' into table test_timestamp_hive ")
    // The carbon result must match the hive result for the same predicate.
    checkAnswer(sql(
      "select col from test_timestamp where col not between '2014-01-01 18:00:00' and '0'"),
      sql("select col from test_timestamp_hive where col not between '2014-01-01 18:00:00' and " +
          "'0'"))
  } finally {
    CarbonProperties.getInstance()
      .addProperty(CarbonCommonConstants.CARBON_PUSH_ROW_FILTERS_FOR_VECTOR,
        CarbonCommonConstants.CARBON_PUSH_ROW_FILTERS_FOR_VECTOR_DEFAULT)
  }
}

override def afterAll {
sql("drop table directDictionaryTable")
sql("drop table directDictionaryTable_hive")
sql("drop table if exists test_timestamp")
sql("drop table if exists test_timestamp_hive")
CarbonProperties.getInstance()
.addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,
CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT)
Expand Down

0 comments on commit 5e096b8

Please sign in to comment.