Skip to content

Commit

Permalink
[CARBONDATA-3114]Remove Null Values in all types of columns for Range…
Browse files Browse the repository at this point in the history
…Filters
  • Loading branch information
Indhumathi27 committed Nov 21, 2018
1 parent b8d6025 commit 63c273c
Show file tree
Hide file tree
Showing 7 changed files with 87 additions and 32 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@
import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk;
import org.apache.carbondata.core.keygenerator.KeyGenException;
import org.apache.carbondata.core.keygenerator.KeyGenerator;
import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator;
import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryKeyGeneratorFactory;
import org.apache.carbondata.core.keygenerator.factory.KeyGeneratorFactory;
import org.apache.carbondata.core.keygenerator.mdkey.MultiDimKeyVarLengthGenerator;
import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
Expand Down Expand Up @@ -2247,4 +2249,25 @@ public static int compareValues(byte[] filterValue, byte[] minMaxBytes,
}
}

/**
 * Builds the byte form of the surrogate key that the direct dictionary generator
 * produces for a null member of the given dimension.
 *
 * @param currentBlockDimension dimension whose data type selects the direct dictionary
 *                              generator and whose sort-column flag selects the encoding
 * @param segmentProperties     supplies the sort columns key generator used when the
 *                              dimension is a sort column
 * @return the mask key of the null surrogate when the dimension is a sort column,
 *         otherwise the result of {@code ByteUtil.toXorBytes} on the null surrogate
 */
public static byte[] getDefaultNullValue(CarbonDimension currentBlockDimension,
    SegmentProperties segmentProperties) {
  DirectDictionaryGenerator generator = DirectDictionaryKeyGeneratorFactory
      .getDirectDictionaryGenerator(currentBlockDimension.getDataType());
  // Passing null asks the generator for the surrogate key it assigns to a null member.
  int nullSurrogateKey = generator.generateDirectSurrogateKey(null);
  if (!currentBlockDimension.isSortColumn()) {
    return ByteUtil.toXorBytes(nullSurrogateKey);
  }
  return FilterUtil.getMaskKey(nullSurrogateKey, currentBlockDimension,
      segmentProperties.getSortColumnsGenerator());
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -450,10 +450,16 @@ private BitSet getFilteredIndexes(DimensionColumnPage dimensionColumnPage,
byte[] defaultValue = null;
if (dimColEvaluatorInfoList.get(0).getDimension().getDataType() == DataTypes.STRING) {
defaultValue = CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY;
} else if (dimColEvaluatorInfoList.get(0).getDimension()
.hasEncoding(Encoding.DIRECT_DICTIONARY)) {
defaultValue = FilterUtil
.getDefaultNullValue(dimColEvaluatorInfoList.get(0).getDimension(), segmentProperties);
} else if (!dimensionColumnPage.isAdaptiveEncoded()) {
defaultValue = CarbonCommonConstants.EMPTY_BYTE_ARRAY;
}
if (dimensionColumnPage.isNoDicitionaryColumn()) {
if (dimensionColumnPage.isNoDicitionaryColumn() || (
dimColEvaluatorInfoList.get(0).getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY)
&& dimColEvaluatorInfoList.get(0).getDimension().hasEncoding(Encoding.DICTIONARY))) {
FilterUtil.removeNullValues(dimensionColumnPage, bitSet, defaultValue);
}
return bitSet;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -454,10 +454,16 @@ private BitSet getFilteredIndexes(DimensionColumnPage dimensionColumnPage,
byte[] defaultValue = null;
if (dimColEvaluatorInfoList.get(0).getDimension().getDataType() == DataTypes.STRING) {
defaultValue = CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY;
} else if (dimColEvaluatorInfoList.get(0).getDimension()
.hasEncoding(Encoding.DIRECT_DICTIONARY)) {
defaultValue = FilterUtil
.getDefaultNullValue(dimColEvaluatorInfoList.get(0).getDimension(), segmentProperties);
} else if (!dimensionColumnPage.isAdaptiveEncoded()) {
defaultValue = CarbonCommonConstants.EMPTY_BYTE_ARRAY;
}
if (dimensionColumnPage.isNoDicitionaryColumn()) {
if (dimensionColumnPage.isNoDicitionaryColumn() || (
dimColEvaluatorInfoList.get(0).getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY)
&& dimColEvaluatorInfoList.get(0).getDimension().hasEncoding(Encoding.DICTIONARY))) {
FilterUtil.removeNullValues(dimensionColumnPage, bitSet, defaultValue);
}
return bitSet;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@
import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk;
import org.apache.carbondata.core.datastore.chunk.impl.MeasureRawColumnChunk;
import org.apache.carbondata.core.datastore.page.ColumnPage;
import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator;
import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryKeyGeneratorFactory;
import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
import org.apache.carbondata.core.metadata.datatype.DataType;
import org.apache.carbondata.core.metadata.datatype.DataTypes;
Expand Down Expand Up @@ -426,18 +424,8 @@ private BitSet getFilteredIndexes(DimensionColumnPage dimensionColumnPage,
int numerOfRows) {
byte[] defaultValue = null;
if (dimColEvaluatorInfoList.get(0).getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY)) {
DirectDictionaryGenerator directDictionaryGenerator = DirectDictionaryKeyGeneratorFactory
.getDirectDictionaryGenerator(
dimColEvaluatorInfoList.get(0).getDimension().getDataType());
int key = directDictionaryGenerator.generateDirectSurrogateKey(null);
CarbonDimension currentBlockDimension =
segmentProperties.getDimensions().get(dimensionChunkIndex[0]);
if (currentBlockDimension.isSortColumn()) {
defaultValue = FilterUtil.getMaskKey(key, currentBlockDimension,
this.segmentProperties.getSortColumnsGenerator());
} else {
defaultValue = ByteUtil.toXorBytes(key);
}
defaultValue = FilterUtil
.getDefaultNullValue(dimColEvaluatorInfoList.get(0).getDimension(), segmentProperties);
} else if (dimColEvaluatorInfoList.get(0).getDimension().getDataType() != DataTypes.STRING) {
defaultValue = CarbonCommonConstants.EMPTY_BYTE_ARRAY;
}
Expand All @@ -452,7 +440,9 @@ private BitSet getFilteredIndexes(DimensionColumnPage dimensionColumnPage,
if (dimColEvaluatorInfoList.get(0).getDimension().getDataType() == DataTypes.STRING) {
defaultValue = CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY;
}
if (dimensionColumnPage.isNoDicitionaryColumn()) {
if (dimensionColumnPage.isNoDicitionaryColumn() || (
dimColEvaluatorInfoList.get(0).getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY)
&& dimColEvaluatorInfoList.get(0).getDimension().hasEncoding(Encoding.DICTIONARY))) {
FilterUtil.removeNullValues(dimensionColumnPage, bitSet, defaultValue);
}
return bitSet;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@
import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk;
import org.apache.carbondata.core.datastore.chunk.impl.MeasureRawColumnChunk;
import org.apache.carbondata.core.datastore.page.ColumnPage;
import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator;
import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryKeyGeneratorFactory;
import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
import org.apache.carbondata.core.metadata.datatype.DataType;
import org.apache.carbondata.core.metadata.datatype.DataTypes;
Expand Down Expand Up @@ -422,18 +420,8 @@ private BitSet getFilteredIndexes(DimensionColumnPage dimensionColumnPage,
int numerOfRows) {
byte[] defaultValue = null;
if (dimColEvaluatorInfoList.get(0).getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY)) {
DirectDictionaryGenerator directDictionaryGenerator = DirectDictionaryKeyGeneratorFactory
.getDirectDictionaryGenerator(
dimColEvaluatorInfoList.get(0).getDimension().getDataType());
int key = directDictionaryGenerator.generateDirectSurrogateKey(null);
CarbonDimension currentBlockDimension =
segmentProperties.getDimensions().get(dimensionChunkIndex[0]);
if (currentBlockDimension.isSortColumn()) {
defaultValue = FilterUtil.getMaskKey(key, currentBlockDimension,
this.segmentProperties.getSortColumnsGenerator());
} else {
defaultValue = ByteUtil.toXorBytes(key);
}
defaultValue = FilterUtil
.getDefaultNullValue(dimColEvaluatorInfoList.get(0).getDimension(), segmentProperties);
} else if (dimColEvaluatorInfoList.get(0).getDimension().getDataType() != DataTypes.STRING) {
defaultValue = CarbonCommonConstants.EMPTY_BYTE_ARRAY;
}
Expand All @@ -448,7 +436,9 @@ private BitSet getFilteredIndexes(DimensionColumnPage dimensionColumnPage,
if (dimColEvaluatorInfoList.get(0).getDimension().getDataType() == DataTypes.STRING) {
defaultValue = CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY;
}
if (dimensionColumnPage.isNoDicitionaryColumn()) {
if (dimensionColumnPage.isNoDicitionaryColumn() || (
dimColEvaluatorInfoList.get(0).getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY)
&& dimColEvaluatorInfoList.get(0).getDimension().hasEncoding(Encoding.DICTIONARY))) {
FilterUtil.removeNullValues(dimensionColumnPage, bitSet, defaultValue);
}
return bitSet;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
col
2014-01-01 18:00:00
2014-01-02 18:00:00
2014-01-03 18:00:00

2014-01-03 18:00:00
0
2014-01-03 18:00:00

2014-01-03 18:00:00
Original file line number Diff line number Diff line change
Expand Up @@ -141,9 +141,39 @@ class TimestampDataTypeDirectDictionaryTest extends QueryTest with BeforeAndAfte
)
}

test("test timestamp with dictionary include and no_inverted index") {
  // Compares a NOT BETWEEN range filter on a dictionary-included timestamp column
  // (without inverted index) against the same query on an equivalent Hive table.
  // Row-filter push down is enabled only for the duration of this test.
  CarbonProperties.getInstance()
    .addProperty(CarbonCommonConstants.CARBON_PUSH_ROW_FILTERS_FOR_VECTOR, "true")
  try {
    sql("drop table if exists test_timestamp")
    sql("drop table if exists test_timestamp_hive")
    CarbonProperties.getInstance()
      .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy-MM-dd HH:mm:ss")
    sql(
      "create table test_timestamp(col timestamp) stored by 'carbondata' tblproperties" +
      "('no_inverted_index'='col','dictionary_include'='col')")
    val csvFilePath = s"$resourcesPath/data_timestamp.csv"
    sql(
      "load data inpath '" + csvFilePath +
      "' into table test_timestamp options('delimiter'='=','quotechar'=''," +
      "'bad_records_action'='force','fileheader'='col')")
    sql(
      "create table test_timestamp_hive(col timestamp) row format delimited fields terminated by " +
      "','")
    sql("load data inpath '" + csvFilePath + "' into table test_timestamp_hive ")
    checkAnswer(sql(
      "select col from test_timestamp where col not between '2014-01-01 18:00:00' and '0'"),
      sql("select col from test_timestamp_hive where col not between '2014-01-01 18:00:00' and " +
          "'0'"))
  } finally {
    // Restore the default even when a statement or the assertion above fails, so the
    // global property does not leak into tests that run afterwards.
    CarbonProperties.getInstance()
      .addProperty(CarbonCommonConstants.CARBON_PUSH_ROW_FILTERS_FOR_VECTOR,
        CarbonCommonConstants.CARBON_PUSH_ROW_FILTERS_FOR_VECTOR_DEFAULT)
  }
}

override def afterAll {
sql("drop table directDictionaryTable")
sql("drop table directDictionaryTable_hive")
sql("drop table if exists test_timestamp")
sql("drop table if exists test_timestamp_hive")
CarbonProperties.getInstance()
.addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,
CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT)
Expand Down

0 comments on commit 63c273c

Please sign in to comment.