Skip to content

Commit

Permalink
[CARBONDATA-2975] DefaultValue choosing and removeNullValues on range filters is incorrect
Browse files: browse the repository at this point in the history
  • Loading branch information
dhatchayani committed Sep 28, 2018
1 parent 3cd8b94 commit 988663a
Show file tree
Hide file tree
Showing 8 changed files with 77 additions and 29 deletions.
Expand Up @@ -134,14 +134,10 @@ private byte[] getChunkData(int rowId, boolean isRowIdChanged) {
.getDictionaryValue(CarbonUtil.getSurrogateInternal(columnPage.getBytes(rowId), 0, 3));
} else if ((columnType == ColumnType.COMPLEX_PRIMITIVE && isAdaptiveEncoded()) || (
columnType == ColumnType.PLAIN_VALUE && DataTypeUtil.isPrimitiveColumn(srcDataType))) {
if (!isRowIdChanged && columnPage.getNullBits().get(rowId)
&& columnType == ColumnType.COMPLEX_PRIMITIVE) {
byte[] nullBitSet = getNullBitSet(rowId, columnType);
if (!isRowIdChanged && null != nullBitSet) {
// if this row is null, return default null represent in byte array
return CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY;
}
if (!isRowIdChanged && columnPage.getNullBits().get(rowId)) {
// if this row is null, return default null represent in byte array
return CarbonCommonConstants.EMPTY_BYTE_ARRAY;
return nullBitSet;
}
if (srcDataType == DataTypes.FLOAT) {
float floatData = columnPage.getFloat(rowId);
Expand Down Expand Up @@ -182,8 +178,10 @@ private byte[] getChunkData(int rowId, boolean isRowIdChanged) {
throw new RuntimeException("unsupported type: " + targetDataType);
}
} else if ((columnType == ColumnType.COMPLEX_PRIMITIVE && !isAdaptiveEncoded())) {
if (!isRowIdChanged && columnPage.getNullBits().get(rowId)) {
return CarbonCommonConstants.EMPTY_BYTE_ARRAY;
byte[] nullBitSet = getNullBitSet(rowId, columnType);
if (!isRowIdChanged && null != nullBitSet) {
// if this row is null, return default null represent in byte array
return nullBitSet;
}
if ((srcDataType == DataTypes.BYTE) || (srcDataType == DataTypes.BOOLEAN)) {
byte[] out = new byte[1];
Expand All @@ -205,6 +203,21 @@ private byte[] getChunkData(int rowId, boolean isRowIdChanged) {
}
}

/**
 * Resolves the byte array that stands in for a null value at the given row.
 *
 * <p>When the page is explicitly sorted, the row id is first translated through
 * {@code getInvertedReverseIndex} before the page's null bits are consulted.
 *
 * @param rowId row to inspect
 * @param columnType column type of the page; selects which null representation is used
 * @return {@code CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY} for a null
 *         {@code COMPLEX_PRIMITIVE} row, {@code CarbonCommonConstants.EMPTY_BYTE_ARRAY}
 *         for a null row of any other column type, or {@code null} when the row's
 *         null bit is not set
 */
private byte[] getNullBitSet(int rowId, ColumnType columnType) {
  // explicitly sorted pages are checked at the translated row position
  final int nullBitIndex = isExplicitSorted() ? getInvertedReverseIndex(rowId) : rowId;
  if (!columnPage.getNullBits().get(nullBitIndex)) {
    // row is not null: there is no default representation to hand back
    return null;
  }
  // row is null: pick the default null representation for this column type
  return columnType == ColumnType.COMPLEX_PRIMITIVE
      ? CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY
      : CarbonCommonConstants.EMPTY_BYTE_ARRAY;
}

private Object getActualData(int rowId, boolean isRowIdChanged) {
ColumnType columnType = columnPage.getColumnSpec().getColumnType();
DataType srcDataType = columnPage.getColumnSpec().getSchemaDataType();
Expand Down Expand Up @@ -302,8 +315,18 @@ public boolean isExplicitSorted() {

/**
 * Compares the value stored at {@code rowId} with {@code compareValue}.
 *
 * <p>For an explicitly sorted page whose row is null, the null representation
 * returned by {@code getNullBitSet} is compared instead of the stored bytes.
 * Otherwise the row's bytes are read through {@code getChunkData}, with the
 * row-id-changed flag set when the page is explicitly sorted.
 *
 * @param rowId row whose value is compared
 * @param compareValue bytes to compare the row's value against
 * @return the result of {@code ByteUtil.UnsafeComparer.INSTANCE.compareTo} with the
 *         row's bytes as the first argument and {@code compareValue} as the second
 */
@Override
public int compareTo(int rowId, byte[] compareValue) {
  byte[] nullBitSet = getNullBitSet(rowId, columnPage.getColumnSpec().getColumnType());
  if (isExplicitSorted() && null != nullBitSet) {
    // this row is null: compare using the default null representation
    return ByteUtil.UnsafeComparer.INSTANCE.compareTo(nullBitSet, compareValue);
  }
  // same flag source as before: the isExplicitSorted field drives isRowIdChanged
  boolean isRowIdChanged = isExplicitSorted;
  byte[] chunkData = this.getChunkData(rowId, isRowIdChanged);
  return ByteUtil.UnsafeComparer.INSTANCE.compareTo(chunkData, compareValue);
}

@Override
Expand Down
Expand Up @@ -1947,11 +1947,12 @@ public static BitSetGroup createBitSetGroupWithDefaultValue(int pageCount, int t
public static void removeNullValues(DimensionColumnPage dimensionColumnPage, BitSet bitSet,
byte[] defaultValue) {
if (!bitSet.isEmpty()) {
if (null != dimensionColumnPage.getNullBits() && !dimensionColumnPage.getNullBits().isEmpty()
&& !dimensionColumnPage.isExplicitSorted() && !dimensionColumnPage.isAdaptiveEncoded()) {
for (int i = bitSet.nextSetBit(0); i >= 0; i = bitSet.nextSetBit(i + 1)) {
if (dimensionColumnPage.getNullBits().get(i)) {
bitSet.flip(i);
if (null != dimensionColumnPage.getNullBits()) {
if (!dimensionColumnPage.getNullBits().isEmpty()) {
for (int i = bitSet.nextSetBit(0); i >= 0; i = bitSet.nextSetBit(i + 1)) {
if (dimensionColumnPage.getNullBits().get(i)) {
bitSet.flip(i);
}
}
}
} else {
Expand Down
Expand Up @@ -632,7 +632,7 @@ private BitSet setFilterdIndexToBitSet(DimensionColumnPage dimensionColumnPage,
} else {
if (dimColEvaluatorInfo.getDimension().getDataType() == DataTypes.STRING) {
defaultValue = CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY;
} else {
} else if (!dimensionColumnPage.isAdaptiveEncoded()) {
defaultValue = CarbonCommonConstants.EMPTY_BYTE_ARRAY;
}
}
Expand Down
Expand Up @@ -28,6 +28,7 @@
import org.apache.carbondata.core.datastore.page.ColumnPage;
import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
import org.apache.carbondata.core.metadata.datatype.DataType;
import org.apache.carbondata.core.metadata.datatype.DataTypes;
import org.apache.carbondata.core.metadata.encoder.Encoding;
import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension;
import org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure;
Expand Down Expand Up @@ -387,9 +388,14 @@ private BitSet getFilteredIndexes(DimensionColumnPage dimensionColumnPage,
} else {
bitSet = setFilterdIndexToBitSet(dimensionColumnPage, numerOfRows);
}
byte[] defaultValue = null;
if (dimColEvaluatorInfoList.get(0).getDimension().getDataType() == DataTypes.STRING) {
defaultValue = CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY;
} else if (!dimensionColumnPage.isAdaptiveEncoded()) {
defaultValue = CarbonCommonConstants.EMPTY_BYTE_ARRAY;
}
if (dimensionColumnPage.isNoDicitionaryColumn()) {
FilterUtil.removeNullValues(dimensionColumnPage, bitSet,
CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY);
FilterUtil.removeNullValues(dimensionColumnPage, bitSet, defaultValue);
}
return bitSet;
}
Expand Down
Expand Up @@ -28,6 +28,7 @@
import org.apache.carbondata.core.datastore.page.ColumnPage;
import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
import org.apache.carbondata.core.metadata.datatype.DataType;
import org.apache.carbondata.core.metadata.datatype.DataTypes;
import org.apache.carbondata.core.metadata.encoder.Encoding;
import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension;
import org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure;
Expand Down Expand Up @@ -386,9 +387,14 @@ private BitSet getFilteredIndexes(DimensionColumnPage dimensionColumnPage,
} else {
bitSet = setFilterdIndexToBitSet(dimensionColumnPage, numerOfRows);
}
byte[] defaultValue = null;
if (dimColEvaluatorInfoList.get(0).getDimension().getDataType() == DataTypes.STRING) {
defaultValue = CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY;
} else if (!dimensionColumnPage.isAdaptiveEncoded()) {
defaultValue = CarbonCommonConstants.EMPTY_BYTE_ARRAY;
}
if (dimensionColumnPage.isNoDicitionaryColumn()) {
FilterUtil.removeNullValues(dimensionColumnPage, bitSet,
CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY);
FilterUtil.removeNullValues(dimensionColumnPage, bitSet, defaultValue);
}
return bitSet;
}
Expand Down
Expand Up @@ -383,13 +383,16 @@ private BitSet getFilteredIndexes(DimensionColumnPage dimensionColumnPage,
BitSet bitSet = null;
if (dimensionColumnPage.isExplicitSorted()) {
bitSet = setFilterdIndexToBitSetWithColumnIndex(dimensionColumnPage, numerOfRows,
defaultValue);
dimensionColumnPage.isAdaptiveEncoded() ? null : defaultValue);
} else {
bitSet = setFilterdIndexToBitSet(dimensionColumnPage, numerOfRows, defaultValue);
bitSet = setFilterdIndexToBitSet(dimensionColumnPage, numerOfRows,
dimensionColumnPage.isAdaptiveEncoded() ? null : defaultValue);
}
if (dimColEvaluatorInfoList.get(0).getDimension().getDataType() == DataTypes.STRING) {
defaultValue = CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY;
}
if (dimensionColumnPage.isNoDicitionaryColumn()) {
FilterUtil.removeNullValues(dimensionColumnPage, bitSet,
CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY);
FilterUtil.removeNullValues(dimensionColumnPage, bitSet, defaultValue);
}
return bitSet;
}
Expand Down
Expand Up @@ -380,13 +380,16 @@ private BitSet getFilteredIndexes(DimensionColumnPage dimensionColumnPage,
BitSet bitSet = null;
if (dimensionColumnPage.isExplicitSorted()) {
bitSet = setFilterdIndexToBitSetWithColumnIndex(dimensionColumnPage, numerOfRows,
defaultValue);
dimensionColumnPage.isAdaptiveEncoded() ? null : defaultValue);
} else {
bitSet = setFilterdIndexToBitSet(dimensionColumnPage, numerOfRows, defaultValue);
bitSet = setFilterdIndexToBitSet(dimensionColumnPage, numerOfRows,
dimensionColumnPage.isAdaptiveEncoded() ? null : defaultValue);
}
if (dimColEvaluatorInfoList.get(0).getDimension().getDataType() == DataTypes.STRING) {
defaultValue = CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY;
}
if (dimensionColumnPage.isNoDicitionaryColumn()) {
FilterUtil.removeNullValues(dimensionColumnPage, bitSet,
CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY);
FilterUtil.removeNullValues(dimensionColumnPage, bitSet, defaultValue);
}
return bitSet;
}
Expand Down
Expand Up @@ -343,6 +343,12 @@ class TestSortColumns extends QueryTest with BeforeAndAfterAll {
sql(s"LOAD DATA local inpath '$resourcesPath/numeric_column_invalid_values.csv' INTO TABLE test_sort_col OPTIONS('FILEHEADER'='id,name,age')")
// compare hive and carbon data
checkAnswer(sql("select * from test_sort_col_hive"), sql("select * from test_sort_col"))
checkAnswer(sql("select * from test_sort_col_hive where age < 25"), sql("select * from test_sort_col where age < 25"))
checkAnswer(sql("select * from test_sort_col_hive where age <= 25"), sql("select * from test_sort_col where age <= 25"))
checkAnswer(sql("select * from test_sort_col_hive where age > 25"), sql("select * from test_sort_col where age > 25"))
checkAnswer(sql("select * from test_sort_col_hive where age >= 25"), sql("select * from test_sort_col where age >= 25"))
checkAnswer(sql("select * from test_sort_col_hive where age is null"), sql("select * from test_sort_col where age is null"))
checkAnswer(sql("select * from test_sort_col_hive where age is not null"), sql("select * from test_sort_col where age is not null"))
}

test("describe formatted for sort_columns") {
Expand Down

0 comments on commit 988663a

Please sign in to comment.