Skip to content

Commit

Permalink
Merge fb49d67 into 3cd8b94
Browse files Browse the repository at this point in the history
  • Loading branch information
dhatchayani committed Sep 26, 2018
2 parents 3cd8b94 + fb49d67 commit 5408853
Show file tree
Hide file tree
Showing 5 changed files with 46 additions and 27 deletions.
Expand Up @@ -36,6 +36,7 @@
import org.apache.carbondata.core.metadata.encoder.Encoding;
import org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn;
import org.apache.carbondata.core.util.CarbonUtil;
import org.apache.carbondata.core.util.DataTypeUtil;

import org.apache.hadoop.util.bloom.CarbonBloomFilter;
import org.apache.hadoop.util.bloom.Key;
Expand All @@ -52,6 +53,7 @@ public abstract class AbstractBloomDataMapWriter extends DataMapWriter {
private List<String> currentDMFiles;
private List<DataOutputStream> currentDataOutStreams;
protected List<CarbonBloomFilter> indexBloomFilters;
private boolean[] isNoDictionaryPrimitive;

AbstractBloomDataMapWriter(String tablePath, String dataMapName, List<CarbonColumn> indexColumns,
Segment segment, String shardName, SegmentProperties segmentProperties,
Expand All @@ -64,6 +66,11 @@ public abstract class AbstractBloomDataMapWriter extends DataMapWriter {
currentDMFiles = new ArrayList<>(indexColumns.size());
currentDataOutStreams = new ArrayList<>(indexColumns.size());
indexBloomFilters = new ArrayList<>(indexColumns.size());
// to get the null value of the no dictionary primitive column
isNoDictionaryPrimitive = new boolean[indexColumns.size()];
for (int i = 0; i < indexColumns.size(); i++) {
isNoDictionaryPrimitive[i] = checkNoDictionaryPrimitiveDataType(i);
}
initDataMapFile();
resetBloomFilters();
}
Expand Down Expand Up @@ -119,12 +126,28 @@ public void onPageAdded(int blockletId, int pageId, int pageSize, ColumnPage[] p
for (int rowId = 0; rowId < pageSize; rowId++) {
// for each indexed column, add the data to index
for (int i = 0; i < indexColumns.size(); i++) {
Object data = pages[i].getData(rowId);
addValue2BloomIndex(i, data);
// in primitive no dictionary page, null is written as 0.
// check the null bitsets from the page and consider it as null value
if (isNoDictionaryPrimitive[i] && pages[i].getNullBits().get(rowId)) {
addValue2BloomIndex(i, null);
} else {
Object data = pages[i].getData(rowId);
addValue2BloomIndex(i, data);
}
}
}
}

/**
 * Tells whether the index column at the given position is a dimension that has neither
 * DICTIONARY nor DIRECT_DICTIONARY encoding and whose data type is reported as primitive
 * by {@code DataTypeUtil.isPrimitiveColumn}.
 *
 * @param index position of the column inside {@code indexColumns}
 * @return {@code true} only when all of the above conditions hold
 */
private boolean checkNoDictionaryPrimitiveDataType(int index) {
  // columns that are not dimensions never qualify
  if (!indexColumns.get(index).isDimension()) {
    return false;
  }
  // dictionary and direct dictionary encoded dimensions never qualify
  if (indexColumns.get(index).hasEncoding(Encoding.DICTIONARY)
      || indexColumns.get(index).hasEncoding(Encoding.DIRECT_DICTIONARY)) {
    return false;
  }
  // what remains is a no dictionary dimension: the answer depends on its data type only
  return DataTypeUtil.isPrimitiveColumn(indexColumns.get(index).getDataType());
}

protected void addValue2BloomIndex(int indexColIdx, Object value) {
byte[] indexValue;
// convert measure to bytes
Expand Down
Expand Up @@ -288,12 +288,16 @@ private BloomQueryModel buildQueryModelForEqual(ColumnExpression ce,
format.setTimeZone(TimeZone.getTimeZone("GMT"));

literalValue = format.format(new Date((long) expressionValue / 1000));
} else if (le.getLiteralExpDataType() == DataTypes.TIMESTAMP) {
} else if (le.getLiteralExpDataType() == DataTypes.TIMESTAMP && (
this.name2Col.get(columnName).hasEncoding(Encoding.DICTIONARY) || this.name2Col
.get(columnName).hasEncoding(Encoding.DIRECT_DICTIONARY))) {
DateFormat format =
new SimpleDateFormat(CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT);
// the below settings are set statically according to TimeStampDirectDictionaryGenerator
format.setLenient(false);
literalValue = format.format(new Date((long) expressionValue / 1000));
} else if (le.getLiteralExpDataType() == DataTypes.TIMESTAMP) {
literalValue = (long) expressionValue / 1000L;
} else {
literalValue = expressionValue;
}
Expand Down Expand Up @@ -323,39 +327,33 @@ private BloomQueryModel buildQueryModelInternal(CarbonColumn carbonColumn,
if (null != filterLiteralValue) {
strFilterValue = String.valueOf(filterLiteralValue);
}

Object convertedValue = this.name2Converters.get(carbonColumn.getColName()).convert(
strFilterValue, badRecordLogHolder);
Object convertedValue = null;

byte[] internalFilterValue;
if (carbonColumn.isMeasure()) {
convertedValue = this.name2Converters.get(carbonColumn.getColName())
.convert(strFilterValue, badRecordLogHolder);
// for measures, the value is already the type, just convert it to bytes.
if (convertedValue == null) {
convertedValue = DataConvertUtil.getNullValueForMeasure(carbonColumn.getDataType(),
carbonColumn.getColumnSchema().getScale());
}
// Carbon stores boolean as byte. Here we convert it for `getValueAsBytes`
if (carbonColumn.getDataType().equals(DataTypes.BOOLEAN)) {
convertedValue = BooleanConvert.boolean2Byte((Boolean)convertedValue);
convertedValue = BooleanConvert.boolean2Byte((Boolean) convertedValue);
}
internalFilterValue = CarbonUtil.getValueAsBytes(carbonColumn.getDataType(), convertedValue);
} else if (carbonColumn.hasEncoding(Encoding.DIRECT_DICTIONARY) ||
carbonColumn.hasEncoding(Encoding.DICTIONARY)) {
} else if (carbonColumn.hasEncoding(Encoding.DIRECT_DICTIONARY) || carbonColumn
.hasEncoding(Encoding.DICTIONARY)) {
convertedValue = this.name2Converters.get(carbonColumn.getColName())
.convert(strFilterValue, badRecordLogHolder);
// for dictionary/date columns, convert the surrogate key to bytes
internalFilterValue = CarbonUtil.getValueAsBytes(DataTypes.INT, convertedValue);
} else {
// for non dictionary dimensions, numeric columns will be of original data,
// so convert the data to bytes
if (DataTypeUtil.isPrimitiveColumn(carbonColumn.getDataType())) {
if (convertedValue == null) {
convertedValue = DataConvertUtil.getNullValueForMeasure(carbonColumn.getDataType(),
carbonColumn.getColumnSchema().getScale());
}
internalFilterValue =
CarbonUtil.getValueAsBytes(carbonColumn.getDataType(), convertedValue);
} else {
internalFilterValue = (byte[]) convertedValue;
}
// for non dictionary dimensions, convert the filter value to bytes based on the data type
internalFilterValue = DataTypeUtil
.getBytesDataDataTypeForNoDictionaryColumn(filterLiteralValue,
carbonColumn.getDataType());
}
if (internalFilterValue.length == 0) {
internalFilterValue = CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY;
Expand Down
Expand Up @@ -65,7 +65,8 @@ public void addRow(int blockletId, int pageId, int rowId, Object[] values) {
protected byte[] convertNonDictionaryValue(int indexColIdx, Object value) {
  // Returns the byte form of `value` for the index column at position `indexColIdx`.
  //   indexColIdx - position of the column inside indexColumns
  //   value       - the column value; for primitive data types it is the original typed
  //                 value, otherwise it is cast to byte[] as-is
  if (DataTypeUtil.isPrimitiveColumn(indexColumns.get(indexColIdx).getDataType())) {
    // no dictionary primitive columns will be of original data, so convert it to bytes.
    // This is the single return for this branch; a second return statement placed in the
    // same branch would be unreachable and is rejected by the compiler.
    return DataTypeUtil.getBytesDataDataTypeForNoDictionaryColumn(value,
        indexColumns.get(indexColIdx).getDataType());
  }
  // non primitive values are handed over unchanged
  return (byte[]) value;
}
Expand Down
Expand Up @@ -79,7 +79,8 @@ protected byte[] convertNonDictionaryValue(int indexColIdx, Object value) {
return DataConvertUtil.getRawBytesForVarchar((byte[]) value);
} else if (DataTypeUtil.isPrimitiveColumn(indexColumns.get(indexColIdx).getDataType())) {
// get bytes for the original value of the no dictionary column
return CarbonUtil.getValueAsBytes(indexColumns.get(indexColIdx).getDataType(), value);
return DataTypeUtil.getBytesDataDataTypeForNoDictionaryColumn(value,
indexColumns.get(indexColIdx).getDataType());
} else {
return DataConvertUtil.getRawBytes((byte[]) value);
}
Expand Down
Expand Up @@ -271,10 +271,6 @@ class RawBytesReadSupport(segmentProperties: SegmentProperties, indexColumns: Ar
if (DataTypeUtil.isPrimitiveColumn(col.getDataType)) {
var dataFromBytes = DataTypeUtil
.getDataBasedOnDataTypeForNoDictionaryColumn(bytes, col.getDataType)
if (dataFromBytes == null) {
dataFromBytes = DataConvertUtil
.getNullValueForMeasure(col.getDataType, col.getColumnSchema.getScale)
}
// for timestamp the above method will give the original data, so it should be
// converted again to the format to be loaded (without micros)
if (null != dataFromBytes && col.getDataType == DataTypes.TIMESTAMP) {
Expand Down

0 comments on commit 5408853

Please sign in to comment.