diff --git a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java index 46139c946c3..59e0c1d8d63 100644 --- a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java +++ b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java @@ -1845,6 +1845,18 @@ public final class CarbonCommonConstants { public static final int CARBON_MINMAX_ALLOWED_BYTE_COUNT_MIN = 10; public static final int CARBON_MINMAX_ALLOWED_BYTE_COUNT_MAX = 1000; + /** + * When enabled, complete row filtering for vector-based queries is handled by Carbon. + * When disabled, Carbon does only page-level pruning for vector-based queries and row-level + * filtering is left to Spark. + * The flow for non-vector based queries is unchanged. + */ + @CarbonProperty + public static final String CARBON_PUSH_ROW_FILTERS_FOR_VECTOR = "carbon.push.rowfilters.for.vector"; + + public static final String CARBON_PUSH_ROW_FILTERS_FOR_VECTOR_DEFAULT = "false"; + private CarbonCommonConstants() { } } diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/impl/DimensionRawColumnChunk.java b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/impl/DimensionRawColumnChunk.java index 7b1aca1f2b4..d84434e9110 100--- a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/impl/DimensionRawColumnChunk.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/impl/DimensionRawColumnChunk.java @@ -33,6 +33,7 @@ import org.apache.carbondata.core.datastore.page.encoding.DefaultEncodingFactory; import org.apache.carbondata.core.memory.MemoryException; import org.apache.carbondata.core.scan.result.vector.CarbonDictionary; +import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo; import org.apache.carbondata.core.scan.result.vector.impl.CarbonDictionaryImpl; import org.apache.carbondata.core.util.CarbonMetadataUtil; import org.apache.carbondata.format.Encoding; @@ -121,6 +122,22 @@ public DimensionColumnPage convertToDimColDataChunkWithOutCache(int index) { } } + /** + * Converts the raw data of the given page number to a DimensionColumnPage and fills + * the given vector + * + * @param pageNumber page number to decode and fill the vector + * @param vectorInfo vector to be filled with the decoded column page + */ + public void convertToDimColDataChunkAndFillVector(int pageNumber, ColumnVectorInfo vectorInfo) { + assert pageNumber < pagesCount; + try { + chunkReader.decodeColumnPageAndFillVector(this, pageNumber, vectorInfo); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + @Override public void freeMemory() { super.freeMemory(); if (null != dataChunks) { diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/impl/FixedLengthDimensionColumnPage.java b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/impl/FixedLengthDimensionColumnPage.java index c815e4d7220..e650e0e169b 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/impl/FixedLengthDimensionColumnPage.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/impl/FixedLengthDimensionColumnPage.java @@ -46,10 +46,37 @@ public FixedLengthDimensionColumnPage(byte[] dataChunk, int[] invertedIndex, dataChunk.length; dataChunkStore = DimensionChunkStoreFactory.INSTANCE .getDimensionChunkStore(columnValueSize, isExplicitSorted, numberOfRows, totalSize, - DimensionStoreType.FIXED_LENGTH,
null); + DimensionStoreType.FIXED_LENGTH, null, false); dataChunkStore.putArray(invertedIndex, invertedIndexReverse, dataChunk); } + /** + * Constructor + * + * @param dataChunk data chunk + * @param invertedIndex inverted index + * @param invertedIndexReverse reverse inverted index + * @param numberOfRows number of rows + * @param columnValueSize size of each column value + * @param vectorInfo vector to be filled with decoded column page. + */ + public FixedLengthDimensionColumnPage(byte[] dataChunk, int[] invertedIndex, + int[] invertedIndexReverse, int numberOfRows, int columnValueSize, + ColumnVectorInfo vectorInfo) { + boolean isExplicitSorted = isExplicitSorted(invertedIndex); + long totalSize = isExplicitSorted ? + dataChunk.length + (2 * numberOfRows * CarbonCommonConstants.INT_SIZE_IN_BYTE) : + dataChunk.length; + dataChunkStore = DimensionChunkStoreFactory.INSTANCE + .getDimensionChunkStore(columnValueSize, isExplicitSorted, numberOfRows, totalSize, + DimensionStoreType.FIXED_LENGTH, null, vectorInfo != null); + if (vectorInfo == null) { + dataChunkStore.putArray(invertedIndex, invertedIndexReverse, dataChunk); + } else { + dataChunkStore.fillVector(invertedIndex, invertedIndexReverse, dataChunk, vectorInfo); + } + } + /** * Below method will be used to fill the data based on offset and row id * diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/impl/MeasureRawColumnChunk.java b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/impl/MeasureRawColumnChunk.java index 9448f3032a2..5ae17a9aee9 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/impl/MeasureRawColumnChunk.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/impl/MeasureRawColumnChunk.java @@ -24,6 +24,7 @@ import org.apache.carbondata.core.datastore.chunk.reader.MeasureColumnChunkReader; import org.apache.carbondata.core.datastore.page.ColumnPage; import org.apache.carbondata.core.memory.MemoryException; +import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo; /** * Contains raw measure data @@ -94,7 +95,7 @@ public ColumnPage decodeColumnPage(int pageNumber) { public ColumnPage convertToColumnPageWithOutCache(int index) { assert index < pagesCount; // in case of filter query filter columns blocklet pages will uncompressed - // so no need to decode again + // so no need to decodeAndFillVector again if (null != columnPages && columnPages[index] != null) { return columnPages[index]; } @@ -105,6 +106,22 @@ public ColumnPage convertToColumnPageWithOutCache(int index) { } } + /** + * Converts the raw data of the given page number to a ColumnPage and fills the + * given vector + * + * @param pageNumber page number to decode and fill the vector + * @param vectorInfo vector to be filled with the decoded column page + */ + public void convertToColumnPageAndFillVector(int pageNumber, ColumnVectorInfo vectorInfo) { + assert pageNumber < pagesCount; + try { + chunkReader.decodeColumnPageAndFillVector(this, pageNumber, vectorInfo); + } catch (IOException | MemoryException e) { + throw new RuntimeException(e); + } + } + @Override public void freeMemory() { super.freeMemory(); if (null != columnPages) { diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/impl/VariableLengthDimensionColumnPage.java b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/impl/VariableLengthDimensionColumnPage.java index a404ff76a3a..0b88dc9d732 100644 ---
a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/impl/VariableLengthDimensionColumnPage.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/impl/VariableLengthDimensionColumnPage.java @@ -30,10 +30,31 @@ public class VariableLengthDimensionColumnPage extends AbstractDimensionColumnPa /** * Constructor for this class + * @param dataChunks data chunk + * @param invertedIndex inverted index + * @param invertedIndexReverse reverse inverted index + * @param numberOfRows number of rows + * @param dictionary carbon local dictionary for string column. */ public VariableLengthDimensionColumnPage(byte[] dataChunks, int[] invertedIndex, int[] invertedIndexReverse, int numberOfRows, DimensionStoreType dimStoreType, CarbonDictionary dictionary) { + this(dataChunks, invertedIndex, invertedIndexReverse, numberOfRows, dimStoreType, dictionary, + null); + } + + /** + * Constructor for this class + * @param dataChunks data chunk + * @param invertedIndex inverted index + * @param invertedIndexReverse reverse inverted index + * @param numberOfRows number of rows + * @param dictionary carbon local dictionary for string column. + * @param vectorInfo vector to be filled with decoded column page. + */ + public VariableLengthDimensionColumnPage(byte[] dataChunks, int[] invertedIndex, + int[] invertedIndexReverse, int numberOfRows, DimensionStoreType dimStoreType, + CarbonDictionary dictionary, ColumnVectorInfo vectorInfo) { boolean isExplicitSorted = isExplicitSorted(invertedIndex); long totalSize = 0; switch (dimStoreType) { @@ -54,10 +75,15 @@ public VariableLengthDimensionColumnPage(byte[] dataChunks, int[] invertedIndex, } dataChunkStore = DimensionChunkStoreFactory.INSTANCE .getDimensionChunkStore(0, isExplicitSorted, numberOfRows, totalSize, dimStoreType, - dictionary); - dataChunkStore.putArray(invertedIndex, invertedIndexReverse, dataChunks); + dictionary, vectorInfo != null); + if (vectorInfo != null) { + dataChunkStore.fillVector(invertedIndex, invertedIndexReverse, dataChunks, vectorInfo); + } else { + dataChunkStore.putArray(invertedIndex, invertedIndexReverse, dataChunks); + } } + /** * Below method will be used to fill the data based on offset and row id * diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/DimensionColumnChunkReader.java b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/DimensionColumnChunkReader.java index fd81973a821..e2d6be7229d 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/DimensionColumnChunkReader.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/DimensionColumnChunkReader.java @@ -22,6 +22,7 @@ import org.apache.carbondata.core.datastore.chunk.DimensionColumnPage; import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk; import org.apache.carbondata.core.memory.MemoryException; +import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo; /** * Interface for reading the data chunk @@ -60,4 +61,10 @@ DimensionRawColumnChunk readRawDimensionChunk(FileReader fileReader, int columnI */ DimensionColumnPage decodeColumnPage(DimensionRawColumnChunk dimensionRawColumnChunk, int pageNumber) throws IOException, MemoryException; + + /** + * Decodes the raw data chunk of given page number and fill the vector with decoded data. 
+ */ + void decodeColumnPageAndFillVector(DimensionRawColumnChunk dimensionRawColumnChunk, + int pageNumber, ColumnVectorInfo vectorInfo) throws IOException, MemoryException; } diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/MeasureColumnChunkReader.java b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/MeasureColumnChunkReader.java index f1392d09e76..0fbbe6b8884 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/MeasureColumnChunkReader.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/MeasureColumnChunkReader.java @@ -22,6 +22,7 @@ import org.apache.carbondata.core.datastore.chunk.impl.MeasureRawColumnChunk; import org.apache.carbondata.core.datastore.page.ColumnPage; import org.apache.carbondata.core.memory.MemoryException; +import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo; /** * Reader interface for reading the measure blocks from file @@ -58,4 +59,10 @@ MeasureRawColumnChunk readRawMeasureChunk(FileReader fileReader, int columnIndex ColumnPage decodeColumnPage(MeasureRawColumnChunk measureRawColumnChunk, int pageNumber) throws IOException, MemoryException; + /** + * Decode raw data and fill the vector + */ + void decodeColumnPageAndFillVector(MeasureRawColumnChunk measureRawColumnChunk, + int pageNumber, ColumnVectorInfo vectorInfo) throws IOException, MemoryException; + } diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/dimension/AbstractChunkReader.java b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/dimension/AbstractChunkReader.java index b08f9edb6b7..2c42abe388e 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/dimension/AbstractChunkReader.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/dimension/AbstractChunkReader.java @@ -16,10 +16,15 @@ */ package org.apache.carbondata.core.datastore.chunk.reader.dimension; +import java.io.IOException; + import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk; import org.apache.carbondata.core.datastore.chunk.reader.DimensionColumnChunkReader; import org.apache.carbondata.core.datastore.compression.Compressor; import org.apache.carbondata.core.keygenerator.mdkey.NumberCompressor; +import org.apache.carbondata.core.memory.MemoryException; +import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo; import org.apache.carbondata.core.util.CarbonProperties; /** @@ -79,4 +84,10 @@ public AbstractChunkReader(final int[] eachColumnValueSize, final String filePat this.numberOfRows = numberOfRows; } + @Override + public void decodeColumnPageAndFillVector(DimensionRawColumnChunk dimensionRawColumnChunk, + int pageNumber, ColumnVectorInfo vectorInfo) throws IOException, MemoryException { + throw new UnsupportedOperationException( + "This operation is not supported in this reader " + this.getClass().getName()); + } } diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/dimension/v3/CompressedDimChunkFileBasedPageLevelReaderV3.java b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/dimension/v3/CompressedDimChunkFileBasedPageLevelReaderV3.java index 6efaf8a1c8b..86a4334e130 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/dimension/v3/CompressedDimChunkFileBasedPageLevelReaderV3.java +++ 
b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/dimension/v3/CompressedDimChunkFileBasedPageLevelReaderV3.java @@ -171,6 +171,6 @@ protected DimensionRawColumnChunk[] readRawDimensionChunksInGroup(FileReader fil ByteBuffer rawData = dimensionRawColumnChunk.getFileReader() .readByteBuffer(filePath, offset, length); - return decodeDimension(dimensionRawColumnChunk, rawData, pageMetadata, 0); + return decodeDimension(dimensionRawColumnChunk, rawData, pageMetadata, 0, null); } } \ No newline at end of file diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/dimension/v3/CompressedDimensionChunkFileBasedReaderV3.java b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/dimension/v3/CompressedDimensionChunkFileBasedReaderV3.java index b96e52ed964..84dfe691000 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/dimension/v3/CompressedDimensionChunkFileBasedReaderV3.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/dimension/v3/CompressedDimensionChunkFileBasedReaderV3.java @@ -19,6 +19,7 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.util.Arrays; +import java.util.BitSet; import java.util.List; import org.apache.carbondata.core.constants.CarbonCommonConstants; @@ -39,6 +40,7 @@ import org.apache.carbondata.core.memory.MemoryException; import org.apache.carbondata.core.metadata.blocklet.BlockletInfo; import org.apache.carbondata.core.scan.executor.util.QueryUtil; +import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo; import org.apache.carbondata.core.util.CarbonMetadataUtil; import org.apache.carbondata.core.util.CarbonUtil; import org.apache.carbondata.format.DataChunk2; @@ -207,6 +209,12 @@ protected DimensionRawColumnChunk[] readRawDimensionChunksInGroup(FileReader fil */ @Override public DimensionColumnPage decodeColumnPage( DimensionRawColumnChunk rawColumnPage, int pageNumber) throws IOException, MemoryException { + return decodeColumnPage(rawColumnPage, pageNumber, null); + } + + private DimensionColumnPage decodeColumnPage( + DimensionRawColumnChunk rawColumnPage, int pageNumber, + ColumnVectorInfo vectorInfo) throws IOException, MemoryException { // data chunk of blocklet column DataChunk3 dataChunk3 = rawColumnPage.getDataChunkV3(); // get the data buffer @@ -221,49 +229,70 @@ protected DimensionRawColumnChunk[] readRawDimensionChunksInGroup(FileReader fil int offset = (int) rawColumnPage.getOffSet() + dimensionChunksLength .get(rawColumnPage.getColumnIndex()) + dataChunk3.getPage_offset().get(pageNumber); // first read the data and uncompressed it - return decodeDimension(rawColumnPage, rawData, pageMetadata, offset); + return decodeDimension(rawColumnPage, rawData, pageMetadata, offset, vectorInfo); + } + + @Override + public void decodeColumnPageAndFillVector(DimensionRawColumnChunk dimensionRawColumnChunk, + int pageNumber, ColumnVectorInfo vectorInfo) throws IOException, MemoryException { + DimensionColumnPage columnPage = + decodeColumnPage(dimensionRawColumnChunk, pageNumber, vectorInfo); + columnPage.freeMemory(); } - private ColumnPage decodeDimensionByMeta(DataChunk2 pageMetadata, - ByteBuffer pageData, int offset, boolean isLocalDictEncodedPage) + private ColumnPage decodeDimensionByMeta(DataChunk2 pageMetadata, ByteBuffer pageData, int offset, + boolean isLocalDictEncodedPage, ColumnVectorInfo vectorInfo, BitSet nullBitSet) throws IOException, MemoryException { List encodings = pageMetadata.getEncoders(); 
List encoderMetas = pageMetadata.getEncoder_meta(); String compressorName = CarbonMetadataUtil.getCompressorNameFromChunkMeta( pageMetadata.getChunk_meta()); ColumnPageDecoder decoder = encodingFactory.createDecoder(encodings, encoderMetas, - compressorName); - return decoder - .decode(pageData.array(), offset, pageMetadata.data_page_length, isLocalDictEncodedPage); + compressorName, vectorInfo != null); + if (vectorInfo != null) { + return decoder + .decodeAndFillVector(pageData.array(), offset, pageMetadata.data_page_length, vectorInfo, + nullBitSet, isLocalDictEncodedPage); + } else { + return decoder + .decode(pageData.array(), offset, pageMetadata.data_page_length, isLocalDictEncodedPage); + } } protected DimensionColumnPage decodeDimension(DimensionRawColumnChunk rawColumnPage, - ByteBuffer pageData, DataChunk2 pageMetadata, int offset) + ByteBuffer pageData, DataChunk2 pageMetadata, int offset, ColumnVectorInfo vectorInfo) throws IOException, MemoryException { List encodings = pageMetadata.getEncoders(); if (CarbonUtil.isEncodedWithMeta(encodings)) { - ColumnPage decodedPage = decodeDimensionByMeta(pageMetadata, pageData, offset, - null != rawColumnPage.getLocalDictionary()); - decodedPage.setNullBits(QueryUtil.getNullBitSet(pageMetadata.presence, this.compressor)); int[] invertedIndexes = new int[0]; int[] invertedIndexesReverse = new int[0]; // in case of no dictionary measure data types, if it is included in sort columns // then inverted index to be uncompressed + boolean isExplicitSorted = + CarbonUtil.hasEncoding(pageMetadata.encoders, Encoding.INVERTED_INDEX); + int dataOffset = offset; if (encodings.contains(Encoding.INVERTED_INDEX)) { offset += pageMetadata.data_page_length; - if (CarbonUtil.hasEncoding(pageMetadata.encoders, Encoding.INVERTED_INDEX)) { + if (isExplicitSorted) { invertedIndexes = CarbonUtil .getUnCompressColumnIndex(pageMetadata.rowid_page_length, pageData, offset); - // get the reverse index - invertedIndexesReverse = CarbonUtil.getInvertedReverseIndex(invertedIndexes); + if (vectorInfo == null) { + // get the reverse index + invertedIndexesReverse = CarbonUtil.getInvertedReverseIndex(invertedIndexes); + } else { + vectorInfo.invertedIndex = invertedIndexes; + } } } + BitSet nullBitSet = QueryUtil.getNullBitSet(pageMetadata.presence, this.compressor); + ColumnPage decodedPage = decodeDimensionByMeta(pageMetadata, pageData, dataOffset, + null != rawColumnPage.getLocalDictionary(), vectorInfo, nullBitSet); + decodedPage.setNullBits(nullBitSet); return new ColumnPageWrapper(decodedPage, rawColumnPage.getLocalDictionary(), invertedIndexes, - invertedIndexesReverse, isEncodedWithAdaptiveMeta(pageMetadata), - CarbonUtil.hasEncoding(pageMetadata.encoders, Encoding.INVERTED_INDEX)); + invertedIndexesReverse, isEncodedWithAdaptiveMeta(pageMetadata), isExplicitSorted); } else { // following code is for backward compatibility - return decodeDimensionLegacy(rawColumnPage, pageData, pageMetadata, offset); + return decodeDimensionLegacy(rawColumnPage, pageData, pageMetadata, offset, vectorInfo); } } @@ -283,8 +312,8 @@ public boolean isEncodedWithAdaptiveMeta(DataChunk2 pageMetadata) { } private DimensionColumnPage decodeDimensionLegacy(DimensionRawColumnChunk rawColumnPage, - ByteBuffer pageData, DataChunk2 pageMetadata, int offset) throws IOException, - MemoryException { + ByteBuffer pageData, DataChunk2 pageMetadata, int offset, ColumnVectorInfo vectorInfo) + throws IOException, MemoryException { byte[] dataPage; int[] rlePage; int[] invertedIndexes = new int[0]; @@ 
-296,8 +325,10 @@ private DimensionColumnPage decodeDimensionLegacy(DimensionRawColumnChunk rawCol invertedIndexes = CarbonUtil .getUnCompressColumnIndex(pageMetadata.rowid_page_length, pageData, offset); offset += pageMetadata.rowid_page_length; - // get the reverse index - invertedIndexesReverse = CarbonUtil.getInvertedReverseIndex(invertedIndexes); + if (vectorInfo == null) { + // get the reverse index + invertedIndexesReverse = CarbonUtil.getInvertedReverseIndex(invertedIndexes); + } } // if rle is applied then read the rle block chunk and then uncompress //then actual data based on rle block @@ -324,13 +355,13 @@ private DimensionColumnPage decodeDimensionLegacy(DimensionRawColumnChunk rawCol columnDataChunk = new VariableLengthDimensionColumnPage(dataPage, invertedIndexes, invertedIndexesReverse, pageMetadata.getNumberOfRowsInpage(), dimStoreType, - rawColumnPage.getLocalDictionary()); + rawColumnPage.getLocalDictionary(), vectorInfo); } else { // to store fixed length column chunk values columnDataChunk = new FixedLengthDimensionColumnPage(dataPage, invertedIndexes, invertedIndexesReverse, pageMetadata.getNumberOfRowsInpage(), - eachColumnValueSize[rawColumnPage.getColumnIndex()]); + eachColumnValueSize[rawColumnPage.getColumnIndex()], vectorInfo); } return columnDataChunk; } diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/measure/AbstractMeasureChunkReader.java b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/measure/AbstractMeasureChunkReader.java index 6774fcbef2d..cd233d2c57b 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/measure/AbstractMeasureChunkReader.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/measure/AbstractMeasureChunkReader.java @@ -16,10 +16,15 @@ */ package org.apache.carbondata.core.datastore.chunk.reader.measure; +import java.io.IOException; + +import org.apache.carbondata.core.datastore.chunk.impl.MeasureRawColumnChunk; import org.apache.carbondata.core.datastore.chunk.reader.MeasureColumnChunkReader; import org.apache.carbondata.core.datastore.compression.Compressor; import org.apache.carbondata.core.datastore.page.encoding.DefaultEncodingFactory; import org.apache.carbondata.core.datastore.page.encoding.EncodingFactory; +import org.apache.carbondata.core.memory.MemoryException; +import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo; /** * Measure block reader abstract class @@ -48,4 +53,11 @@ public AbstractMeasureChunkReader(String filePath, int numberOfRows) { this.filePath = filePath; this.numberOfRows = numberOfRows; } + + @Override + public void decodeColumnPageAndFillVector(MeasureRawColumnChunk measureRawColumnChunk, + int pageNumber, ColumnVectorInfo vectorInfo) throws IOException, MemoryException { + throw new UnsupportedOperationException( + "This operation is not supported in this class " + getClass().getName()); + } } diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/measure/v3/CompressedMeasureChunkFileBasedReaderV3.java b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/measure/v3/CompressedMeasureChunkFileBasedReaderV3.java index 240771a77b1..fc043e47a04 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/measure/v3/CompressedMeasureChunkFileBasedReaderV3.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/measure/v3/CompressedMeasureChunkFileBasedReaderV3.java @@ -18,6 +18,7 @@ import 
java.io.IOException; import java.nio.ByteBuffer; +import java.util.BitSet; import java.util.List; import org.apache.carbondata.core.datastore.FileReader; @@ -29,6 +30,7 @@ import org.apache.carbondata.core.memory.MemoryException; import org.apache.carbondata.core.metadata.blocklet.BlockletInfo; import org.apache.carbondata.core.scan.executor.util.QueryUtil; +import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo; import org.apache.carbondata.core.util.CarbonMetadataUtil; import org.apache.carbondata.core.util.CarbonUtil; import org.apache.carbondata.format.DataChunk2; @@ -190,6 +192,19 @@ protected MeasureRawColumnChunk[] readRawMeasureChunksInGroup(FileReader fileRea public ColumnPage decodeColumnPage( MeasureRawColumnChunk rawColumnChunk, int pageNumber) throws IOException, MemoryException { + return decodeColumnPage(rawColumnChunk, pageNumber, null); + } + + @Override + public void decodeColumnPageAndFillVector(MeasureRawColumnChunk measureRawColumnChunk, + int pageNumber, ColumnVectorInfo vectorInfo) throws IOException, MemoryException { + ColumnPage columnPage = decodeColumnPage(measureRawColumnChunk, pageNumber, vectorInfo); + columnPage.freeMemory(); + } + + private ColumnPage decodeColumnPage( + MeasureRawColumnChunk rawColumnChunk, int pageNumber, ColumnVectorInfo vectorInfo) + throws IOException, MemoryException { // data chunk of blocklet column DataChunk3 dataChunk3 = rawColumnChunk.getDataChunkV3(); // data chunk of page @@ -203,23 +218,30 @@ public ColumnPage decodeColumnPage( int offset = (int) rawColumnChunk.getOffSet() + measureColumnChunkLength.get(rawColumnChunk.getColumnIndex()) + dataChunk3.getPage_offset().get(pageNumber); - ColumnPage decodedPage = decodeMeasure(pageMetadata, rawColumnChunk.getRawData(), offset); - decodedPage.setNullBits(QueryUtil.getNullBitSet(pageMetadata.presence, this.compressor)); + BitSet nullBitSet = QueryUtil.getNullBitSet(pageMetadata.presence, this.compressor); + ColumnPage decodedPage = + decodeMeasure(pageMetadata, rawColumnChunk.getRawData(), offset, vectorInfo, nullBitSet); + decodedPage.setNullBits(nullBitSet); return decodedPage; } /** * Decode measure column page with page header and raw data starting from offset */ - protected ColumnPage decodeMeasure(DataChunk2 pageMetadata, ByteBuffer pageData, int offset) - throws MemoryException, IOException { + protected ColumnPage decodeMeasure(DataChunk2 pageMetadata, ByteBuffer pageData, int offset, + ColumnVectorInfo vectorInfo, BitSet nullBitSet) throws MemoryException, IOException { List encodings = pageMetadata.getEncoders(); List encoderMetas = pageMetadata.getEncoder_meta(); - String compressorName = CarbonMetadataUtil.getCompressorNameFromChunkMeta( - pageMetadata.getChunk_meta()); - ColumnPageDecoder codec = encodingFactory.createDecoder(encodings, encoderMetas, - compressorName); - return codec.decode(pageData.array(), offset, pageMetadata.data_page_length); + String compressorName = + CarbonMetadataUtil.getCompressorNameFromChunkMeta(pageMetadata.getChunk_meta()); + ColumnPageDecoder codec = + encodingFactory.createDecoder(encodings, encoderMetas, compressorName, vectorInfo != null); + if (vectorInfo != null) { + return codec + .decodeAndFillVector(pageData.array(), offset, pageMetadata.data_page_length, vectorInfo, + nullBitSet, false); + } else { + return codec.decode(pageData.array(), offset, pageMetadata.data_page_length); + } } - } diff --git 
a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/measure/v3/CompressedMsrChunkFileBasedPageLevelReaderV3.java b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/measure/v3/CompressedMsrChunkFileBasedPageLevelReaderV3.java index 924a206404d..b092350e856 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/measure/v3/CompressedMsrChunkFileBasedPageLevelReaderV3.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/reader/measure/v3/CompressedMsrChunkFileBasedPageLevelReaderV3.java @@ -19,6 +19,7 @@ import java.io.ByteArrayInputStream; import java.io.IOException; import java.nio.ByteBuffer; +import java.util.BitSet; import org.apache.carbondata.core.datastore.FileReader; import org.apache.carbondata.core.datastore.chunk.impl.MeasureRawColumnChunk; @@ -151,8 +152,9 @@ protected MeasureRawColumnChunk[] readRawMeasureChunksInGroup(FileReader fileRea ByteBuffer buffer = rawColumnPage.getFileReader() .readByteBuffer(filePath, offset, pageMetadata.data_page_length); - ColumnPage decodedPage = decodeMeasure(pageMetadata, buffer, 0); - decodedPage.setNullBits(QueryUtil.getNullBitSet(pageMetadata.presence, this.compressor)); + BitSet nullBitSet = QueryUtil.getNullBitSet(pageMetadata.presence, this.compressor); + ColumnPage decodedPage = decodeMeasure(pageMetadata, buffer, 0, null, nullBitSet); + decodedPage.setNullBits(nullBitSet); return decodedPage; } diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/DimensionChunkStoreFactory.java b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/DimensionChunkStoreFactory.java index c7bcef164e8..5346f35159a 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/DimensionChunkStoreFactory.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/DimensionChunkStoreFactory.java @@ -65,8 +65,8 @@ private DimensionChunkStoreFactory() { */ public DimensionDataChunkStore getDimensionChunkStore(int columnValueSize, boolean isInvertedIndex, int numberOfRows, long totalSize, DimensionStoreType storeType, - CarbonDictionary dictionary) { - if (isUnsafe) { + CarbonDictionary dictionary, boolean fillDirectVector) { + if (isUnsafe && !fillDirectVector) { switch (storeType) { case FIXED_LENGTH: return new UnsafeFixedLengthDimensionDataChunkStore(totalSize, columnValueSize, @@ -79,24 +79,24 @@ public DimensionDataChunkStore getDimensionChunkStore(int columnValueSize, numberOfRows); case LOCAL_DICT: return new LocalDictDimensionDataChunkStore( - new UnsafeFixedLengthDimensionDataChunkStore(totalSize, - 3, isInvertedIndex, numberOfRows), - dictionary); + new UnsafeFixedLengthDimensionDataChunkStore(totalSize, 3, isInvertedIndex, + numberOfRows), dictionary); default: throw new UnsupportedOperationException("Invalid dimension store type"); } } else { switch (storeType) { case FIXED_LENGTH: - return new SafeFixedLengthDimensionDataChunkStore(isInvertedIndex, columnValueSize); + return new SafeFixedLengthDimensionDataChunkStore(isInvertedIndex, columnValueSize, + numberOfRows); case VARIABLE_SHORT_LENGTH: return new SafeVariableShortLengthDimensionDataChunkStore(isInvertedIndex, numberOfRows); case VARIABLE_INT_LENGTH: return new SafeVariableIntLengthDimensionDataChunkStore(isInvertedIndex, numberOfRows); case LOCAL_DICT: return new LocalDictDimensionDataChunkStore( - new SafeFixedLengthDimensionDataChunkStore(isInvertedIndex, - 3), dictionary); + new 
SafeFixedLengthDimensionDataChunkStore(isInvertedIndex, 3, numberOfRows), + dictionary); default: throw new UnsupportedOperationException("Invalid dimension store type"); } diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/DimensionDataChunkStore.java b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/DimensionDataChunkStore.java index 28aed5b4d3c..8972ddb78fd 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/DimensionDataChunkStore.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/DimensionDataChunkStore.java @@ -18,6 +18,7 @@ package org.apache.carbondata.core.datastore.chunk.store; import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector; +import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo; /** * Interface responsibility is to store dimension data in memory. @@ -34,6 +35,12 @@ public interface DimensionDataChunkStore { */ void putArray(int[] invertedIndex, int[] invertedIndexReverse, byte[] data); + /** + * Fill the vector with decoded data. + */ + void fillVector(int[] invertedIndex, int[] invertedIndexReverse, byte[] data, + ColumnVectorInfo vectorInfo); + /** * Below method will be used to get the row * based on row id passed diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/LocalDictDimensionDataChunkStore.java b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/LocalDictDimensionDataChunkStore.java index 0d06f61a7ee..e70424f2be6 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/LocalDictDimensionDataChunkStore.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/LocalDictDimensionDataChunkStore.java @@ -21,6 +21,8 @@ import org.apache.carbondata.core.datastore.chunk.store.DimensionDataChunkStore; import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector; import org.apache.carbondata.core.scan.result.vector.CarbonDictionary; +import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo; +import org.apache.carbondata.core.util.CarbonUtil; /** * Dimension chunk store for local dictionary encoded data. 
@@ -49,6 +51,29 @@ public void putArray(int[] invertedIndex, int[] invertedIndexReverse, byte[] dat this.dimensionDataChunkStore.putArray(invertedIndex, invertedIndexReverse, data); } + @Override + public void fillVector(int[] invertedIndex, int[] invertedIndexReverse, byte[] data, + ColumnVectorInfo vectorInfo) { + int columnValueSize = dimensionDataChunkStore.getColumnValueSize(); + int rowsNum = data.length / columnValueSize; + CarbonColumnVector vector = vectorInfo.vector; + if (!dictionary.isDictionaryUsed()) { + vector.setDictionary(dictionary); + dictionary.setDictionaryUsed(); + } + for (int i = 0; i < rowsNum; i++) { + int surrogate = CarbonUtil.getSurrogateInternal(data, i * columnValueSize, columnValueSize); + if (surrogate == CarbonCommonConstants.MEMBER_DEFAULT_VAL_SURROGATE_KEY) { + vector.putNull(i); + vector.getDictionaryVector().putNull(i); + } else { + vector.putNotNull(i); + vector.getDictionaryVector().putInt(i, surrogate); + } + + } + } + @Override public byte[] getRow(int rowId) { return dictionary.getDictionaryValue(dimensionDataChunkStore.getSurrogate(rowId)); } diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/AbstractNonDictionaryVectorFiller.java b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/AbstractNonDictionaryVectorFiller.java new file mode 100644 index 00000000000..afe8dbc1ff3 --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/AbstractNonDictionaryVectorFiller.java @@ -0,0 +1,274 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.core.datastore.chunk.store.impl.safe; + +import java.nio.ByteBuffer; + +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.metadata.datatype.DataType; +import org.apache.carbondata.core.metadata.datatype.DataTypes; +import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector; +import org.apache.carbondata.core.util.ByteUtil; +import org.apache.carbondata.core.util.DataTypeUtil; + +public abstract class AbstractNonDictionaryVectorFiller { + + protected int lengthSize; + protected int numberOfRows; + + public AbstractNonDictionaryVectorFiller(int lengthSize, int numberOfRows) { + this.lengthSize = lengthSize; + this.numberOfRows = numberOfRows; + } + + public abstract void fillVector(byte[] data, CarbonColumnVector vector, ByteBuffer buffer); + + public int getLengthFromBuffer(ByteBuffer buffer) { + return buffer.getShort(); + } +} + +class NonDictionaryVectorFillerFactory { + + public static AbstractNonDictionaryVectorFiller getVectorFiller(DataType type, int lengthSize, + int numberOfRows) { + if (type == DataTypes.STRING || type == DataTypes.VARCHAR) { + if (lengthSize == 2) { + return new StringVectorFiller(lengthSize, numberOfRows); + } else { + return new LongStringVectorFiller(lengthSize, numberOfRows); + } + } else if (type == DataTypes.TIMESTAMP) { + return new TimeStampVectorFiller(lengthSize, numberOfRows); + } else if (type == DataTypes.BOOLEAN) { + return new BooleanVectorFiller(lengthSize, numberOfRows); + } else if (type == DataTypes.SHORT) { + return new ShortVectorFiller(lengthSize, numberOfRows); + } else if (type == DataTypes.INT) { + return new IntVectorFiller(lengthSize, numberOfRows); + } else if (type == DataTypes.LONG) { + return new LongVectorFiller(lengthSize, numberOfRows); + } + return new StringVectorFiller(lengthSize, numberOfRows); + } + +} + +class StringVectorFiller extends AbstractNonDictionaryVectorFiller { + + public StringVectorFiller(int lengthSize, int numberOfRows) { + super(lengthSize, numberOfRows); + } + + @Override + public void fillVector(byte[] data, CarbonColumnVector vector, ByteBuffer buffer) { + // start position will be used to store the current data position + int startOffset = 0; + int currentOffset = lengthSize; + ByteUtil.UnsafeComparer comparer = ByteUtil.UnsafeComparer.INSTANCE; + for (int i = 0; i < numberOfRows - 1; i++) { + buffer.position(startOffset); + startOffset += getLengthFromBuffer(buffer) + lengthSize; + int length = startOffset - (currentOffset); + if (comparer.equals(CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY, 0, + CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY.length, data, currentOffset, length)) { + vector.putNull(i); + } else { + vector.putByteArray(i, currentOffset, length, data); + } + currentOffset = startOffset + lengthSize; + } + int length = (data.length - currentOffset); + if (comparer.equals(CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY, 0, + CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY.length, data, currentOffset, length)) { + vector.putNull(numberOfRows - 1); + } else { + vector.putByteArray(numberOfRows - 1, currentOffset, length, data); + } + } +} + +class LongStringVectorFiller extends StringVectorFiller { + public LongStringVectorFiller(int lengthSize, int numberOfRows) { + super(lengthSize, numberOfRows); + } + + @Override + public int getLengthFromBuffer(ByteBuffer buffer) { + return buffer.getInt(); + } +} + +class BooleanVectorFiller extends AbstractNonDictionaryVectorFiller { + + public
BooleanVectorFiller(int lengthSize, int numberOfRows) { + super(lengthSize, numberOfRows); + } + + @Override + public void fillVector(byte[] data, CarbonColumnVector vector, ByteBuffer buffer) { + // start position will be used to store the current data position + int startOffset = 0; + int currentOffset = lengthSize; + for (int i = 0; i < numberOfRows - 1; i++) { + buffer.position(startOffset); + startOffset += getLengthFromBuffer(buffer) + lengthSize; + int length = startOffset - (currentOffset); + if (length == 0) { + vector.putNull(i); + } else { + vector.putBoolean(i, ByteUtil.toBoolean(data[currentOffset])); + } + currentOffset = startOffset + lengthSize; + } + int length = (data.length - currentOffset); + if (length == 0) { + vector.putNull(numberOfRows - 1); + } else { + vector.putBoolean(numberOfRows - 1, ByteUtil.toBoolean(data[currentOffset])); + } + } +} + +class ShortVectorFiller extends AbstractNonDictionaryVectorFiller { + + public ShortVectorFiller(int lengthSize, int numberOfRows) { + super(lengthSize, numberOfRows); + } + + @Override + public void fillVector(byte[] data, CarbonColumnVector vector, ByteBuffer buffer) { + // start position will be used to store the current data position + int startOffset = 0; + int currentOffset = lengthSize; + for (int i = 0; i < numberOfRows - 1; i++) { + buffer.position(startOffset); + startOffset += getLengthFromBuffer(buffer) + lengthSize; + int length = startOffset - (currentOffset); + if (length == 0) { + vector.putNull(i); + } else { + vector.putShort(i, ByteUtil.toXorShort(data, currentOffset, length)); + } + currentOffset = startOffset + lengthSize; + } + int length = (data.length - currentOffset); + if (length == 0) { + vector.putNull(numberOfRows - 1); + } else { + vector.putShort(numberOfRows - 1, ByteUtil.toXorShort(data, currentOffset, length)); + } + } +} + +class IntVectorFiller extends AbstractNonDictionaryVectorFiller { + + public IntVectorFiller(int lengthSize, int numberOfRows) { + super(lengthSize, numberOfRows); + } + + @Override + public void fillVector(byte[] data, CarbonColumnVector vector, ByteBuffer buffer) { + // start position will be used to store the current data position + int startOffset = 0; + int currentOffset = lengthSize; + for (int i = 0; i < numberOfRows - 1; i++) { + buffer.position(startOffset); + startOffset += getLengthFromBuffer(buffer) + lengthSize; + int length = startOffset - (currentOffset); + if (length == 0) { + vector.putNull(i); + } else { + vector.putInt(i, ByteUtil.toXorInt(data, currentOffset, length)); + } + currentOffset = startOffset + lengthSize; + } + int length = (data.length - currentOffset); + if (length == 0) { + vector.putNull(numberOfRows - 1); + } else { + vector.putInt(numberOfRows - 1, ByteUtil.toXorInt(data, currentOffset, length)); + } + } +} + +class LongVectorFiller extends AbstractNonDictionaryVectorFiller { + + public LongVectorFiller(int lengthSize, int numberOfRows) { + super(lengthSize, numberOfRows); + } + + @Override + public void fillVector(byte[] data, CarbonColumnVector vector, ByteBuffer buffer) { + // start position will be used to store the current data position + int startOffset = 0; + int currentOffset = lengthSize; + for (int i = 0; i < numberOfRows - 1; i++) { + buffer.position(startOffset); + startOffset += getLengthFromBuffer(buffer) + lengthSize; + int length = startOffset - (currentOffset); + if (length == 0) { + vector.putNull(i); + } else { + vector.putLong(i, DataTypeUtil + .getDataBasedOnRestructuredDataType(data, vector.getBlockDataType(), 
currentOffset, + length)); + } + currentOffset = startOffset + lengthSize; + } + int length = (data.length - currentOffset); + if (length == 0) { + vector.putNull(numberOfRows - 1); + } else { + vector.putLong(numberOfRows - 1, DataTypeUtil + .getDataBasedOnRestructuredDataType(data, vector.getBlockDataType(), currentOffset, + length)); + } + } +} + +class TimeStampVectorFiller extends AbstractNonDictionaryVectorFiller { + + public TimeStampVectorFiller(int lengthSize, int numberOfRows) { + super(lengthSize, numberOfRows); + } + + @Override + public void fillVector(byte[] data, CarbonColumnVector vector, ByteBuffer buffer) { + // start position will be used to store the current data position + int startOffset = 0; + int currentOffset = lengthSize; + for (int i = 0; i < numberOfRows - 1; i++) { + buffer.position(startOffset); + startOffset += getLengthFromBuffer(buffer) + lengthSize; + int length = startOffset - (currentOffset); + if (length == 0) { + vector.putNull(i); + } else { + vector.putLong(i, ByteUtil.toXorLong(data, currentOffset, length) * 1000L); + } + currentOffset = startOffset + lengthSize; + } + int length = (data.length - currentOffset); + if (length == 0) { + vector.putNull(numberOfRows - 1); + } else { + vector.putLong(numberOfRows - 1, ByteUtil.toXorLong(data, currentOffset, length) * 1000L); + } + } +} diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeFixedLengthDimensionDataChunkStore.java b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeFixedLengthDimensionDataChunkStore.java index 41218d0d840..8293f950a54 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeFixedLengthDimensionDataChunkStore.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeFixedLengthDimensionDataChunkStore.java @@ -17,6 +17,15 @@ package org.apache.carbondata.core.datastore.chunk.store.impl.safe; +import java.util.BitSet; + +import org.apache.carbondata.core.keygenerator.directdictionary.timestamp.DateDirectDictionaryGenerator; +import org.apache.carbondata.core.metadata.datatype.DataType; +import org.apache.carbondata.core.metadata.datatype.DataTypes; +import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector; +import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo; +import org.apache.carbondata.core.scan.result.vector.impl.directread.ColumnarVectorWrapperDirectFactory; +import org.apache.carbondata.core.scan.result.vector.impl.directread.ConvertableVector; import org.apache.carbondata.core.util.ByteUtil; import org.apache.carbondata.core.util.CarbonUtil; @@ -30,9 +39,59 @@ public class SafeFixedLengthDimensionDataChunkStore extends SafeAbsractDimension */ private int columnValueSize; - public SafeFixedLengthDimensionDataChunkStore(boolean isInvertedIndex, int columnValueSize) { + private int numOfRows; + + public SafeFixedLengthDimensionDataChunkStore(boolean isInvertedIndex, int columnValueSize, + int numOfRows) { super(isInvertedIndex); this.columnValueSize = columnValueSize; + this.numOfRows = numOfRows; + } + + @Override + public void fillVector(int[] invertedIndex, int[] invertedIndexReverse, byte[] data, + ColumnVectorInfo vectorInfo) { + CarbonColumnVector vector = vectorInfo.vector; + BitSet deletedRows = vectorInfo.deletedRows; + BitSet nullBits = new BitSet(numOfRows); + vector = ColumnarVectorWrapperDirectFactory + .getDirectVectorWrapperFactory(vector, invertedIndex, nullBits, deletedRows, 
false); + fillVector(data, vectorInfo, vector); + if (vector instanceof ConvertableVector) { + ((ConvertableVector) vector).convert(); + } + } + + private void fillVector(byte[] data, ColumnVectorInfo vectorInfo, CarbonColumnVector vector) { + DataType dataType = vectorInfo.vector.getBlockDataType(); + if (dataType == DataTypes.DATE) { + for (int i = 0; i < numOfRows; i++) { + int surrogateInternal = + CarbonUtil.getSurrogateInternal(data, i * columnValueSize, columnValueSize); + if (surrogateInternal == 1) { + vector.putNull(i); + } else { + vector.putInt(i, surrogateInternal - DateDirectDictionaryGenerator.cutOffDate); + } + } + } else if (dataType == DataTypes.TIMESTAMP) { + for (int i = 0; i < numOfRows; i++) { + int surrogateInternal = + CarbonUtil.getSurrogateInternal(data, i * columnValueSize, columnValueSize); + if (surrogateInternal == 1) { + vector.putNull(i); + } else { + Object valueFromSurrogate = + vectorInfo.directDictionaryGenerator.getValueFromSurrogate(surrogateInternal); + vector.putLong(i, (long)valueFromSurrogate); + } + } + } else { + for (int i = 0; i < numOfRows; i++) { + vector.putInt(i, + CarbonUtil.getSurrogateInternal(data, i * columnValueSize, columnValueSize)); + } + } } /** diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeVariableLengthDimensionDataChunkStore.java b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeVariableLengthDimensionDataChunkStore.java index 85535061b27..4be7857e1ce 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeVariableLengthDimensionDataChunkStore.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/safe/SafeVariableLengthDimensionDataChunkStore.java @@ -18,11 +18,15 @@ package org.apache.carbondata.core.datastore.chunk.store.impl.safe; import java.nio.ByteBuffer; +import java.util.BitSet; import org.apache.carbondata.core.constants.CarbonCommonConstants; import org.apache.carbondata.core.metadata.datatype.DataType; import org.apache.carbondata.core.metadata.datatype.DataTypes; import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector; +import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo; +import org.apache.carbondata.core.scan.result.vector.impl.directread.ColumnarVectorWrapperDirectFactory; +import org.apache.carbondata.core.scan.result.vector.impl.directread.ConvertableVector; import org.apache.carbondata.core.util.ByteUtil; import org.apache.carbondata.core.util.DataTypeUtil; @@ -91,6 +95,31 @@ public void putArray(final int[] invertedIndex, final int[] invertedIndexReverse } } + @Override + public void fillVector(int[] invertedIndex, int[] invertedIndexReverse, byte[] data, + ColumnVectorInfo vectorInfo) { + this.invertedIndexReverse = invertedIndex; + + // as first position will be start from 2 byte as data is stored first in the memory block + // we need to skip first two bytes this is because first two bytes will be length of the data + // which we have to skip + int lengthSize = getLengthSize(); + // creating a byte buffer which will wrap the length of the row + CarbonColumnVector vector = vectorInfo.vector; + DataType dt = vector.getType(); + ByteBuffer buffer = ByteBuffer.wrap(data); + BitSet deletedRows = vectorInfo.deletedRows; + AbstractNonDictionaryVectorFiller vectorFiller = + NonDictionaryVectorFillerFactory.getVectorFiller(dt, lengthSize, numberOfRows); + BitSet nullBits = new BitSet(numberOfRows); + vector = 
ColumnarVectorWrapperDirectFactory + .getDirectVectorWrapperFactory(vector, invertedIndex, nullBits, deletedRows, false); + vectorFiller.fillVector(data, vector, buffer); + if (vector instanceof ConvertableVector) { + ((ConvertableVector) vector).convert(); + } + } + protected abstract int getLengthSize(); protected abstract int getLengthFromBuffer(ByteBuffer buffer); @@ -150,7 +179,7 @@ public void fillRow(int rowId, CarbonColumnVector vector, int vectorRow) { vector.putNull(vectorRow); } else { if (dt == DataTypes.STRING) { - vector.putBytes(vectorRow, currentDataOffset, length, data); + vector.putByteArray(vectorRow, currentDataOffset, length, data); } else if (dt == DataTypes.BOOLEAN) { vector.putBoolean(vectorRow, ByteUtil.toBoolean(data[currentDataOffset])); } else if (dt == DataTypes.SHORT) { diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeAbstractDimensionDataChunkStore.java b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeAbstractDimensionDataChunkStore.java index 89bce2d6d2b..57e9de5e7fc 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeAbstractDimensionDataChunkStore.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/chunk/store/impl/unsafe/UnsafeAbstractDimensionDataChunkStore.java @@ -24,6 +24,7 @@ import org.apache.carbondata.core.memory.MemoryException; import org.apache.carbondata.core.memory.UnsafeMemoryManager; import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector; +import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo; import org.apache.carbondata.core.util.ThreadLocalTaskInfo; /** @@ -115,6 +116,11 @@ public UnsafeAbstractDimensionDataChunkStore(long totalSize, boolean isInvertedI } } + @Override public void fillVector(int[] invertedIndex, int[] invertedIndexReverse, byte[] data, + ColumnVectorInfo vectorInfo) { + throw new UnsupportedOperationException("This method not supposed to be called here"); + } + /** * Below method will be used to free the memory occupied by the column chunk */ diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/columnar/BlockIndexerStorage.java b/core/src/main/java/org/apache/carbondata/core/datastore/columnar/BlockIndexerStorage.java index 6f3f139826b..44b3c12b945 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/columnar/BlockIndexerStorage.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/columnar/BlockIndexerStorage.java @@ -50,8 +50,9 @@ public abstract class BlockIndexerStorage { * * @param rowIds */ - protected Map rleEncodeOnRowId(short[] rowIds, short[] rowIdPage, - short[] rowIdRlePage) { + protected Map rleEncodeOnRowId(short[] rowIds) { + short[] rowIdPage; + short[] rowIdRlePage; List list = new ArrayList(CarbonCommonConstants.CONSTANT_SIZE_TEN); List map = new ArrayList(CarbonCommonConstants.CONSTANT_SIZE_TEN); int k = 0; diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/columnar/BlockIndexerStorageForNoDictionary.java b/core/src/main/java/org/apache/carbondata/core/datastore/columnar/BlockIndexerStorageForNoDictionary.java index b3e25d35297..bcf5432b815 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/columnar/BlockIndexerStorageForNoDictionary.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/columnar/BlockIndexerStorageForNoDictionary.java @@ -39,8 +39,7 @@ public BlockIndexerStorageForNoDictionary(Object[] dataPage, DataType 
dataType, Arrays.sort(dataWithRowId); } short[] rowIds = extractDataAndReturnRowId(dataWithRowId, dataPage); - Map rowIdAndRleRowIdPages = - rleEncodeOnRowId(rowIds, getRowIdPage(), getRowIdRlePage()); + Map rowIdAndRleRowIdPages = rleEncodeOnRowId(rowIds); rowIdPage = rowIdAndRleRowIdPages.get("rowIdPage"); rowIdRlePage = rowIdAndRleRowIdPages.get("rowRlePage"); } diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/columnar/BlockIndexerStorageForShort.java b/core/src/main/java/org/apache/carbondata/core/datastore/columnar/BlockIndexerStorageForShort.java index f1b9af2f263..b30396cadd3 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/columnar/BlockIndexerStorageForShort.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/columnar/BlockIndexerStorageForShort.java @@ -43,8 +43,7 @@ public BlockIndexerStorageForShort(byte[][] dataPage, boolean rleOnData, Arrays.sort(dataWithRowId); } short[] rowIds = extractDataAndReturnRowId(dataWithRowId, dataPage); - Map rowIdAndRleRowIdPages = - rleEncodeOnRowId(rowIds, getRowIdPage(), getRowIdRlePage()); + Map rowIdAndRleRowIdPages = rleEncodeOnRowId(rowIds); rowIdPage = rowIdAndRleRowIdPages.get("rowIdPage"); rowIdRlePage = rowIdAndRleRowIdPages.get("rowRlePage"); if (rleOnData) { diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/columnar/UnBlockIndexer.java b/core/src/main/java/org/apache/carbondata/core/datastore/columnar/UnBlockIndexer.java index a7f38cd6ef6..48484ceb2ac 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/columnar/UnBlockIndexer.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/columnar/UnBlockIndexer.java @@ -28,6 +28,9 @@ private UnBlockIndexer() { public static int[] uncompressIndex(int[] indexData, int[] indexMap) { int actualSize = indexData.length; int mapLength = indexMap.length; + if (indexMap.length == 0) { + return indexData; + } for (int i = 0; i < mapLength; i++) { actualSize += indexData[indexMap[i] + 1] - indexData[indexMap[i]] - 1; } diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/impl/FileReaderImpl.java b/core/src/main/java/org/apache/carbondata/core/datastore/impl/FileReaderImpl.java index 6fef27809b8..9f0abd92d1e 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/impl/FileReaderImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/impl/FileReaderImpl.java @@ -76,6 +76,7 @@ public FileReaderImpl(int capacity) { channel.close(); } } + fileNameAndStreamCache.clear(); } /** diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java index 8b9a9a5a49e..94a3630622d 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java @@ -39,12 +39,7 @@ import static org.apache.carbondata.core.metadata.datatype.DataTypes.BYTE; import static org.apache.carbondata.core.metadata.datatype.DataTypes.BYTE_ARRAY; -import static org.apache.carbondata.core.metadata.datatype.DataTypes.DOUBLE; -import static org.apache.carbondata.core.metadata.datatype.DataTypes.FLOAT; -import static org.apache.carbondata.core.metadata.datatype.DataTypes.INT; import static org.apache.carbondata.core.metadata.datatype.DataTypes.LONG; -import static org.apache.carbondata.core.metadata.datatype.DataTypes.SHORT; -import static 
org.apache.carbondata.core.metadata.datatype.DataTypes.SHORT_INT; public abstract class ColumnPage { @@ -90,7 +85,7 @@ public void setStatsCollector(ColumnPageStatsCollector statsCollector) { private static ColumnPage createDecimalPage(ColumnPageEncoderMeta columnPageEncoderMeta, int pageSize) { - if (unsafe) { + if (unsafe && !columnPageEncoderMeta.isFillCompleteVector()) { try { return new UnsafeDecimalColumnPage(columnPageEncoderMeta, pageSize); } catch (MemoryException e) { @@ -103,7 +98,7 @@ private static ColumnPage createDecimalPage(ColumnPageEncoderMeta columnPageEnco private static ColumnPage createVarLengthPage(ColumnPageEncoderMeta columnPageEncoderMeta, int pageSize) { - if (unsafe) { + if (unsafe && !columnPageEncoderMeta.isFillCompleteVector()) { try { return new UnsafeVarLengthColumnPage(columnPageEncoderMeta, pageSize); } catch (MemoryException e) { @@ -116,7 +111,7 @@ private static ColumnPage createVarLengthPage(ColumnPageEncoderMeta columnPageEn private static ColumnPage createFixLengthPage( ColumnPageEncoderMeta columnPageEncoderMeta, int pageSize) { - if (unsafe) { + if (unsafe && !columnPageEncoderMeta.isFillCompleteVector()) { try { return new UnsafeFixLengthColumnPage(columnPageEncoderMeta, pageSize); } catch (MemoryException e) { @@ -129,7 +124,7 @@ private static ColumnPage createFixLengthPage( private static ColumnPage createFixLengthByteArrayPage( ColumnPageEncoderMeta columnPageEncoderMeta, int pageSize, int eachValueSize) { - if (unsafe) { + if (unsafe && !columnPageEncoderMeta.isFillCompleteVector()) { try { return new UnsafeFixLengthColumnPage(columnPageEncoderMeta, pageSize, eachValueSize); } catch (MemoryException e) { @@ -163,7 +158,7 @@ public static ColumnPage newLocalDictPage(ColumnPageEncoderMeta columnPageEncode CarbonCommonConstants.LOCAL_DICTIONARY_DECODER_BASED_FALLBACK_DEFAULT)); ColumnPage actualPage; ColumnPage encodedPage; - if (unsafe) { + if (unsafe && !columnPageEncoderMeta.isFillCompleteVector()) { actualPage = new UnsafeVarLengthColumnPage(columnPageEncoderMeta, pageSize); encodedPage = new UnsafeFixLengthColumnPage( new ColumnPageEncoderMeta(columnPageEncoderMeta.getColumnSpec(), DataTypes.BYTE_ARRAY, @@ -190,7 +185,7 @@ public static ColumnPage newPage(ColumnPageEncoderMeta columnPageEncoderMeta, in DataType dataType = columnPageEncoderMeta.getStoreDataType(); TableSpec.ColumnSpec columnSpec = columnPageEncoderMeta.getColumnSpec(); String compressorName = columnPageEncoderMeta.getCompressorName(); - if (unsafe) { + if (unsafe && !columnPageEncoderMeta.isFillCompleteVector()) { if (dataType == DataTypes.BOOLEAN) { instance = new UnsafeFixLengthColumnPage( new ColumnPageEncoderMeta(columnSpec, BYTE, compressorName), pageSize); @@ -219,21 +214,23 @@ public static ColumnPage newPage(ColumnPageEncoderMeta columnPageEncoderMeta, in } } else { if (dataType == DataTypes.BOOLEAN || dataType == DataTypes.BYTE) { - instance = newBytePage(columnSpec, new byte[pageSize], compressorName); + instance = newBytePage(columnPageEncoderMeta, new byte[pageSize]); } else if (dataType == DataTypes.SHORT) { - instance = newShortPage(columnSpec, new short[pageSize], compressorName); + instance = newShortPage(columnPageEncoderMeta, new short[pageSize]); } else if (dataType == DataTypes.SHORT_INT) { - instance = newShortIntPage(columnSpec, new byte[pageSize * 3], compressorName); + instance = newShortIntPage(columnPageEncoderMeta, new byte[pageSize * 3]); } else if (dataType == DataTypes.INT) { - instance = newIntPage(columnSpec, new int[pageSize], 
compressorName); + instance = newIntPage(columnPageEncoderMeta, new int[pageSize]); } else if (dataType == DataTypes.LONG || dataType == DataTypes.TIMESTAMP) { - instance = newLongPage(columnSpec, new long[pageSize], compressorName); + instance = newLongPage( + new ColumnPageEncoderMeta(columnPageEncoderMeta.getColumnSpec(), LONG, + columnPageEncoderMeta.getCompressorName()), new long[pageSize]); } else if (dataType == DataTypes.FLOAT) { - instance = newFloatPage(columnSpec, new float[pageSize], compressorName); + instance = newFloatPage(columnPageEncoderMeta, new float[pageSize]); } else if (dataType == DataTypes.DOUBLE) { - instance = newDoublePage(columnSpec, new double[pageSize], compressorName); + instance = newDoublePage(columnPageEncoderMeta, new double[pageSize]); } else if (DataTypes.isDecimal(dataType)) { - instance = newDecimalPage(columnSpec, new byte[pageSize][], compressorName); + instance = newDecimalPage(columnPageEncoderMeta, new byte[pageSize][]); } else if (dataType == DataTypes.STRING || dataType == DataTypes.BYTE_ARRAY || dataType == DataTypes.VARCHAR) { @@ -253,75 +250,67 @@ public static ColumnPage wrapByteArrayPage(TableSpec.ColumnSpec columnSpec, byte return columnPage; } - private static ColumnPage newBytePage(TableSpec.ColumnSpec columnSpec, byte[] byteData, - String compressorName) { + private static ColumnPage newBytePage(ColumnPageEncoderMeta meta, byte[] byteData) { + ColumnPageEncoderMeta encoderMeta = + new ColumnPageEncoderMeta(meta.getColumnSpec(), BYTE, meta.getCompressorName()); + encoderMeta.setFillCompleteVector(meta.isFillCompleteVector()); ColumnPage columnPage = createPage( - new ColumnPageEncoderMeta(columnSpec, BYTE, compressorName), byteData.length); + encoderMeta, byteData.length); columnPage.setBytePage(byteData); return columnPage; } - private static ColumnPage newShortPage(TableSpec.ColumnSpec columnSpec, short[] shortData, - String compressorName) { - ColumnPage columnPage = createPage( - new ColumnPageEncoderMeta(columnSpec, SHORT, compressorName), shortData.length); + private static ColumnPage newShortPage(ColumnPageEncoderMeta meta, short[] shortData) { + ColumnPage columnPage = createPage(meta, shortData.length); columnPage.setShortPage(shortData); return columnPage; } - private static ColumnPage newShortIntPage(TableSpec.ColumnSpec columnSpec, byte[] shortIntData, - String compressorName) { - ColumnPage columnPage = createPage( - new ColumnPageEncoderMeta(columnSpec, SHORT_INT, compressorName), shortIntData.length / 3); + private static ColumnPage newShortIntPage(ColumnPageEncoderMeta meta, byte[] shortIntData) { + ColumnPage columnPage = createPage(meta, shortIntData.length / 3); columnPage.setShortIntPage(shortIntData); return columnPage; } - private static ColumnPage newIntPage(TableSpec.ColumnSpec columnSpec, int[] intData, - String compressorName) { - ColumnPage columnPage = createPage( - new ColumnPageEncoderMeta(columnSpec, INT, compressorName), intData.length); + private static ColumnPage newIntPage(ColumnPageEncoderMeta meta, int[] intData) { + ColumnPage columnPage = createPage(meta, intData.length); columnPage.setIntPage(intData); return columnPage; } - private static ColumnPage newLongPage(TableSpec.ColumnSpec columnSpec, long[] longData, - String compressorName) { - ColumnPage columnPage = createPage( - new ColumnPageEncoderMeta(columnSpec, LONG, compressorName), longData.length); + private static ColumnPage newLongPage(ColumnPageEncoderMeta meta, long[] longData) { + ColumnPage columnPage = createPage(meta, 
longData.length); columnPage.setLongPage(longData); return columnPage; } - private static ColumnPage newFloatPage(TableSpec.ColumnSpec columnSpec, float[] floatData, - String compressorName) { - ColumnPage columnPage = createPage( - new ColumnPageEncoderMeta(columnSpec, FLOAT, compressorName), floatData.length); + private static ColumnPage newFloatPage(ColumnPageEncoderMeta meta, float[] floatData) { + ColumnPage columnPage = createPage(meta, floatData.length); columnPage.setFloatPage(floatData); return columnPage; } - private static ColumnPage newDoublePage(TableSpec.ColumnSpec columnSpec, double[] doubleData, - String compressorName) { - ColumnPage columnPage = createPage( - new ColumnPageEncoderMeta(columnSpec, DOUBLE, compressorName), doubleData.length); + private static ColumnPage newDoublePage(ColumnPageEncoderMeta meta, double[] doubleData) { + ColumnPage columnPage = createPage(meta, doubleData.length); columnPage.setDoublePage(doubleData); return columnPage; } - private static ColumnPage newDecimalPage(TableSpec.ColumnSpec columnSpec, byte[][] byteArray, - String compressorName) { + private static ColumnPage newDecimalPage(ColumnPageEncoderMeta meta, byte[][] byteArray) { + ColumnPageEncoderMeta encoderMeta = + new ColumnPageEncoderMeta(meta.getColumnSpec(), meta.getColumnSpec().getSchemaDataType(), + meta.getCompressorName()); + encoderMeta.setFillCompleteVector(meta.isFillCompleteVector()); ColumnPage columnPage = createPage( - new ColumnPageEncoderMeta(columnSpec, columnSpec.getSchemaDataType(), compressorName), + encoderMeta, byteArray.length); columnPage.setByteArrayPage(byteArray); return columnPage; } - private static ColumnPage newDecimalPage(TableSpec.ColumnSpec columnSpec, - byte[] lvEncodedByteArray, String compressorName) throws MemoryException { - return VarLengthColumnPageBase.newDecimalColumnPage( - columnSpec, lvEncodedByteArray, compressorName); + private static ColumnPage newDecimalPage(ColumnPageEncoderMeta meta, + byte[] lvEncodedByteArray) throws MemoryException { + return VarLengthColumnPageBase.newDecimalColumnPage(meta, lvEncodedByteArray); } private static ColumnPage newLVBytesPage(TableSpec.ColumnSpec columnSpec, @@ -633,6 +622,56 @@ public boolean getBoolean(int rowId) { */ public abstract double getDouble(int rowId); + + + + + /** + * Get the complete byte data of this page + */ + public abstract byte[] getByteData(); + + /** + * Get the complete short data of this page + */ + public abstract short[] getShortData(); + + /** + * Get the complete short-int data of this page, expanded to an int array + */ + public abstract int[] getShortIntData(); + + /** + * Get the complete boolean data of this page, stored as bytes + */ + public byte[] getBooleanData() { + return getByteData(); + } + + /** + * Get the complete int data of this page + */ + public abstract int[] getIntData(); + + /** + * Get the complete long data of this page + */ + public abstract long[] getLongData(); + + /** + * Get the complete float data of this page + */ + public abstract float[] getFloatData(); + + /** + * Get the complete double data of this page + */ + public abstract double[] getDoubleData(); + + + + + /** * Get decimal value at rowId */ @@ -774,25 +813,25 @@ public static ColumnPage decompress(ColumnPageEncoderMeta meta, byte[] compresse DataType storeDataType = meta.getStoreDataType(); if (storeDataType == DataTypes.BOOLEAN || storeDataType == DataTypes.BYTE) { byte[] byteData = compressor.unCompressByte(compressedData, offset, length); - return newBytePage(columnSpec, byteData, meta.getCompressorName()); + return newBytePage(meta, byteData); } else if (storeDataType == DataTypes.SHORT) { short[] shortData = 
compressor.unCompressShort(compressedData, offset, length); - return newShortPage(columnSpec, shortData, meta.getCompressorName()); + return newShortPage(meta, shortData); } else if (storeDataType == DataTypes.SHORT_INT) { byte[] shortIntData = compressor.unCompressByte(compressedData, offset, length); - return newShortIntPage(columnSpec, shortIntData, meta.getCompressorName()); + return newShortIntPage(meta, shortIntData); } else if (storeDataType == DataTypes.INT) { int[] intData = compressor.unCompressInt(compressedData, offset, length); - return newIntPage(columnSpec, intData, meta.getCompressorName()); + return newIntPage(meta, intData); } else if (storeDataType == DataTypes.LONG) { long[] longData = compressor.unCompressLong(compressedData, offset, length); - return newLongPage(columnSpec, longData, meta.getCompressorName()); + return newLongPage(meta, longData); } else if (storeDataType == DataTypes.FLOAT) { float[] floatData = compressor.unCompressFloat(compressedData, offset, length); - return newFloatPage(columnSpec, floatData, meta.getCompressorName()); + return newFloatPage(meta, floatData); } else if (storeDataType == DataTypes.DOUBLE) { double[] doubleData = compressor.unCompressDouble(compressedData, offset, length); - return newDoublePage(columnSpec, doubleData, meta.getCompressorName()); + return newDoublePage(meta, doubleData); } else if (!isLVEncoded && storeDataType == DataTypes.BYTE_ARRAY && ( columnSpec.getColumnType() == ColumnType.COMPLEX_PRIMITIVE || columnSpec.getColumnType() == ColumnType.PLAIN_VALUE)) { @@ -834,8 +873,7 @@ public static ColumnPage decompress(ColumnPageEncoderMeta meta, byte[] compresse public static ColumnPage decompressDecimalPage(ColumnPageEncoderMeta meta, byte[] compressedData, int offset, int length) throws MemoryException { Compressor compressor = CompressorFactory.getInstance().getCompressor(meta.getCompressorName()); - TableSpec.ColumnSpec columnSpec = meta.getColumnSpec(); - ColumnPage decimalPage = null; + ColumnPage decimalPage; DataType storeDataType = meta.getStoreDataType(); if (storeDataType == DataTypes.BYTE) { byte[] byteData = compressor.unCompressByte(compressedData, offset, length); @@ -849,7 +887,7 @@ public static ColumnPage decompressDecimalPage(ColumnPageEncoderMeta meta, byte[ return decimalPage; } else if (storeDataType == DataTypes.SHORT_INT) { byte[] shortIntData = compressor.unCompressByte(compressedData, offset, length); - decimalPage = createDecimalPage(meta, shortIntData.length); + decimalPage = createDecimalPage(meta, shortIntData.length / 3); decimalPage.setShortIntPage(shortIntData); return decimalPage; } else if (storeDataType == DataTypes.INT) { @@ -864,7 +902,7 @@ public static ColumnPage decompressDecimalPage(ColumnPageEncoderMeta meta, byte[ return decimalPage; } else { byte[] lvEncodedBytes = compressor.unCompressByte(compressedData, offset, length); - return newDecimalPage(columnSpec, lvEncodedBytes, meta.getCompressorName()); + return newDecimalPage(meta, lvEncodedBytes); } } diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPageValueConverter.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPageValueConverter.java index 53ad95655b4..82ccd2255df 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPageValueConverter.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPageValueConverter.java @@ -17,6 +17,8 @@ package org.apache.carbondata.core.datastore.page; +import 
org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo; + // Transformation type that can be applied to ColumnPage public interface ColumnPageValueConverter { void encode(int rowId, byte value); @@ -35,4 +37,5 @@ public interface ColumnPageValueConverter { double decodeDouble(long value); double decodeDouble(float value); double decodeDouble(double value); + void decodeAndFillVector(ColumnPage columnPage, ColumnVectorInfo vectorInfo); } diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/LazyColumnPage.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/LazyColumnPage.java index 772916d01a1..95ce644cf0b 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/LazyColumnPage.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/LazyColumnPage.java @@ -23,6 +23,7 @@ import org.apache.carbondata.core.metadata.datatype.DataType; import org.apache.carbondata.core.metadata.datatype.DataTypes; import org.apache.carbondata.core.metadata.datatype.DecimalConverterFactory; +import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo; /** * This is a decorator of column page, it performs decoding lazily (when caller calls getXXX @@ -42,10 +43,26 @@ private LazyColumnPage(ColumnPage columnPage, ColumnPageValueConverter converter this.converter = converter; } + private LazyColumnPage(ColumnPage columnPage, ColumnPageValueConverter converter, + ColumnVectorInfo vectorInfo) { + super(columnPage.getColumnPageEncoderMeta(), columnPage.getPageSize()); + this.columnPage = columnPage; + this.converter = converter; + if (columnPage instanceof DecimalColumnPage) { + vectorInfo.decimalConverter = ((DecimalColumnPage) columnPage).getDecimalConverter(); + } + converter.decodeAndFillVector(columnPage, vectorInfo); + } + public static ColumnPage newPage(ColumnPage columnPage, ColumnPageValueConverter codec) { return new LazyColumnPage(columnPage, codec); } + public static ColumnPage newPage(ColumnPage columnPage, ColumnPageValueConverter codec, + ColumnVectorInfo vectorInfo) { + return new LazyColumnPage(columnPage, codec, vectorInfo); + } + @Override public String toString() { return String.format("[converter: %s, data type: %s", converter, columnPage.getDataType()); @@ -91,6 +108,8 @@ public double getDouble(int rowId) { } } + + @Override public float getFloat(int rowId) { return (float) getDouble(rowId); @@ -300,4 +319,32 @@ public int getShortInt(int rowId) { public int getInt(int rowId) { throw new UnsupportedOperationException("internal error"); } + + @Override public byte[] getByteData() { + throw new UnsupportedOperationException("internal error"); + } + + @Override public short[] getShortData() { + throw new UnsupportedOperationException("internal error"); + } + + @Override public int[] getShortIntData() { + throw new UnsupportedOperationException("internal error"); + } + + @Override public int[] getIntData() { + throw new UnsupportedOperationException("internal error"); + } + + @Override public long[] getLongData() { + throw new UnsupportedOperationException("internal error"); + } + + @Override public float[] getFloatData() { + throw new UnsupportedOperationException("internal error"); + } + + @Override public double[] getDoubleData() { + throw new UnsupportedOperationException("internal error"); + } } diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/LocalDictColumnPage.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/LocalDictColumnPage.java index 3da154ae1fc..5cb60f8c745 
100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/LocalDictColumnPage.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/LocalDictColumnPage.java @@ -285,6 +285,34 @@ private void freeEncodedColumnPage() { return actualDataColumnPage.getBytes(rowId); } + @Override public byte[] getByteData() { + return new byte[0]; + } + + @Override public short[] getShortData() { + return new short[0]; + } + + @Override public int[] getShortIntData() { + return new int[0]; + } + + @Override public int[] getIntData() { + return new int[0]; + } + + @Override public long[] getLongData() { + return new long[0]; + } + + @Override public float[] getFloatData() { + return new float[0]; + } + + @Override public double[] getDoubleData() { + return new double[0]; + } + @Override public byte[] getBytePage() { throw new UnsupportedOperationException("Operation not supported"); } diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/SafeDecimalColumnPage.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/SafeDecimalColumnPage.java index d3e945dc2d9..3526b209169 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/SafeDecimalColumnPage.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/SafeDecimalColumnPage.java @@ -193,6 +193,34 @@ public void convertValue(ColumnPageValueConverter codec) { } } + @Override public byte[] getByteData() { + return byteData; + } + + @Override public short[] getShortData() { + return shortData; + } + + @Override public int[] getShortIntData() { + int[] ints = new int[pageSize]; + for (int i = 0; i < ints.length; i++) { + ints[i] = ByteUtil.valueOf3Bytes(shortIntData, i * 3); + } + return ints; + } + + @Override public int[] getIntData() { + return intData; + } + + @Override public long[] getLongData() { + return longData; + } + + @Override public byte[][] getByteArrayPage() { + return byteArrayData; + } + @Override public void freeMemory() { byteArrayData = null; diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/SafeFixLengthColumnPage.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/SafeFixLengthColumnPage.java index 3884d9bcd33..c0cf315bd4c 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/SafeFixLengthColumnPage.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/SafeFixLengthColumnPage.java @@ -202,6 +202,38 @@ public double getDouble(int rowId) { return doubleData[rowId]; } + @Override public byte[] getByteData() { + return byteData; + } + + @Override public short[] getShortData() { + return shortData; + } + + @Override public int[] getShortIntData() { + int[] ints = new int[pageSize]; + for (int i = 0; i < ints.length; i++) { + ints[i] = ByteUtil.valueOf3Bytes(shortIntData, i * 3); + } + return ints; + } + + @Override public int[] getIntData() { + return intData; + } + + @Override public long[] getLongData() { + return longData; + } + + @Override public float[] getFloatData() { + return floatData; + } + + @Override public double[] getDoubleData() { + return doubleData; + } + @Override public BigDecimal getDecimal(int rowId) { throw new UnsupportedOperationException( "invalid data type: " + columnPageEncoderMeta.getStoreDataType()); diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/UnsafeFixLengthColumnPage.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/UnsafeFixLengthColumnPage.java index 7df29df8d3a..cb3cd6d2107 100644 --- 
a/core/src/main/java/org/apache/carbondata/core/datastore/page/UnsafeFixLengthColumnPage.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/UnsafeFixLengthColumnPage.java @@ -286,6 +286,34 @@ public double getDouble(int rowId) { return CarbonUnsafe.getUnsafe().getDouble(baseAddress, baseOffset + offset); } + @Override public byte[] getByteData() { + throw new UnsupportedOperationException("Not supported here"); + } + + @Override public short[] getShortData() { + throw new UnsupportedOperationException("Not supported here"); + } + + @Override public int[] getShortIntData() { + throw new UnsupportedOperationException("Not supported here"); + } + + @Override public int[] getIntData() { + throw new UnsupportedOperationException("Not supported here"); + } + + @Override public long[] getLongData() { + throw new UnsupportedOperationException("Not supported here"); + } + + @Override public float[] getFloatData() { + throw new UnsupportedOperationException("Not supported here"); + } + + @Override public double[] getDoubleData() { + throw new UnsupportedOperationException("Not supported here"); + } + @Override public BigDecimal getDecimal(int rowId) { throw new UnsupportedOperationException( diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/VarLengthColumnPageBase.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/VarLengthColumnPageBase.java index 35d00095c08..6a9426f01d7 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/VarLengthColumnPageBase.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/VarLengthColumnPageBase.java @@ -124,8 +124,9 @@ public void setDoublePage(double[] doubleData) { /** * Create a new column page for decimal page */ - static ColumnPage newDecimalColumnPage(TableSpec.ColumnSpec columnSpec, byte[] lvEncodedBytes, - String compressorName) throws MemoryException { + static ColumnPage newDecimalColumnPage(ColumnPageEncoderMeta meta, + byte[] lvEncodedBytes) throws MemoryException { + TableSpec.ColumnSpec columnSpec = meta.getColumnSpec(); DecimalConverterFactory.DecimalConverter decimalConverter = DecimalConverterFactory.INSTANCE.getDecimalConverter(columnSpec.getPrecision(), columnSpec.getScale()); @@ -133,10 +134,10 @@ static ColumnPage newDecimalColumnPage(TableSpec.ColumnSpec columnSpec, byte[] l if (size < 0) { return getLVBytesColumnPage(columnSpec, lvEncodedBytes, DataTypes.createDecimalType(columnSpec.getPrecision(), columnSpec.getScale()), - CarbonCommonConstants.INT_SIZE_IN_BYTE, compressorName); + CarbonCommonConstants.INT_SIZE_IN_BYTE, meta.getCompressorName()); } else { // Here the size is always fixed. 
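// Illustration only, not part of the patch: how the pieces above fit together when a page is
// decoded straight into a vector. The decoder marks the encoder meta (in the patch this flag is
// propagated from EncodingFactory.createDecoder(..., fullVectorFill)), which makes the page
// factories choose the safe (on-heap) implementations whose bulk getters (getByteData(),
// getLongData(), ...) the unsafe pages deliberately do not support. A simplified sketch, with
// the hypothetical helper name decodeForVector:
static ColumnPage decodeForVector(ColumnPageEncoderMeta meta, byte[] input, int offset,
    int length, ColumnPageValueConverter converter, ColumnVectorInfo vectorInfo,
    BitSet nullBits) throws MemoryException {
  // safe pages keep the decoded primitives in plain arrays for the bulk getters
  meta.setFillCompleteVector(true);
  ColumnPage page = ColumnPage.decompress(meta, input, offset, length, false);
  page.setNullBits(nullBits);
  // LazyColumnPage invokes converter.decodeAndFillVector(page, vectorInfo) in its
  // constructor, so the whole page lands in vectorInfo.vector before this returns
  return LazyColumnPage.newPage(page, converter, vectorInfo);
}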
- return getDecimalColumnPage(columnSpec, lvEncodedBytes, size, compressorName); + return getDecimalColumnPage(meta, lvEncodedBytes, size); } } @@ -158,8 +159,10 @@ static ColumnPage newComplexLVBytesColumnPage(TableSpec.ColumnSpec columnSpec, lvLength, compressorName); } - private static ColumnPage getDecimalColumnPage(TableSpec.ColumnSpec columnSpec, - byte[] lvEncodedBytes, int size, String compressorName) throws MemoryException { + private static ColumnPage getDecimalColumnPage(ColumnPageEncoderMeta meta, + byte[] lvEncodedBytes, int size) throws MemoryException { + TableSpec.ColumnSpec columnSpec = meta.getColumnSpec(); + String compressorName = meta.getCompressorName(); TableSpec.ColumnSpec spec = TableSpec.ColumnSpec .newInstance(columnSpec.getFieldName(), DataTypes.INT, ColumnType.MEASURE); ColumnPage rowOffset = ColumnPage.newPage( @@ -176,7 +179,7 @@ private static ColumnPage getDecimalColumnPage(TableSpec.ColumnSpec columnSpec, rowOffset.putInt(counter, offset); VarLengthColumnPageBase page; - if (unsafe) { + if (unsafe && !meta.isFillCompleteVector()) { page = new UnsafeDecimalColumnPage( new ColumnPageEncoderMeta(columnSpec, columnSpec.getSchemaDataType(), compressorName), rowId); @@ -428,6 +431,41 @@ public double[] getDoublePage() { "invalid data type: " + columnPageEncoderMeta.getStoreDataType()); } + @Override public byte[] getByteData() { + throw new UnsupportedOperationException( + "invalid data type: " + columnPageEncoderMeta.getStoreDataType()); + } + + @Override public short[] getShortData() { + throw new UnsupportedOperationException( + "invalid data type: " + columnPageEncoderMeta.getStoreDataType()); + } + + @Override public int[] getShortIntData() { + throw new UnsupportedOperationException( + "invalid data type: " + columnPageEncoderMeta.getStoreDataType()); + } + + @Override public int[] getIntData() { + throw new UnsupportedOperationException( + "invalid data type: " + columnPageEncoderMeta.getStoreDataType()); + } + + @Override public long[] getLongData() { + throw new UnsupportedOperationException( + "invalid data type: " + columnPageEncoderMeta.getStoreDataType()); + } + + @Override public float[] getFloatData() { + throw new UnsupportedOperationException( + "invalid data type: " + columnPageEncoderMeta.getStoreDataType()); + } + + @Override public double[] getDoubleData() { + throw new UnsupportedOperationException( + "invalid data type: " + columnPageEncoderMeta.getStoreDataType()); + } + @Override public byte[] getDecimalPage() { // output LV encoded byte array diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageDecoder.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageDecoder.java index 4e491c548bb..20dcc9e0bbc 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageDecoder.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageDecoder.java @@ -18,9 +18,11 @@ package org.apache.carbondata.core.datastore.page.encoding; import java.io.IOException; +import java.util.BitSet; import org.apache.carbondata.core.datastore.page.ColumnPage; import org.apache.carbondata.core.memory.MemoryException; +import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo; public interface ColumnPageDecoder { @@ -29,6 +31,12 @@ public interface ColumnPageDecoder { */ ColumnPage decode(byte[] input, int offset, int length) throws MemoryException, IOException; + /** + * Apply decoding algorithm on input byte array and fill 
the vector here. + */ + ColumnPage decodeAndFillVector(byte[] input, int offset, int length, ColumnVectorInfo vectorInfo, + BitSet nullBits, boolean isLVEncoded) throws MemoryException, IOException; + ColumnPage decode(byte[] input, int offset, int length, boolean isLVEncoded) throws MemoryException, IOException; } diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoderMeta.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoderMeta.java index e6aafa0df1e..f747e1ab759 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoderMeta.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoderMeta.java @@ -49,6 +49,8 @@ public class ColumnPageEncoderMeta extends ValueEncoderMeta implements Writable // Make it protected for RLEEncoderMeta protected String compressorName; + private transient boolean fillCompleteVector; + public ColumnPageEncoderMeta() { } @@ -284,4 +286,12 @@ public String getCompressorName() { public DataType getSchemaDataType() { return columnSpec.getSchemaDataType(); } + + public boolean isFillCompleteVector() { + return fillCompleteVector; + } + + public void setFillCompleteVector(boolean fillCompleteVector) { + this.fillCompleteVector = fillCompleteVector; + } } diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/EncodingFactory.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/EncodingFactory.java index 920a51626ca..55c06bb2f73 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/EncodingFactory.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/EncodingFactory.java @@ -66,6 +66,14 @@ public abstract ColumnPageEncoder createEncoder(TableSpec.ColumnSpec columnSpec, */ public ColumnPageDecoder createDecoder(List encodings, List encoderMetas, String compressor) throws IOException { + return createDecoder(encodings, encoderMetas, compressor, false); + } + + /** + * Return new decoder based on encoder metadata read from file + */ + public ColumnPageDecoder createDecoder(List encodings, List encoderMetas, + String compressor, boolean fullVectorFill) throws IOException { assert (encodings.size() >= 1); assert (encoderMetas.size() == 1); Encoding encoding = encodings.get(0); @@ -74,16 +82,19 @@ public ColumnPageDecoder createDecoder(List encodings, List encodings, List encodings, List encodings, List= 0; i = nullBits.nextSetBit(i + 1)) { + vector.putNull(i); + } + } + } + @Override public double decodeDouble(float value) { throw new RuntimeException("internal error: " + debugInfo()); diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/adaptive/AdaptiveDeltaIntegralCodec.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/adaptive/AdaptiveDeltaIntegralCodec.java index 0e61b33b362..7e89ca2f8ca 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/adaptive/AdaptiveDeltaIntegralCodec.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/adaptive/AdaptiveDeltaIntegralCodec.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.math.BigDecimal; import java.util.ArrayList; +import java.util.BitSet; import java.util.List; import java.util.Map; @@ -35,6 +36,11 @@ import org.apache.carbondata.core.memory.MemoryException; import org.apache.carbondata.core.metadata.datatype.DataType; import 
org.apache.carbondata.core.metadata.datatype.DataTypes; +import org.apache.carbondata.core.metadata.datatype.DecimalConverterFactory; +import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector; +import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo; +import org.apache.carbondata.core.scan.result.vector.impl.directread.ColumnarVectorWrapperDirectFactory; +import org.apache.carbondata.core.scan.result.vector.impl.directread.ConvertableVector; import org.apache.carbondata.format.DataChunk2; import org.apache.carbondata.format.Encoding; @@ -119,9 +125,11 @@ protected void fillLegacyFields(DataChunk2 dataChunk) throws IOException { }; } - @Override public ColumnPageDecoder createDecoder(final ColumnPageEncoderMeta meta) { + @Override + public ColumnPageDecoder createDecoder(final ColumnPageEncoderMeta meta) { return new ColumnPageDecoder() { - @Override public ColumnPage decode(byte[] input, int offset, int length) + @Override + public ColumnPage decode(byte[] input, int offset, int length) throws MemoryException, IOException { ColumnPage page = null; if (DataTypes.isDecimal(meta.getSchemaDataType())) { @@ -132,7 +140,22 @@ protected void fillLegacyFields(DataChunk2 dataChunk) throws IOException { return LazyColumnPage.newPage(page, converter); } - @Override public ColumnPage decode(byte[] input, int offset, int length, boolean isLVEncoded) + @Override + public ColumnPage decodeAndFillVector(byte[] input, int offset, int length, + ColumnVectorInfo vectorInfo, BitSet nullBits, boolean isLVEncoded) + throws MemoryException, IOException { + ColumnPage page = null; + if (DataTypes.isDecimal(meta.getSchemaDataType())) { + page = ColumnPage.decompressDecimalPage(meta, input, offset, length); + } else { + page = ColumnPage.decompress(meta, input, offset, length, isLVEncoded); + } + page.setNullBits(nullBits); + return LazyColumnPage.newPage(page, converter, vectorInfo); + } + + @Override + public ColumnPage decode(byte[] input, int offset, int length, boolean isLVEncoded) throws MemoryException, IOException { return decode(input, offset, length); } @@ -272,5 +295,189 @@ public double decodeDouble(double value) { // this codec is for integer type only throw new RuntimeException("internal error"); } + + @Override + public void decodeAndFillVector(ColumnPage columnPage, ColumnVectorInfo vectorInfo) { + CarbonColumnVector vector = vectorInfo.vector; + BitSet nullBits = columnPage.getNullBits(); + DataType dataType = vector.getType(); + DataType type = columnPage.getDataType(); + int pageSize = columnPage.getPageSize(); + BitSet deletedRows = vectorInfo.deletedRows; + vector = ColumnarVectorWrapperDirectFactory + .getDirectVectorWrapperFactory(vector, vectorInfo.invertedIndex, nullBits, deletedRows, + true); + fillVector(columnPage, vector, dataType, type, pageSize, vectorInfo); + if (deletedRows == null || deletedRows.isEmpty()) { + for (int i = nullBits.nextSetBit(0); i >= 0; i = nullBits.nextSetBit(i + 1)) { + vector.putNull(i); + } + } + if (vector instanceof ConvertableVector) { + ((ConvertableVector) vector).convert(); + } + } + + private void fillVector(ColumnPage columnPage, CarbonColumnVector vector, DataType dataType, + DataType type, int pageSize, ColumnVectorInfo vectorInfo) { + int newScale = 0; + if (vectorInfo.measure != null) { + newScale = vectorInfo.measure.getMeasure().getScale(); + } + if (type == DataTypes.BOOLEAN || type == DataTypes.BYTE) { + byte[] byteData = columnPage.getByteData(); + if (dataType == DataTypes.SHORT) { + for (int i = 0; i < pageSize; 
i++) { + vector.putShort(i, (short) (max - byteData[i])); + } + } else if (dataType == DataTypes.INT) { + for (int i = 0; i < pageSize; i++) { + vector.putInt(i, (int) (max - byteData[i])); + } + } else if (dataType == DataTypes.LONG) { + for (int i = 0; i < pageSize; i++) { + vector.putLong(i, (max - byteData[i])); + } + } else if (dataType == DataTypes.TIMESTAMP) { + for (int i = 0; i < pageSize; i++) { + vector.putLong(i, (max - byteData[i]) * 1000); + } + } else if (dataType == DataTypes.BOOLEAN) { + for (int i = 0; i < pageSize; i++) { + vector.putByte(i, (byte) (max - byteData[i])); + } + } else if (DataTypes.isDecimal(dataType)) { + DecimalConverterFactory.DecimalConverter decimalConverter = vectorInfo.decimalConverter; + int precision = vectorInfo.measure.getMeasure().getPrecision(); + for (int i = 0; i < pageSize; i++) { + BigDecimal decimal = decimalConverter.getDecimal(max - byteData[i]); + if (decimal.scale() < newScale) { + decimal = decimal.setScale(newScale); + } + vector.putDecimal(i, decimal, precision); + } + } else { + for (int i = 0; i < pageSize; i++) { + vector.putDouble(i, (max - byteData[i])); + } + } + } else if (type == DataTypes.SHORT) { + short[] shortData = columnPage.getShortData(); + if (dataType == DataTypes.SHORT) { + for (int i = 0; i < pageSize; i++) { + vector.putShort(i, (short) (max - shortData[i])); + } + } else if (dataType == DataTypes.INT) { + for (int i = 0; i < pageSize; i++) { + vector.putInt(i, (int) (max - shortData[i])); + } + } else if (dataType == DataTypes.LONG) { + for (int i = 0; i < pageSize; i++) { + vector.putLong(i, (max - shortData[i])); + } + } else if (dataType == DataTypes.TIMESTAMP) { + for (int i = 0; i < pageSize; i++) { + vector.putLong(i, (max - shortData[i]) * 1000); + } + } else if (DataTypes.isDecimal(dataType)) { + DecimalConverterFactory.DecimalConverter decimalConverter = vectorInfo.decimalConverter; + int precision = vectorInfo.measure.getMeasure().getPrecision(); + for (int i = 0; i < pageSize; i++) { + BigDecimal decimal = decimalConverter.getDecimal(max - shortData[i]); + if (decimal.scale() < newScale) { + decimal = decimal.setScale(newScale); + } + vector.putDecimal(i, decimal, precision); + } + } else { + for (int i = 0; i < pageSize; i++) { + vector.putDouble(i, (max - shortData[i])); + } + } + + } else if (type == DataTypes.SHORT_INT) { + int[] shortIntData = columnPage.getShortIntData(); + if (dataType == DataTypes.INT) { + for (int i = 0; i < pageSize; i++) { + vector.putInt(i, (int) (max - shortIntData[i])); + } + } else if (dataType == DataTypes.LONG) { + for (int i = 0; i < pageSize; i++) { + vector.putLong(i, (max - shortIntData[i])); + } + } else if (dataType == DataTypes.TIMESTAMP) { + for (int i = 0; i < pageSize; i++) { + vector.putLong(i, (max - shortIntData[i]) * 1000); + } + } else if (DataTypes.isDecimal(dataType)) { + DecimalConverterFactory.DecimalConverter decimalConverter = vectorInfo.decimalConverter; + int precision = vectorInfo.measure.getMeasure().getPrecision(); + for (int i = 0; i < pageSize; i++) { + BigDecimal decimal = decimalConverter.getDecimal(max - shortIntData[i]); + if (decimal.scale() < newScale) { + decimal = decimal.setScale(newScale); + } + vector.putDecimal(i, decimal, precision); + } + } else { + for (int i = 0; i < pageSize; i++) { + vector.putDouble(i, (max - shortIntData[i])); + } + } + } else if (type == DataTypes.INT) { + int[] intData = columnPage.getIntData(); + if (dataType == DataTypes.INT) { + for (int i = 0; i < pageSize; i++) { + vector.putInt(i, (int) (max - 
intData[i])); + } + } else if (dataType == DataTypes.LONG) { + for (int i = 0; i < pageSize; i++) { + vector.putLong(i, (max - intData[i])); + } + } else if (dataType == DataTypes.TIMESTAMP) { + for (int i = 0; i < pageSize; i++) { + vector.putLong(i, (max - intData[i]) * 1000); + } + } else if (DataTypes.isDecimal(dataType)) { + DecimalConverterFactory.DecimalConverter decimalConverter = vectorInfo.decimalConverter; + int precision = vectorInfo.measure.getMeasure().getPrecision(); + for (int i = 0; i < pageSize; i++) { + BigDecimal decimal = decimalConverter.getDecimal(max - intData[i]); + if (decimal.scale() < newScale) { + decimal = decimal.setScale(newScale); + } + vector.putDecimal(i, decimal, precision); + } + } else { + for (int i = 0; i < pageSize; i++) { + vector.putDouble(i, (max - intData[i])); + } + } + } else if (type == DataTypes.LONG) { + long[] longData = columnPage.getLongData(); + if (dataType == DataTypes.LONG) { + for (int i = 0; i < pageSize; i++) { + vector.putLong(i, (max - longData[i])); + } + } else if (dataType == DataTypes.TIMESTAMP) { + for (int i = 0; i < pageSize; i++) { + vector.putLong(i, (max - longData[i]) * 1000); + } + } else if (DataTypes.isDecimal(dataType)) { + DecimalConverterFactory.DecimalConverter decimalConverter = vectorInfo.decimalConverter; + int precision = vectorInfo.measure.getMeasure().getPrecision(); + for (int i = 0; i < pageSize; i++) { + BigDecimal decimal = decimalConverter.getDecimal(max - longData[i]); + if (decimal.scale() < newScale) { + decimal = decimal.setScale(newScale); + } + vector.putDecimal(i, decimal, precision); + } + } + } else { + throw new RuntimeException("internal error: " + this.toString()); + } + } + }; } diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/adaptive/AdaptiveFloatingCodec.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/adaptive/AdaptiveFloatingCodec.java index 836af26ee6b..bc152308a9d 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/adaptive/AdaptiveFloatingCodec.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/adaptive/AdaptiveFloatingCodec.java @@ -19,6 +19,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.BitSet; import java.util.List; import java.util.Map; @@ -34,6 +35,9 @@ import org.apache.carbondata.core.memory.MemoryException; import org.apache.carbondata.core.metadata.datatype.DataType; import org.apache.carbondata.core.metadata.datatype.DataTypes; +import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector; +import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo; +import org.apache.carbondata.core.scan.result.vector.impl.directread.ColumnarVectorWrapperDirectFactory; import org.apache.carbondata.format.DataChunk2; import org.apache.carbondata.format.Encoding; @@ -113,7 +117,17 @@ public ColumnPage decode(byte[] input, int offset, int length) return LazyColumnPage.newPage(page, converter); } - @Override public ColumnPage decode(byte[] input, int offset, int length, boolean isLVEncoded) + @Override + public ColumnPage decodeAndFillVector(byte[] input, int offset, int length, + ColumnVectorInfo vectorInfo, BitSet nullBits, boolean isLVEncoded) + throws MemoryException, IOException { + ColumnPage page = ColumnPage.decompress(meta, input, offset, length, isLVEncoded); + page.setNullBits(nullBits); + return LazyColumnPage.newPage(page, converter, vectorInfo); + } + + @Override + public ColumnPage decode(byte[] 
input, int offset, int length, boolean isLVEncoded) throws MemoryException, IOException { return decode(input, offset, length); } @@ -226,5 +240,69 @@ public double decodeDouble(float value) { public double decodeDouble(double value) { throw new RuntimeException("internal error: " + debugInfo()); } + + @Override + public void decodeAndFillVector(ColumnPage columnPage, ColumnVectorInfo vectorInfo) { + CarbonColumnVector vector = vectorInfo.vector; + BitSet nullBits = columnPage.getNullBits(); + DataType type = columnPage.getDataType(); + int pageSize = columnPage.getPageSize(); + BitSet deletedRows = vectorInfo.deletedRows; + DataType dataType = vector.getType(); + vector = ColumnarVectorWrapperDirectFactory + .getDirectVectorWrapperFactory(vector, null, nullBits, deletedRows, true); + if (dataType == DataTypes.FLOAT) { + if (type == DataTypes.BOOLEAN || type == DataTypes.BYTE) { + byte[] byteData = columnPage.getByteData(); + for (int i = 0; i < pageSize; i++) { + vector.putFloat(i, (byteData[i] / floatFactor)); + } + } else if (type == DataTypes.SHORT) { + short[] shortData = columnPage.getShortData(); + for (int i = 0; i < pageSize; i++) { + vector.putFloat(i, (shortData[i] / floatFactor)); + } + + } else if (type == DataTypes.SHORT_INT) { + int[] shortIntData = columnPage.getShortIntData(); + for (int i = 0; i < pageSize; i++) { + vector.putFloat(i, (shortIntData[i] / floatFactor)); + } + } else { + throw new RuntimeException("internal error: " + this.toString()); + } + } else { + if (type == DataTypes.BOOLEAN || type == DataTypes.BYTE) { + byte[] byteData = columnPage.getByteData(); + for (int i = 0; i < pageSize; i++) { + vector.putDouble(i, (byteData[i] / factor)); + } + } else if (type == DataTypes.SHORT) { + short[] shortData = columnPage.getShortData(); + for (int i = 0; i < pageSize; i++) { + vector.putDouble(i, (shortData[i] / factor)); + } + + } else if (type == DataTypes.SHORT_INT) { + int[] shortIntData = columnPage.getShortIntData(); + for (int i = 0; i < pageSize; i++) { + vector.putDouble(i, (shortIntData[i] / factor)); + } + } else if (type == DataTypes.INT) { + int[] intData = columnPage.getIntData(); + for (int i = 0; i < pageSize; i++) { + vector.putDouble(i, (intData[i] / factor)); + } + } else { + throw new RuntimeException("internal error: " + this.toString()); + } + } + + if (deletedRows == null || deletedRows.isEmpty()) { + for (int i = nullBits.nextSetBit(0); i >= 0; i = nullBits.nextSetBit(i + 1)) { + vector.putNull(i); + } + } + } }; } \ No newline at end of file diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/adaptive/AdaptiveIntegralCodec.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/adaptive/AdaptiveIntegralCodec.java index f1c0ea0e69a..99111f252d3 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/adaptive/AdaptiveIntegralCodec.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/adaptive/AdaptiveIntegralCodec.java @@ -19,6 +19,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.BitSet; import java.util.List; import java.util.Map; @@ -34,6 +35,11 @@ import org.apache.carbondata.core.memory.MemoryException; import org.apache.carbondata.core.metadata.datatype.DataType; import org.apache.carbondata.core.metadata.datatype.DataTypes; +import org.apache.carbondata.core.metadata.datatype.DecimalConverterFactory; +import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector; +import 
org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo; +import org.apache.carbondata.core.scan.result.vector.impl.directread.ColumnarVectorWrapperDirectFactory; +import org.apache.carbondata.core.scan.result.vector.impl.directread.ConvertableVector; import org.apache.carbondata.format.DataChunk2; import org.apache.carbondata.format.Encoding; @@ -111,6 +117,20 @@ public ColumnPage decode(byte[] input, int offset, int length) return LazyColumnPage.newPage(page, converter); } + @Override + public ColumnPage decodeAndFillVector(byte[] input, int offset, int length, + ColumnVectorInfo vectorInfo, BitSet nullBits, boolean isLVEncoded) + throws MemoryException, IOException { + ColumnPage page = null; + if (DataTypes.isDecimal(meta.getSchemaDataType())) { + page = ColumnPage.decompressDecimalPage(meta, input, offset, length); + } else { + page = ColumnPage.decompress(meta, input, offset, length, isLVEncoded); + } + page.setNullBits(nullBits); + return LazyColumnPage.newPage(page, converter, vectorInfo); + } + @Override public ColumnPage decode(byte[] input, int offset, int length, boolean isLVEncoded) throws MemoryException, IOException { return decode(input, offset, length); @@ -248,6 +268,143 @@ public double decodeDouble(float value) { public double decodeDouble(double value) { throw new RuntimeException("internal error: " + debugInfo()); } + + @Override + public void decodeAndFillVector(ColumnPage columnPage, ColumnVectorInfo vectorInfo) { + CarbonColumnVector vector = vectorInfo.vector; + BitSet nullBits = columnPage.getNullBits(); + DataType dataType = vector.getType(); + DataType type = columnPage.getDataType(); + int pageSize = columnPage.getPageSize(); + BitSet deletedRows = vectorInfo.deletedRows; + vector = ColumnarVectorWrapperDirectFactory + .getDirectVectorWrapperFactory(vector, vectorInfo.invertedIndex, nullBits, deletedRows, + true); + fillVector(columnPage, vector, dataType, type, pageSize, vectorInfo); + if (deletedRows == null || deletedRows.isEmpty()) { + for (int i = nullBits.nextSetBit(0); i >= 0; i = nullBits.nextSetBit(i + 1)) { + vector.putNull(i); + } + } + if (vector instanceof ConvertableVector) { + ((ConvertableVector) vector).convert(); + } + + } + + private void fillVector(ColumnPage columnPage, CarbonColumnVector vector, DataType dataType, + DataType type, int pageSize, ColumnVectorInfo vectorInfo) { + if (type == DataTypes.BOOLEAN || type == DataTypes.BYTE) { + byte[] byteData = columnPage.getByteData(); + if (dataType == DataTypes.SHORT) { + for (int i = 0; i < pageSize; i++) { + vector.putShort(i, (short) byteData[i]); + } + } else if (dataType == DataTypes.INT) { + for (int i = 0; i < pageSize; i++) { + vector.putInt(i, (int) byteData[i]); + } + } else if (dataType == DataTypes.LONG) { + for (int i = 0; i < pageSize; i++) { + vector.putLong(i, byteData[i]); + } + } else if (dataType == DataTypes.TIMESTAMP) { + for (int i = 0; i < pageSize; i++) { + vector.putLong(i, byteData[i] * 1000); + } + } else if (dataType == DataTypes.BOOLEAN) { + vector.putBytes(0, pageSize, byteData, 0); + + } else if (DataTypes.isDecimal(dataType)) { + DecimalConverterFactory.DecimalConverter decimalConverter = vectorInfo.decimalConverter; + decimalConverter.fillVector(byteData, pageSize, vectorInfo, columnPage.getNullBits()); + } else { + for (int i = 0; i < pageSize; i++) { + vector.putDouble(i, byteData[i]); + } + } + } else if (type == DataTypes.SHORT) { + short[] shortData = columnPage.getShortData(); + if (dataType == DataTypes.SHORT) { + vector.putShorts(0, pageSize, 
shortData, 0); + } else if (dataType == DataTypes.INT) { + for (int i = 0; i < pageSize; i++) { + vector.putInt(i, (int) shortData[i]); + } + } else if (dataType == DataTypes.LONG) { + for (int i = 0; i < pageSize; i++) { + vector.putLong(i, shortData[i]); + } + } else if (dataType == DataTypes.TIMESTAMP) { + for (int i = 0; i < pageSize; i++) { + vector.putLong(i, shortData[i] * 1000); + } + } else if (DataTypes.isDecimal(dataType)) { + DecimalConverterFactory.DecimalConverter decimalConverter = vectorInfo.decimalConverter; + decimalConverter.fillVector(shortData, pageSize, vectorInfo, columnPage.getNullBits()); + } else { + for (int i = 0; i < pageSize; i++) { + vector.putDouble(i, shortData[i]); + } + } + + } else if (type == DataTypes.SHORT_INT) { + int[] shortIntData = columnPage.getShortIntData(); + if (dataType == DataTypes.INT) { + vector.putInts(0, pageSize, shortIntData, 0); + } else if (dataType == DataTypes.LONG) { + for (int i = 0; i < pageSize; i++) { + vector.putLong(i, shortIntData[i]); + } + } else if (dataType == DataTypes.TIMESTAMP) { + for (int i = 0; i < pageSize; i++) { + vector.putLong(i, shortIntData[i] * 1000); + } + } else if (DataTypes.isDecimal(dataType)) { + DecimalConverterFactory.DecimalConverter decimalConverter = vectorInfo.decimalConverter; + decimalConverter.fillVector(shortIntData, pageSize, vectorInfo, columnPage.getNullBits()); + } else { + for (int i = 0; i < pageSize; i++) { + vector.putDouble(i, shortIntData[i]); + } + } + } else if (type == DataTypes.INT) { + int[] intData = columnPage.getIntData(); + if (dataType == DataTypes.INT) { + vector.putInts(0, pageSize, intData, 0); + } else if (dataType == DataTypes.LONG) { + for (int i = 0; i < pageSize; i++) { + vector.putLong(i, intData[i]); + } + } else if (dataType == DataTypes.TIMESTAMP) { + for (int i = 0; i < pageSize; i++) { + vector.putLong(i, intData[i] * 1000); + } + } else if (DataTypes.isDecimal(dataType)) { + DecimalConverterFactory.DecimalConverter decimalConverter = vectorInfo.decimalConverter; + decimalConverter.fillVector(intData, pageSize, vectorInfo, columnPage.getNullBits()); + } else { + for (int i = 0; i < pageSize; i++) { + vector.putDouble(i, intData[i]); + } + } + } else if (type == DataTypes.LONG) { + long[] longData = columnPage.getLongData(); + if (dataType == DataTypes.LONG) { + vector.putLongs(0, pageSize, longData, 0); + } else if (dataType == DataTypes.TIMESTAMP) { + for (int i = 0; i < pageSize; i++) { + vector.putLong(i, longData[i] * 1000); + } + } else if (DataTypes.isDecimal(dataType)) { + DecimalConverterFactory.DecimalConverter decimalConverter = vectorInfo.decimalConverter; + decimalConverter.fillVector(longData, pageSize, vectorInfo, columnPage.getNullBits()); + } + } else { + double[] doubleData = columnPage.getDoubleData(); + vector.putDoubles(0, pageSize, doubleData, 0); + } + } }; } diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/compress/DirectCompressCodec.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/compress/DirectCompressCodec.java index aa03ec111a0..42ff57bf3d8 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/compress/DirectCompressCodec.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/compress/DirectCompressCodec.java @@ -19,6 +19,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.BitSet; import java.util.List; import java.util.Map; @@ -34,6 +35,11 @@ import 
org.apache.carbondata.core.memory.MemoryException; import org.apache.carbondata.core.metadata.datatype.DataType; import org.apache.carbondata.core.metadata.datatype.DataTypes; +import org.apache.carbondata.core.metadata.datatype.DecimalConverterFactory; +import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector; +import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo; +import org.apache.carbondata.core.scan.result.vector.impl.directread.ColumnarVectorWrapperDirectFactory; +import org.apache.carbondata.core.scan.result.vector.impl.directread.ConvertableVector; import org.apache.carbondata.format.Encoding; /** @@ -95,10 +101,24 @@ public ColumnPage decode(byte[] input, int offset, int length) throws MemoryExce return LazyColumnPage.newPage(decodedPage, converter); } + @Override + public ColumnPage decodeAndFillVector(byte[] input, int offset, int length, + ColumnVectorInfo vectorInfo, BitSet nullBits, boolean isLVEncoded) + throws MemoryException, IOException { + ColumnPage decodedPage; + if (DataTypes.isDecimal(dataType)) { + decodedPage = ColumnPage.decompressDecimalPage(meta, input, offset, length); + } else { + decodedPage = ColumnPage.decompress(meta, input, offset, length, isLVEncoded); + } + decodedPage.setNullBits(nullBits); + return LazyColumnPage.newPage(decodedPage, converter, vectorInfo); + } + @Override public ColumnPage decode(byte[] input, int offset, int length, boolean isLVEncoded) - throws MemoryException, IOException { - return LazyColumnPage.newPage( - ColumnPage.decompress(meta, input, offset, length, isLVEncoded), converter); + throws MemoryException, IOException { + return LazyColumnPage + .newPage(ColumnPage.decompress(meta, input, offset, length, isLVEncoded), converter); } }; } @@ -178,6 +198,149 @@ public double decodeDouble(float value) { public double decodeDouble(double value) { return value; } + + @Override public void decodeAndFillVector(ColumnPage columnPage, ColumnVectorInfo vectorInfo) { + CarbonColumnVector vector = vectorInfo.vector; + BitSet nullBits = columnPage.getNullBits(); + DataType dataType = vector.getType(); + DataType type = columnPage.getDataType(); + int pageSize = columnPage.getPageSize(); + BitSet deletedRows = vectorInfo.deletedRows; + vector = ColumnarVectorWrapperDirectFactory + .getDirectVectorWrapperFactory(vector, vectorInfo.invertedIndex, nullBits, deletedRows, + true); + fillVector(columnPage, vector, dataType, type, pageSize, vectorInfo); + if (deletedRows == null || deletedRows.isEmpty()) { + for (int i = nullBits.nextSetBit(0); i >= 0; i = nullBits.nextSetBit(i + 1)) { + vector.putNull(i); + } + } + if (vector instanceof ConvertableVector) { + ((ConvertableVector) vector).convert(); + } + } + + private void fillVector(ColumnPage columnPage, CarbonColumnVector vector, DataType dataType, + DataType type, int pageSize, ColumnVectorInfo vectorInfo) { + if (type == DataTypes.BOOLEAN || type == DataTypes.BYTE) { + byte[] byteData = columnPage.getByteData(); + if (dataType == DataTypes.SHORT) { + for (int i = 0; i < pageSize; i++) { + vector.putShort(i, (short) byteData[i]); + } + } else if (dataType == DataTypes.INT) { + for (int i = 0; i < pageSize; i++) { + vector.putInt(i, (int) byteData[i]); + } + } else if (dataType == DataTypes.LONG) { + for (int i = 0; i < pageSize; i++) { + vector.putLong(i, byteData[i]); + } + } else if (dataType == DataTypes.TIMESTAMP) { + for (int i = 0; i < pageSize; i++) { + vector.putLong(i, byteData[i] * 1000); + } + } else if (dataType == DataTypes.BOOLEAN || dataType == 
DataTypes.BYTE) { + vector.putBytes(0, pageSize, byteData, 0); + } else if (DataTypes.isDecimal(dataType)) { + DecimalConverterFactory.DecimalConverter decimalConverter = vectorInfo.decimalConverter; + decimalConverter.fillVector(byteData, pageSize, vectorInfo, columnPage.getNullBits()); + } else { + for (int i = 0; i < pageSize; i++) { + vector.putDouble(i, byteData[i]); + } + } + } else if (type == DataTypes.SHORT) { + short[] shortData = columnPage.getShortData(); + if (dataType == DataTypes.SHORT) { + vector.putShorts(0, pageSize, shortData, 0); + } else if (dataType == DataTypes.INT) { + for (int i = 0; i < pageSize; i++) { + vector.putInt(i, (int) shortData[i]); + } + } else if (dataType == DataTypes.LONG) { + for (int i = 0; i < pageSize; i++) { + vector.putLong(i, shortData[i]); + } + } else if (dataType == DataTypes.TIMESTAMP) { + for (int i = 0; i < pageSize; i++) { + vector.putLong(i, shortData[i] * 1000); + } + } else if (DataTypes.isDecimal(dataType)) { + DecimalConverterFactory.DecimalConverter decimalConverter = vectorInfo.decimalConverter; + decimalConverter.fillVector(shortData, pageSize, vectorInfo, columnPage.getNullBits()); + } else { + for (int i = 0; i < pageSize; i++) { + vector.putDouble(i, shortData[i]); + } + } + + } else if (type == DataTypes.SHORT_INT) { + int[] shortIntData = columnPage.getShortIntData(); + if (dataType == DataTypes.INT) { + vector.putInts(0, pageSize, shortIntData, 0); + } else if (dataType == DataTypes.LONG) { + for (int i = 0; i < pageSize; i++) { + vector.putLong(i, shortIntData[i]); + } + } else if (dataType == DataTypes.TIMESTAMP) { + for (int i = 0; i < pageSize; i++) { + vector.putLong(i, shortIntData[i] * 1000); + } + } else if (DataTypes.isDecimal(dataType)) { + DecimalConverterFactory.DecimalConverter decimalConverter = vectorInfo.decimalConverter; + decimalConverter.fillVector(shortIntData, pageSize, vectorInfo, columnPage.getNullBits()); + } else { + for (int i = 0; i < pageSize; i++) { + vector.putDouble(i, shortIntData[i]); + } + } + } else if (type == DataTypes.INT) { + int[] intData = columnPage.getIntData(); + if (dataType == DataTypes.INT) { + vector.putInts(0, pageSize, intData, 0); + } else if (dataType == DataTypes.LONG) { + for (int i = 0; i < pageSize; i++) { + vector.putLong(i, intData[i]); + } + } else if (dataType == DataTypes.TIMESTAMP) { + for (int i = 0; i < pageSize; i++) { + vector.putLong(i, intData[i] * 1000); + } + } else if (DataTypes.isDecimal(dataType)) { + DecimalConverterFactory.DecimalConverter decimalConverter = vectorInfo.decimalConverter; + decimalConverter.fillVector(intData, pageSize, vectorInfo, columnPage.getNullBits()); + } else { + for (int i = 0; i < pageSize; i++) { + vector.putDouble(i, intData[i]); + } + } + } else if (type == DataTypes.LONG) { + long[] longData = columnPage.getLongData(); + if (dataType == DataTypes.LONG) { + vector.putLongs(0, pageSize, longData, 0); + } else if (dataType == DataTypes.TIMESTAMP) { + for (int i = 0; i < pageSize; i++) { + vector.putLong(i, longData[i] * 1000); + } + } else if (DataTypes.isDecimal(dataType)) { + DecimalConverterFactory.DecimalConverter decimalConverter = vectorInfo.decimalConverter; +
decimalConverter.fillVector(columnPage.getByteArrayPage(), pageSize, vectorInfo, + columnPage.getNullBits()); + } + } else { + double[] doubleData = columnPage.getDoubleData(); + vector.putDoubles(0, pageSize, doubleData, 0); + } + } }; } diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/rle/RLECodec.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/rle/RLECodec.java index e7d41183447..3018dbe78f8 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/rle/RLECodec.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/rle/RLECodec.java @@ -23,6 +23,7 @@ import java.io.DataOutputStream; import java.io.IOException; import java.util.ArrayList; +import java.util.BitSet; import java.util.List; import java.util.Map; @@ -35,6 +36,7 @@ import org.apache.carbondata.core.memory.MemoryException; import org.apache.carbondata.core.metadata.datatype.DataType; import org.apache.carbondata.core.metadata.datatype.DataTypes; +import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo; import org.apache.carbondata.format.Encoding; /** @@ -314,6 +316,13 @@ public ColumnPage decode(byte[] input, int offset, int length) return resultPage; } + @Override + public ColumnPage decodeAndFillVector(byte[] input, int offset, int length, + ColumnVectorInfo vectorInfo, BitSet nullBits, boolean isLVEncoded) + throws MemoryException, IOException { + return decode(input, offset, length); + } + @Override public ColumnPage decode(byte[] input, int offset, int length, boolean isLVEncoded) throws MemoryException, IOException { return decode(input, offset, length); diff --git a/core/src/main/java/org/apache/carbondata/core/keygenerator/directdictionary/timestamp/DateDirectDictionaryGenerator.java b/core/src/main/java/org/apache/carbondata/core/keygenerator/directdictionary/timestamp/DateDirectDictionaryGenerator.java index a49eced8d4a..67d70e39808 100644 --- a/core/src/main/java/org/apache/carbondata/core/keygenerator/directdictionary/timestamp/DateDirectDictionaryGenerator.java +++ b/core/src/main/java/org/apache/carbondata/core/keygenerator/directdictionary/timestamp/DateDirectDictionaryGenerator.java @@ -35,7 +35,7 @@ */ public class DateDirectDictionaryGenerator implements DirectDictionaryGenerator { - private static final int cutOffDate = Integer.MAX_VALUE >> 1; + public static final int cutOffDate = Integer.MAX_VALUE >> 1; private static final long SECONDS_PER_DAY = 60 * 60 * 24L; public static final long MILLIS_PER_DAY = SECONDS_PER_DAY * 1000L; diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/datatype/DecimalConverterFactory.java b/core/src/main/java/org/apache/carbondata/core/metadata/datatype/DecimalConverterFactory.java index a8da6d4486d..5231cb95a49 100644 --- a/core/src/main/java/org/apache/carbondata/core/metadata/datatype/DecimalConverterFactory.java +++ b/core/src/main/java/org/apache/carbondata/core/metadata/datatype/DecimalConverterFactory.java @@ -19,7 +19,10 @@ import java.math.BigDecimal; import java.math.BigInteger; import java.util.Arrays; +import java.util.BitSet; +import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector; +import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo; import org.apache.carbondata.core.util.DataTypeUtil; /** @@ -72,6 +75,8 @@ public interface DecimalConverter { BigDecimal getDecimal(Object valueToBeConverted); + void fillVector(Object valuesToBeConverted, int size, ColumnVectorInfo info, BitSet nullBitset); + 
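The new DecimalConverter.fillVector contract above pushes decoded decimal values straight into the column vector. A minimal standalone sketch of the scale-adjustment idea used by the converter implementations that follow; SimpleDecimalSink and fillFromInts are illustrative stand-ins, not part of this patch:

    import java.math.BigDecimal;
    import java.util.BitSet;

    public final class DecimalFillSketch {

      // Stand-in for CarbonColumnVector; only the calls this sketch needs.
      interface SimpleDecimalSink {
        void putDecimal(int rowId, BigDecimal value, int precision);
        void putNull(int rowId);
      }

      // Rehydrate unscaled int values into BigDecimal at the schema scale,
      // mirroring what the int[] branch of fillVector does below.
      static void fillFromInts(int[] unscaled, int storedScale, int targetScale,
          int precision, BitSet nullBits, SimpleDecimalSink sink) {
        for (int i = 0; i < unscaled.length; i++) {
          if (nullBits.get(i)) {
            sink.putNull(i);           // null rows are marked, not converted
            continue;
          }
          BigDecimal value = BigDecimal.valueOf(unscaled[i], storedScale);
          if (value.scale() < targetScale) {
            value = value.setScale(targetScale);   // widen to the schema scale
          }
          sink.putDecimal(i, value, precision);
        }
      }

      public static void main(String[] args) {
        int[] data = {12345, 670};     // unscaled values stored with scale 2
        BitSet nulls = new BitSet(data.length);
        fillFromInts(data, 2, 4, 10, nulls, new SimpleDecimalSink() {
          public void putDecimal(int rowId, BigDecimal value, int precision) {
            System.out.println(rowId + " -> " + value);   // 123.4500, 6.7000
          }
          public void putNull(int rowId) {
            System.out.println(rowId + " -> null");
          }
        });
      }
    }
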
int getSize(); DecimalConverterType getDecimalConverterType(); @@ -80,7 +85,7 @@ public interface DecimalConverter { public static class DecimalIntConverter implements DecimalConverter { - private int scale; + protected int scale; DecimalIntConverter(int scale) { this.scale = scale; @@ -95,6 +100,68 @@ public static class DecimalIntConverter implements DecimalConverter { return BigDecimal.valueOf((Long) valueToBeConverted, scale); } + @Override public void fillVector(Object valuesToBeConverted, int size, ColumnVectorInfo info, + BitSet nullBitset) { + // TODO we need to find way to directly set to vector with out conversion. This way is very + // inefficient. + CarbonColumnVector vector = info.vector; + int precision = info.measure.getMeasure().getPrecision(); + int newMeasureScale = info.measure.getMeasure().getScale(); + if (valuesToBeConverted instanceof byte[]) { + byte[] data = (byte[]) valuesToBeConverted; + for (int i = 0; i < size; i++) { + if (nullBitset.get(i)) { + vector.putNull(i); + } else { + BigDecimal value = BigDecimal.valueOf(data[i], scale); + if (value.scale() < newMeasureScale) { + value = value.setScale(newMeasureScale); + } + vector.putDecimal(i, value, precision); + } + } + } else if (valuesToBeConverted instanceof short[]) { + short[] data = (short[]) valuesToBeConverted; + for (int i = 0; i < size; i++) { + if (nullBitset.get(i)) { + vector.putNull(i); + } else { + BigDecimal value = BigDecimal.valueOf(data[i], scale); + if (value.scale() < newMeasureScale) { + value = value.setScale(newMeasureScale); + } + vector.putDecimal(i, value, precision); + } + } + } else if (valuesToBeConverted instanceof int[]) { + int[] data = (int[]) valuesToBeConverted; + for (int i = 0; i < size; i++) { + if (nullBitset.get(i)) { + vector.putNull(i); + } else { + BigDecimal value = BigDecimal.valueOf(data[i], scale); + if (value.scale() < newMeasureScale) { + value = value.setScale(newMeasureScale); + } + vector.putDecimal(i, value, precision); + } + } + } else if (valuesToBeConverted instanceof long[]) { + long[] data = (long[]) valuesToBeConverted; + for (int i = 0; i < size; i++) { + if (nullBitset.get(i)) { + vector.putNull(i); + } else { + BigDecimal value = BigDecimal.valueOf(data[i], scale); + if (value.scale() < newMeasureScale) { + value = value.setScale(newMeasureScale); + } + vector.putDecimal(i, value, precision); + } + } + } + } + @Override public int getSize() { return 4; } @@ -104,12 +171,10 @@ public static class DecimalIntConverter implements DecimalConverter { } } - public static class DecimalLongConverter implements DecimalConverter { - - private int scale; + public static class DecimalLongConverter extends DecimalIntConverter { DecimalLongConverter(int scale) { - this.scale = scale; + super(scale); } @Override public Object convert(BigDecimal decimal) { @@ -173,6 +238,31 @@ public class DecimalUnscaledConverter implements DecimalConverter { return new BigDecimal(bigInteger, scale); } + @Override public void fillVector(Object valuesToBeConverted, int size, ColumnVectorInfo info, + BitSet nullBitset) { + CarbonColumnVector vector = info.vector; + int precision = info.measure.getMeasure().getPrecision(); + int newMeasureScale = info.measure.getMeasure().getScale(); + if (scale < newMeasureScale) { + scale = newMeasureScale; + } + if (valuesToBeConverted instanceof byte[][]) { + byte[][] data = (byte[][]) valuesToBeConverted; + for (int i = 0; i < size; i++) { + if (nullBitset.get(i)) { + vector.putNull(i); + } else { + BigInteger bigInteger = new BigInteger(data[i]); 
+ BigDecimal value = new BigDecimal(bigInteger, scale); + if (value.scale() < newMeasureScale) { + value = value.setScale(newMeasureScale); + } + vector.putDecimal(i, value, precision); + } + } + } + } + @Override public int getSize() { return numBytes; } @@ -194,6 +284,27 @@ public static class LVBytesDecimalConverter implements DecimalConverter { return DataTypeUtil.byteToBigDecimal((byte[]) valueToBeConverted); } + @Override public void fillVector(Object valuesToBeConverted, int size, ColumnVectorInfo info, + BitSet nullBitset) { + CarbonColumnVector vector = info.vector; + int precision = info.measure.getMeasure().getPrecision(); + int newMeasureScale = info.measure.getMeasure().getScale(); + if (valuesToBeConverted instanceof byte[][]) { + byte[][] data = (byte[][]) valuesToBeConverted; + for (int i = 0; i < size; i++) { + if (nullBitset.get(i)) { + vector.putNull(i); + } else { + BigDecimal value = DataTypeUtil.byteToBigDecimal(data[i]); + if (value.scale() < newMeasureScale) { + value = value.setScale(newMeasureScale); + } + vector.putDecimal(i, value, precision); + } + } + } + } + @Override public int getSize() { return -1; } diff --git a/core/src/main/java/org/apache/carbondata/core/mutate/DeleteDeltaVo.java b/core/src/main/java/org/apache/carbondata/core/mutate/DeleteDeltaVo.java index d68e4e988c0..ac50d7c8d59 100644 --- a/core/src/main/java/org/apache/carbondata/core/mutate/DeleteDeltaVo.java +++ b/core/src/main/java/org/apache/carbondata/core/mutate/DeleteDeltaVo.java @@ -57,4 +57,8 @@ public void insertData(Set data) { public boolean containsRow(int counter) { return bitSet.get(counter); } + + public BitSet getBitSet() { + return bitSet; + } } diff --git a/core/src/main/java/org/apache/carbondata/core/scan/collector/ResultCollectorFactory.java b/core/src/main/java/org/apache/carbondata/core/scan/collector/ResultCollectorFactory.java index 68f8ae6bfe7..4cd1910bb83 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/collector/ResultCollectorFactory.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/collector/ResultCollectorFactory.java @@ -17,16 +17,7 @@ package org.apache.carbondata.core.scan.collector; import org.apache.carbondata.common.logging.LogServiceFactory; -import org.apache.carbondata.core.scan.collector.impl.AbstractScannedResultCollector; -import org.apache.carbondata.core.scan.collector.impl.DictionaryBasedResultCollector; -import org.apache.carbondata.core.scan.collector.impl.DictionaryBasedVectorResultCollector; -import org.apache.carbondata.core.scan.collector.impl.RawBasedResultCollector; -import org.apache.carbondata.core.scan.collector.impl.RestructureBasedDictionaryResultCollector; -import org.apache.carbondata.core.scan.collector.impl.RestructureBasedRawResultCollector; -import org.apache.carbondata.core.scan.collector.impl.RestructureBasedVectorResultCollector; -import org.apache.carbondata.core.scan.collector.impl.RowIdBasedResultCollector; -import org.apache.carbondata.core.scan.collector.impl.RowIdRawBasedResultCollector; -import org.apache.carbondata.core.scan.collector.impl.RowIdRestructureBasedRawResultCollector; +import org.apache.carbondata.core.scan.collector.impl.*; import org.apache.carbondata.core.scan.executor.infos.BlockExecutionInfo; import org.apache.log4j.Logger; @@ -74,8 +65,13 @@ public static AbstractScannedResultCollector getScannedResultCollector( LOGGER.info("Restructure dictionary vector collector is used to scan and collect the data"); scannerResultAggregator = new 
RestructureBasedVectorResultCollector(blockExecutionInfo); } else { - LOGGER.info("Vector based dictionary collector is used to scan and collect the data"); - scannerResultAggregator = new DictionaryBasedVectorResultCollector(blockExecutionInfo); + if (blockExecutionInfo.isDirectVectorFill()) { + LOGGER.info("Direct pagewise vector fill collector is used to scan and collect the data"); + scannerResultAggregator = new DirectPageWiseVectorFillResultCollector(blockExecutionInfo); + } else { + LOGGER.info("Vector based dictionary collector is used to scan and collect the data"); + scannerResultAggregator = new DictionaryBasedVectorResultCollector(blockExecutionInfo); + } } } else { if (blockExecutionInfo.isRestructuredBlock()) { diff --git a/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/DirectPageWiseVectorFillResultCollector.java b/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/DirectPageWiseVectorFillResultCollector.java new file mode 100644 index 00000000000..989d4f25d12 --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/DirectPageWiseVectorFillResultCollector.java @@ -0,0 +1,181 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.core.scan.collector.impl; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.BitSet; +import java.util.List; + +import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryKeyGeneratorFactory; +import org.apache.carbondata.core.metadata.encoder.Encoding; +import org.apache.carbondata.core.mutate.DeleteDeltaVo; +import org.apache.carbondata.core.scan.executor.infos.BlockExecutionInfo; +import org.apache.carbondata.core.scan.model.ProjectionDimension; +import org.apache.carbondata.core.scan.model.ProjectionMeasure; +import org.apache.carbondata.core.scan.result.BlockletScannedResult; +import org.apache.carbondata.core.scan.result.vector.CarbonColumnarBatch; +import org.apache.carbondata.core.scan.result.vector.ColumnVectorInfo; +import org.apache.carbondata.core.scan.result.vector.MeasureDataVectorProcessor; + +/** + * It delegates the vector to fill the data directly from decoded pages. 
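The factory above now routes vector queries to the new DirectPageWiseVectorFillResultCollector when direct fill is enabled; the class that follows delegates each decoded page straight into the consumer's vectors instead of staging rows. A contrast sketch of the two strategies under illustrative types (DecodedPage and IntVector are not part of the patch):

    import java.util.Arrays;
    import java.util.List;

    public final class DirectFillContrastSketch {

      static final class DecodedPage {
        final int[] values;
        DecodedPage(int... values) { this.values = values; }
      }

      static final class IntVector {
        final int[] data;
        int offset;
        IntVector(int capacity) { data = new int[capacity]; }
        // Whole-page write, no intermediate row buffer.
        void putInts(int[] src) {
          System.arraycopy(src, 0, data, offset, src.length);
          offset += src.length;
        }
        // Row-at-a-time write, as the staging collectors do.
        void putInt(int value) { data[offset++] = value; }
      }

      static void directFill(List<DecodedPage> pages, IntVector vector) {
        for (DecodedPage page : pages) {
          vector.putInts(page.values);
        }
      }

      static void stagedFill(List<DecodedPage> pages, IntVector vector) {
        for (DecodedPage page : pages) {
          for (int value : page.values) {
            vector.putInt(value);
          }
        }
      }

      public static void main(String[] args) {
        List<DecodedPage> pages = Arrays.asList(new DecodedPage(1, 2), new DecodedPage(3));
        IntVector direct = new IntVector(3);
        directFill(pages, direct);
        IntVector staged = new IntVector(3);
        stagedFill(pages, staged);
        System.out.println(Arrays.toString(direct.data));  // [1, 2, 3]
        System.out.println(Arrays.toString(staged.data));  // [1, 2, 3]
      }
    }
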
+ */ +public class DirectPageWiseVectorFillResultCollector extends AbstractScannedResultCollector { + + protected ProjectionDimension[] queryDimensions; + + protected ProjectionMeasure[] queryMeasures; + + private ColumnVectorInfo[] dictionaryInfo; + + private ColumnVectorInfo[] noDictionaryInfo; + + private ColumnVectorInfo[] complexInfo; + + private ColumnVectorInfo[] measureColumnInfo; + + ColumnVectorInfo[] allColumnInfo; + + public DirectPageWiseVectorFillResultCollector(BlockExecutionInfo blockExecutionInfos) { + super(blockExecutionInfos); + // initialize only if the current block is not a restructured block else the initialization + // will be taken care by RestructureBasedVectorResultCollector + if (!blockExecutionInfos.isRestructuredBlock()) { + queryDimensions = executionInfo.getProjectionDimensions(); + queryMeasures = executionInfo.getProjectionMeasures(); + allColumnInfo = new ColumnVectorInfo[queryDimensions.length + queryMeasures.length]; + prepareDimensionAndMeasureColumnVectors(); + } + } + + private void prepareDimensionAndMeasureColumnVectors() { + measureColumnInfo = new ColumnVectorInfo[queryMeasures.length]; + List dictInfoList = new ArrayList<>(); + List noDictInfoList = new ArrayList<>(); + List complexList = new ArrayList<>(); + for (int i = 0; i < queryDimensions.length; i++) { + if (!dimensionInfo.getDimensionExists()[i]) { + continue; + } + if (queryDimensions[i].getDimension().hasEncoding(Encoding.IMPLICIT)) { + ColumnVectorInfo columnVectorInfo = new ColumnVectorInfo(); + columnVectorInfo.dimension = queryDimensions[i]; + columnVectorInfo.ordinal = queryDimensions[i].getDimension().getOrdinal(); + allColumnInfo[queryDimensions[i].getOrdinal()] = columnVectorInfo; + } else if (!queryDimensions[i].getDimension().hasEncoding(Encoding.DICTIONARY)) { + ColumnVectorInfo columnVectorInfo = new ColumnVectorInfo(); + noDictInfoList.add(columnVectorInfo); + columnVectorInfo.dimension = queryDimensions[i]; + columnVectorInfo.ordinal = queryDimensions[i].getDimension().getOrdinal(); + allColumnInfo[queryDimensions[i].getOrdinal()] = columnVectorInfo; + } else if (queryDimensions[i].getDimension().hasEncoding(Encoding.DIRECT_DICTIONARY)) { + ColumnVectorInfo columnVectorInfo = new ColumnVectorInfo(); + dictInfoList.add(columnVectorInfo); + columnVectorInfo.dimension = queryDimensions[i]; + columnVectorInfo.directDictionaryGenerator = DirectDictionaryKeyGeneratorFactory + .getDirectDictionaryGenerator(queryDimensions[i].getDimension().getDataType()); + columnVectorInfo.ordinal = queryDimensions[i].getDimension().getOrdinal(); + allColumnInfo[queryDimensions[i].getOrdinal()] = columnVectorInfo; + } else if (queryDimensions[i].getDimension().isComplex()) { + ColumnVectorInfo columnVectorInfo = new ColumnVectorInfo(); + complexList.add(columnVectorInfo); + columnVectorInfo.dimension = queryDimensions[i]; + columnVectorInfo.ordinal = queryDimensions[i].getDimension().getOrdinal(); + columnVectorInfo.genericQueryType = + executionInfo.getComlexDimensionInfoMap().get(columnVectorInfo.ordinal); + allColumnInfo[queryDimensions[i].getOrdinal()] = columnVectorInfo; + } else { + ColumnVectorInfo columnVectorInfo = new ColumnVectorInfo(); + dictInfoList.add(columnVectorInfo); + columnVectorInfo.dimension = queryDimensions[i]; + columnVectorInfo.ordinal = queryDimensions[i].getDimension().getOrdinal(); + allColumnInfo[queryDimensions[i].getOrdinal()] = columnVectorInfo; + } + } + //skipping non existing measure columns in measureColumnInfo as here data + // filling to be done only on 
existing columns + // for non existing column it is already been filled from restructure based collector + int j = 0; + for (int i = 0; i < queryMeasures.length; i++) { + if (!measureInfo.getMeasureExists()[i]) { + continue; + } + ColumnVectorInfo columnVectorInfo = new ColumnVectorInfo(); + columnVectorInfo.measureVectorFiller = MeasureDataVectorProcessor.MeasureVectorFillerFactory + .getMeasureVectorFiller(queryMeasures[i].getMeasure().getDataType()); + columnVectorInfo.ordinal = queryMeasures[i].getMeasure().getOrdinal(); + columnVectorInfo.measure = queryMeasures[i]; + this.measureColumnInfo[j++] = columnVectorInfo; + allColumnInfo[queryMeasures[i].getOrdinal()] = columnVectorInfo; + } + dictionaryInfo = dictInfoList.toArray(new ColumnVectorInfo[dictInfoList.size()]); + noDictionaryInfo = noDictInfoList.toArray(new ColumnVectorInfo[noDictInfoList.size()]); + complexInfo = complexList.toArray(new ColumnVectorInfo[complexList.size()]); + Arrays.sort(dictionaryInfo); + Arrays.sort(complexInfo); + } + + @Override + public List collectResultInRow(BlockletScannedResult scannedResult, int batchSize) { + throw new UnsupportedOperationException("collectResultInRow is not supported here"); + } + + @Override public void collectResultInColumnarBatch(BlockletScannedResult scannedResult, + CarbonColumnarBatch columnarBatch) { + int numberOfPages = scannedResult.numberOfpages(); + while (scannedResult.getCurrentPageCounter() < numberOfPages) { + int currentPageRowCount = scannedResult.getCurrentPageRowCount(); + if (currentPageRowCount == 0) { + scannedResult.incrementPageCounter(null); + continue; + } + DeleteDeltaVo deltaVo = scannedResult.getCurrentDeleteDeltaVo(); + BitSet bitSet = null; + int deletedRows = 0; + if (deltaVo != null) { + bitSet = deltaVo.getBitSet(); + deletedRows = bitSet.cardinality(); + } + fillColumnVectorDetails(columnarBatch, bitSet); + fillResultToColumnarBatch(scannedResult); + columnarBatch.setActualSize(currentPageRowCount - deletedRows); + scannedResult.setRowCounter(currentPageRowCount - deletedRows); + scannedResult.incrementPageCounter(null); + return; + } + } + + private void fillResultToColumnarBatch(BlockletScannedResult scannedResult) { + scannedResult.fillDataChunks(dictionaryInfo, noDictionaryInfo, measureColumnInfo, + measureInfo.getMeasureOrdinals()); + + } + + private void fillColumnVectorDetails(CarbonColumnarBatch columnarBatch, + BitSet deltaBitSet) { + for (int i = 0; i < allColumnInfo.length; i++) { + allColumnInfo[i].vectorOffset = columnarBatch.getRowCounter(); + allColumnInfo[i].vector = columnarBatch.columnVectors[i]; + allColumnInfo[i].deletedRows = deltaBitSet; + if (null != allColumnInfo[i].dimension) { + allColumnInfo[i].vector.setBlockDataType(dimensionInfo.dataType[i]); + } + } + } + +} diff --git a/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java b/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java index 6a6a9293d24..ebd4a406403 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java @@ -478,6 +478,17 @@ private BlockExecutionInfo getBlockExecutionInfoForBlock(QueryModel queryModel, } else { blockExecutionInfo.setPrefetchBlocklet(queryModel.isPreFetchData()); } + // In case of fg datamap it should not go to direct fill. 
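The collectResultInColumnarBatch method above subtracts deleted rows from the page row count before reporting the batch size, which is why DeleteDeltaVo now exposes getBitSet(). A minimal sketch of that accounting (effectiveBatchSize is an illustrative helper, not part of the patch), after which the diff continues with the fg-datamap check announced in the comment above:

    import java.util.BitSet;

    public final class DeleteDeltaSizeSketch {

      // Rows flagged in the delete bitmap are excluded from the reported batch size,
      // mirroring columnarBatch.setActualSize(currentPageRowCount - deletedRows).
      static int effectiveBatchSize(int pageRowCount, BitSet deletedRows) {
        int deleted = (deletedRows == null) ? 0 : deletedRows.cardinality();
        return pageRowCount - deleted;
      }

      public static void main(String[] args) {
        BitSet deleted = new BitSet();
        deleted.set(3);
        deleted.set(7);
        System.out.println(effectiveBatchSize(32, deleted)); // 30
        System.out.println(effectiveBatchSize(32, null));    // 32
      }
    }
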
+ boolean fgDataMapPathPresent = false; + for (TableBlockInfo blockInfo : queryModel.getTableBlockInfos()) { + fgDataMapPathPresent = blockInfo.getDataMapWriterPath() != null; + if (fgDataMapPathPresent) { + break; + } + } + blockExecutionInfo + .setDirectVectorFill(queryModel.isDirectVectorFill() && !fgDataMapPathPresent); + blockExecutionInfo .setTotalNumberOfMeasureToRead(segmentProperties.getMeasuresOrdinalToChunkMapping().size()); blockExecutionInfo.setComplexDimensionInfoMap(QueryUtil diff --git a/core/src/main/java/org/apache/carbondata/core/scan/executor/infos/BlockExecutionInfo.java b/core/src/main/java/org/apache/carbondata/core/scan/executor/infos/BlockExecutionInfo.java index e737b0e381a..f0ef23b1ca6 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/executor/infos/BlockExecutionInfo.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/executor/infos/BlockExecutionInfo.java @@ -216,6 +216,11 @@ public AbstractIndex getDataBlock() { */ private QueryStatisticsModel queryStatisticsModel; + /** + * It fills the vector directly from decoded column page with out any staging and conversions + */ + private boolean isDirectVectorFill; + /** * @param blockIndex the tableBlock to set */ @@ -625,4 +630,12 @@ public QueryStatisticsModel getQueryStatisticsModel() { public void setQueryStatisticsModel(QueryStatisticsModel queryStatisticsModel) { this.queryStatisticsModel = queryStatisticsModel; } + + public boolean isDirectVectorFill() { + return isDirectVectorFill && !isRestructuredBlock; + } + + public void setDirectVectorFill(boolean directVectorFill) { + isDirectVectorFill = directVectorFill; + } } diff --git a/core/src/main/java/org/apache/carbondata/core/scan/executor/util/QueryUtil.java b/core/src/main/java/org/apache/carbondata/core/scan/executor/util/QueryUtil.java index 7849d105487..7f0a5af0335 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/executor/util/QueryUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/executor/util/QueryUtil.java @@ -760,7 +760,7 @@ public static void putDataToVector(CarbonColumnVector vector, byte[] value, int vector.putNull(vectorRow); } else { if (dt == DataTypes.STRING) { - vector.putBytes(vectorRow, 0, length, value); + vector.putByteArray(vectorRow, 0, length, value); } else if (dt == DataTypes.BOOLEAN) { vector.putBoolean(vectorRow, ByteUtil.toBoolean(value[0])); } else if (dt == DataTypes.BYTE) { @@ -792,7 +792,7 @@ public static void putDataToVector(CarbonColumnVector vector, Object value, int vector.putNull(vectorRow); } else { if (dt == DataTypes.STRING) { - vector.putBytes(vectorRow, (byte[]) value); + vector.putByteArray(vectorRow, (byte[]) value); } else if (dt == DataTypes.BOOLEAN) { vector.putBoolean(vectorRow, (boolean) value); } else if (dt == DataTypes.BYTE) { diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/AndFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/AndFilterExecuterImpl.java index d743151c6fb..f0feb0e887d 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/AndFilterExecuterImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/AndFilterExecuterImpl.java @@ -50,6 +50,21 @@ public BitSetGroup applyFilter(RawBlockletColumnChunks rawBlockletColumnChunks, return leftFilters; } + @Override + public BitSet prunePages(RawBlockletColumnChunks rawBlockletColumnChunks) + throws FilterUnsupportedException, IOException { + BitSet leftFilters = 
leftExecuter.prunePages(rawBlockletColumnChunks); + if (leftFilters.isEmpty()) { + return leftFilters; + } + BitSet rightFilter = rightExecuter.prunePages(rawBlockletColumnChunks); + if (rightFilter.isEmpty()) { + return rightFilter; + } + leftFilters.and(rightFilter); + return leftFilters; + } + @Override public boolean applyFilter(RowIntf value, int dimOrdinalMax) throws FilterUnsupportedException, IOException { return leftExecuter.applyFilter(value, dimOrdinalMax) && diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ExcludeFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ExcludeFilterExecuterImpl.java index 15a43c56855..f80a048d021 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ExcludeFilterExecuterImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ExcludeFilterExecuterImpl.java @@ -25,6 +25,7 @@ import org.apache.carbondata.core.datastore.chunk.impl.MeasureRawColumnChunk; import org.apache.carbondata.core.datastore.page.ColumnPage; import org.apache.carbondata.core.metadata.datatype.DataType; +import org.apache.carbondata.core.scan.expression.exception.FilterUnsupportedException; import org.apache.carbondata.core.scan.filter.FilterUtil; import org.apache.carbondata.core.scan.filter.intf.RowIntf; import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo; @@ -143,6 +144,40 @@ public BitSetGroup applyFilter(RawBlockletColumnChunks rawBlockletColumnChunks, return null; } + @Override + public BitSet prunePages(RawBlockletColumnChunks rawBlockletColumnChunks) + throws FilterUnsupportedException, IOException { + if (isDimensionPresentInCurrentBlock) { + int chunkIndex = segmentProperties.getDimensionOrdinalToChunkMapping() + .get(dimColEvaluatorInfo.getColumnIndex()); + if (null == rawBlockletColumnChunks.getDimensionRawColumnChunks()[chunkIndex]) { + rawBlockletColumnChunks.getDimensionRawColumnChunks()[chunkIndex] = + rawBlockletColumnChunks.getDataBlock() + .readDimensionChunk(rawBlockletColumnChunks.getFileReader(), chunkIndex); + } + DimensionRawColumnChunk dimensionRawColumnChunk = + rawBlockletColumnChunks.getDimensionRawColumnChunks()[chunkIndex]; + BitSet bitSet = new BitSet(dimensionRawColumnChunk.getPagesCount()); + bitSet.set(0, dimensionRawColumnChunk.getPagesCount()); + return bitSet; + } else if (isMeasurePresentInCurrentBlock) { + int chunkIndex = segmentProperties.getMeasuresOrdinalToChunkMapping() + .get(msrColumnEvaluatorInfo.getColumnIndex()); + if (null == rawBlockletColumnChunks.getMeasureRawColumnChunks()[chunkIndex]) { + rawBlockletColumnChunks.getMeasureRawColumnChunks()[chunkIndex] = + rawBlockletColumnChunks.getDataBlock() + .readMeasureChunk(rawBlockletColumnChunks.getFileReader(), chunkIndex); + } + MeasureRawColumnChunk measureRawColumnChunk = + rawBlockletColumnChunks.getMeasureRawColumnChunks()[chunkIndex]; + + BitSet bitSet = new BitSet(measureRawColumnChunk.getPagesCount()); + bitSet.set(0, measureRawColumnChunk.getPagesCount()); + return bitSet; + } + return null; + } + @Override public boolean applyFilter(RowIntf value, int dimOrdinalMax) { if (isDimensionPresentInCurrentBlock) { diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/FalseFilterExecutor.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/FalseFilterExecutor.java index c2fa1ef7731..4066af82136 100644 --- 
a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/FalseFilterExecutor.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/FalseFilterExecutor.java @@ -44,6 +44,14 @@ public BitSetGroup applyFilter(RawBlockletColumnChunks rawChunks, boolean useBit return group; } + @Override + public BitSet prunePages(RawBlockletColumnChunks rawChunks) + throws FilterUnsupportedException, IOException { + int numberOfPages = rawChunks.getDataBlock().numberOfPages(); + BitSet set = new BitSet(numberOfPages); + return set; + } + @Override public boolean applyFilter(RowIntf value, int dimOrdinalMax) throws FilterUnsupportedException, IOException { diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/FilterExecuter.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/FilterExecuter.java index edf88b1039b..88b9d6c8a28 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/FilterExecuter.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/FilterExecuter.java @@ -35,6 +35,12 @@ public interface FilterExecuter { BitSetGroup applyFilter(RawBlockletColumnChunks rawBlockletColumnChunks, boolean useBitsetPipeLine) throws FilterUnsupportedException, IOException; + /** + * Prune pages as per the filter + */ + BitSet prunePages(RawBlockletColumnChunks rawBlockletColumnChunks) + throws FilterUnsupportedException, IOException; + /** * apply range filter on a row * @return true: if the value satisfy the filter; or else false. diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ImplicitIncludeFilterExecutorImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ImplicitIncludeFilterExecutorImpl.java index bb30d71b7a8..b37cdd2ff82 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ImplicitIncludeFilterExecutorImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ImplicitIncludeFilterExecutorImpl.java @@ -53,6 +53,14 @@ public BitSetGroup applyFilter(RawBlockletColumnChunks rawBlockletColumnChunks, return bitSetGroup; } + @Override + public BitSet prunePages(RawBlockletColumnChunks rawBlockletColumnChunks) + throws FilterUnsupportedException, IOException { + BitSet bitSet = new BitSet(rawBlockletColumnChunks.getDataBlock().numberOfPages()); + bitSet.set(0, rawBlockletColumnChunks.getDataBlock().numberOfPages()); + return bitSet; + } + @Override public boolean applyFilter(RowIntf value, int dimOrdinalMax) throws FilterUnsupportedException, IOException { diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImpl.java index 8070c2f8e04..5c021118bdd 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImpl.java @@ -27,6 +27,7 @@ import org.apache.carbondata.core.datastore.page.ColumnPage; import org.apache.carbondata.core.metadata.datatype.DataType; import org.apache.carbondata.core.metadata.encoder.Encoding; +import org.apache.carbondata.core.scan.expression.exception.FilterUnsupportedException; import org.apache.carbondata.core.scan.filter.FilterUtil; import org.apache.carbondata.core.scan.filter.intf.RowIntf; import 
org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo; @@ -109,20 +110,7 @@ public BitSetGroup applyFilter(RawBlockletColumnChunks rawBlockletColumnChunks, boolean isDecoded = false; for (int i = 0; i < dimensionRawColumnChunk.getPagesCount(); i++) { if (dimensionRawColumnChunk.getMaxValues() != null) { - boolean scanRequired; - // for no dictionary measure column comparison can be done - // on the original data as like measure column - if (DataTypeUtil.isPrimitiveColumn(dimColumnEvaluatorInfo.getDimension().getDataType()) - && !dimColumnEvaluatorInfo.getDimension().hasEncoding(Encoding.DICTIONARY)) { - scanRequired = isScanRequired(dimensionRawColumnChunk.getMaxValues()[i], - dimensionRawColumnChunk.getMinValues()[i], dimColumnExecuterInfo.getFilterKeys(), - dimColumnEvaluatorInfo.getDimension().getDataType()); - } else { - scanRequired = isScanRequired(dimensionRawColumnChunk.getMaxValues()[i], - dimensionRawColumnChunk.getMinValues()[i], dimColumnExecuterInfo.getFilterKeys(), - dimensionRawColumnChunk.getMinMaxFlagArray()[i]); - } - if (scanRequired) { + if (isScanRequired(dimensionRawColumnChunk, i)) { DimensionColumnPage dimensionColumnPage = dimensionRawColumnChunk.decodeColumnPage(i); if (!isDecoded) { filterValues = FilterUtil @@ -179,6 +167,75 @@ public BitSetGroup applyFilter(RawBlockletColumnChunks rawBlockletColumnChunks, return null; } + private boolean isScanRequired(DimensionRawColumnChunk dimensionRawColumnChunk, int i) { + boolean scanRequired; + // for no dictionary measure column comparison can be done + // on the original data as like measure column + if (DataTypeUtil.isPrimitiveColumn(dimColumnEvaluatorInfo.getDimension().getDataType()) + && !dimColumnEvaluatorInfo.getDimension().hasEncoding(Encoding.DICTIONARY)) { + scanRequired = isScanRequired(dimensionRawColumnChunk.getMaxValues()[i], + dimensionRawColumnChunk.getMinValues()[i], dimColumnExecuterInfo.getFilterKeys(), + dimColumnEvaluatorInfo.getDimension().getDataType()); + } else { + scanRequired = isScanRequired(dimensionRawColumnChunk.getMaxValues()[i], + dimensionRawColumnChunk.getMinValues()[i], dimColumnExecuterInfo.getFilterKeys(), + dimensionRawColumnChunk.getMinMaxFlagArray()[i]); + } + return scanRequired; + } + + @Override + public BitSet prunePages(RawBlockletColumnChunks rawBlockletColumnChunks) + throws FilterUnsupportedException, IOException { + if (isDimensionPresentInCurrentBlock) { + int chunkIndex = segmentProperties.getDimensionOrdinalToChunkMapping() + .get(dimColumnEvaluatorInfo.getColumnIndex()); + if (null == rawBlockletColumnChunks.getDimensionRawColumnChunks()[chunkIndex]) { + rawBlockletColumnChunks.getDimensionRawColumnChunks()[chunkIndex] = + rawBlockletColumnChunks.getDataBlock() + .readDimensionChunk(rawBlockletColumnChunks.getFileReader(), chunkIndex); + } + DimensionRawColumnChunk dimensionRawColumnChunk = + rawBlockletColumnChunks.getDimensionRawColumnChunks()[chunkIndex]; + filterValues = dimColumnExecuterInfo.getFilterKeys(); + BitSet bitSet = new BitSet(dimensionRawColumnChunk.getPagesCount()); + for (int i = 0; i < dimensionRawColumnChunk.getPagesCount(); i++) { + if (dimensionRawColumnChunk.getMaxValues() != null) { + if (isScanRequired(dimensionRawColumnChunk, i)) { + bitSet.set(i); + } + } else { + bitSet.set(i); + } + } + return bitSet; + } else if (isMeasurePresentInCurrentBlock) { + int chunkIndex = segmentProperties.getMeasuresOrdinalToChunkMapping() + .get(msrColumnEvaluatorInfo.getColumnIndex()); + if (null == 
rawBlockletColumnChunks.getMeasureRawColumnChunks()[chunkIndex]) { + rawBlockletColumnChunks.getMeasureRawColumnChunks()[chunkIndex] = + rawBlockletColumnChunks.getDataBlock() + .readMeasureChunk(rawBlockletColumnChunks.getFileReader(), chunkIndex); + } + MeasureRawColumnChunk measureRawColumnChunk = + rawBlockletColumnChunks.getMeasureRawColumnChunks()[chunkIndex]; + BitSet bitSet = new BitSet(measureRawColumnChunk.getPagesCount()); + for (int i = 0; i < measureRawColumnChunk.getPagesCount(); i++) { + if (measureRawColumnChunk.getMaxValues() != null) { + if (isScanRequired(measureRawColumnChunk.getMaxValues()[i], + measureRawColumnChunk.getMinValues()[i], msrColumnExecutorInfo.getFilterKeys(), + msrColumnEvaluatorInfo.getType())) { + bitSet.set(i); + } + } else { + bitSet.set(i); + } + } + return bitSet; + } + return null; + } + @Override public boolean applyFilter(RowIntf value, int dimOrdinalMax) { if (isDimensionPresentInCurrentBlock) { byte[][] filterValues = dimColumnExecuterInfo.getFilterKeys(); diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/OrFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/OrFilterExecuterImpl.java index 4e7dec2dff7..951d9657ea6 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/OrFilterExecuterImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/OrFilterExecuterImpl.java @@ -44,6 +44,15 @@ public BitSetGroup applyFilter(RawBlockletColumnChunks rawBlockletColumnChunks, return leftFilters; } + @Override + public BitSet prunePages(RawBlockletColumnChunks rawBlockletColumnChunks) + throws FilterUnsupportedException, IOException { + BitSet leftFilters = leftExecuter.prunePages(rawBlockletColumnChunks); + BitSet rightFilters = rightExecuter.prunePages(rawBlockletColumnChunks); + leftFilters.or(rightFilters); + return leftFilters; + } + @Override public boolean applyFilter(RowIntf value, int dimOrdinalMax) throws FilterUnsupportedException, IOException { diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java index 886a13b4a72..a8bc3418e1d 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java @@ -146,6 +146,44 @@ public BitSetGroup applyFilter(RawBlockletColumnChunks rawBlockletColumnChunks, return applyNoAndDirectFilter(rawBlockletColumnChunks, useBitsetPipeLine); } + @Override + public BitSet prunePages(RawBlockletColumnChunks blockChunkHolder) + throws FilterUnsupportedException, IOException { + // In case of Alter Table Add and Delete Columns the isDimensionPresentInCurrentBlock can be + // false, in that scenario the default values of the column should be shown. 
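The AndFilterExecuterImpl and OrFilterExecuterImpl prunePages implementations above compose their children's page bitsets with plain BitSet operations: intersection with an early exit on an empty side for AND, union for OR. A small sketch of that composition; the class and method names are illustrative only:

    import java.util.BitSet;

    public final class PagePruneComposeSketch {

      // Intersection with the AND executer's early exit when either side keeps no page.
      static BitSet prunePagesAnd(BitSet left, BitSet right) {
        if (left.isEmpty()) {
          return left;
        }
        if (right.isEmpty()) {
          return right;
        }
        BitSet result = (BitSet) left.clone();
        result.and(right);
        return result;
      }

      // Union, as the OR executer does.
      static BitSet prunePagesOr(BitSet left, BitSet right) {
        BitSet result = (BitSet) left.clone();
        result.or(right);
        return result;
      }

      public static void main(String[] args) {
        BitSet left = new BitSet();
        left.set(0);
        left.set(2);
        BitSet right = new BitSet();
        right.set(2);
        right.set(3);
        System.out.println(prunePagesAnd(left, right)); // {2}
        System.out.println(prunePagesOr(left, right));  // {0, 2, 3}
      }
    }
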
+ // select all rows if dimension does not exists in the current block + if (!isDimensionPresentInCurrentBlock) { + int i = blockChunkHolder.getDataBlock().numberOfPages(); + BitSet bitSet = new BitSet(); + bitSet.set(0, i); + return bitSet; + } + + int chunkIndex = segmentProperties.getDimensionOrdinalToChunkMapping() + .get(dimColEvaluatorInfo.getColumnIndex()); + + if (null == blockChunkHolder.getDimensionRawColumnChunks()[chunkIndex]) { + blockChunkHolder.getDimensionRawColumnChunks()[chunkIndex] = blockChunkHolder.getDataBlock() + .readDimensionChunk(blockChunkHolder.getFileReader(), chunkIndex); + } + + DimensionRawColumnChunk rawColumnChunk = + blockChunkHolder.getDimensionRawColumnChunks()[chunkIndex]; + BitSet bitSet = new BitSet(rawColumnChunk.getPagesCount()); + for (int i = 0; i < rawColumnChunk.getPagesCount(); i++) { + if (rawColumnChunk.getMaxValues() != null) { + if (isScanRequired(rawColumnChunk.getMinValues()[i], rawColumnChunk.getMaxValues()[i], + this.filterRangesValues, rawColumnChunk.getMinMaxFlagArray()[i])) { + bitSet.set(i); + } + } else { + bitSet.set(i); + } + } + return bitSet; + + } + /** * apply range filter on a row */ diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RestructureEvaluatorImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RestructureEvaluatorImpl.java index c403846c10c..a25394f2ef5 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RestructureEvaluatorImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RestructureEvaluatorImpl.java @@ -17,7 +17,9 @@ package org.apache.carbondata.core.scan.filter.executer; +import java.io.IOException; import java.nio.charset.Charset; +import java.util.BitSet; import java.util.List; import org.apache.carbondata.core.constants.CarbonCommonConstants; @@ -28,10 +30,12 @@ import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension; import org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure; import org.apache.carbondata.core.scan.executor.util.RestructureUtil; +import org.apache.carbondata.core.scan.expression.exception.FilterUnsupportedException; import org.apache.carbondata.core.scan.filter.ColumnFilterInfo; import org.apache.carbondata.core.scan.filter.FilterUtil; import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo; import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.MeasureColumnResolvedFilterInfo; +import org.apache.carbondata.core.scan.processor.RawBlockletColumnChunks; import org.apache.carbondata.core.util.comparator.Comparator; import org.apache.carbondata.core.util.comparator.SerializableComparator; @@ -104,6 +108,12 @@ protected boolean isDimensionDefaultValuePresentInFilterValues( return isDefaultValuePresentInFilterValues; } + @Override + public BitSet prunePages(RawBlockletColumnChunks rawBlockletColumnChunks) + throws FilterUnsupportedException, IOException { + return new BitSet(); + } + /** * This method will check whether a default value for the non-existing column is present * in the filter values list diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelFilterExecuterImpl.java index 28c3f87b570..7ca257933dc 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelFilterExecuterImpl.java +++ 
b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelFilterExecuterImpl.java @@ -289,6 +289,16 @@ public BitSetGroup applyFilter(RawBlockletColumnChunks rawBlockletColumnChunks, return bitSetGroup; } + @Override + public BitSet prunePages(RawBlockletColumnChunks rawBlockletColumnChunks) + throws FilterUnsupportedException, IOException { + readColumnChunks(rawBlockletColumnChunks); + int pages = rawBlockletColumnChunks.getDataBlock().numberOfPages(); + BitSet bitSet = new BitSet(); + bitSet.set(0, pages); + return bitSet; + } + @Override public boolean applyFilter(RowIntf value, int dimOrdinalMax) throws FilterUnsupportedException, IOException { diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java index c6835f8a748..06dc3c4e1ab 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java @@ -148,6 +148,61 @@ private void ifDefaultValueMatchesFilter() { return bitSet; } + @Override + public BitSet prunePages(RawBlockletColumnChunks rawBlockletColumnChunks) + throws FilterUnsupportedException, IOException { + if (!isDimensionPresentInCurrentBlock[0] && !isMeasurePresentInCurrentBlock[0]) { + int numberOfPages = rawBlockletColumnChunks.getDataBlock().numberOfPages(); + BitSet bitSet = new BitSet(numberOfPages); + bitSet.set(0, numberOfPages); + return bitSet; + } + if (isDimensionPresentInCurrentBlock[0]) { + int chunkIndex = + segmentProperties.getDimensionOrdinalToChunkMapping().get(dimensionChunkIndex[0]); + if (null == rawBlockletColumnChunks.getDimensionRawColumnChunks()[chunkIndex]) { + rawBlockletColumnChunks.getDimensionRawColumnChunks()[chunkIndex] = + rawBlockletColumnChunks.getDataBlock() + .readDimensionChunk(rawBlockletColumnChunks.getFileReader(), chunkIndex); + } + DimensionRawColumnChunk rawColumnChunk = + rawBlockletColumnChunks.getDimensionRawColumnChunks()[chunkIndex]; + BitSet bitSet = new BitSet(rawColumnChunk.getPagesCount()); + for (int i = 0; i < rawColumnChunk.getPagesCount(); i++) { + if (rawColumnChunk.getMaxValues() != null) { + if (isScanRequired(rawColumnChunk, i)) { + bitSet.set(i); + } + } else { + bitSet.set(i); + } + } + return bitSet; + } else if (isMeasurePresentInCurrentBlock[0]) { + int chunkIndex = segmentProperties.getMeasuresOrdinalToChunkMapping() + .get(msrColEvalutorInfoList.get(0).getColumnIndex()); + if (null == rawBlockletColumnChunks.getMeasureRawColumnChunks()[chunkIndex]) { + rawBlockletColumnChunks.getMeasureRawColumnChunks()[chunkIndex] = + rawBlockletColumnChunks.getDataBlock() + .readMeasureChunk(rawBlockletColumnChunks.getFileReader(), chunkIndex); + } + MeasureRawColumnChunk rawColumnChunk = + rawBlockletColumnChunks.getMeasureRawColumnChunks()[chunkIndex]; + BitSet bitSet = new BitSet(rawColumnChunk.getPagesCount()); + for (int i = 0; i < rawColumnChunk.getPagesCount(); i++) { + if (rawColumnChunk.getMaxValues() != null) { + if (isScanRequired(rawColumnChunk.getMaxValues()[i], this.msrFilterRangeValues, + msrColEvalutorInfoList.get(0).getType())) { + bitSet.set(i); + } + } else { + bitSet.set(i); + } + } + return bitSet; + } + return null; + } private boolean isScanRequired(byte[] blockMaxValue, byte[][] filterValues, boolean isMinMaxSet) { if (!isMinMaxSet) { 
@@ -236,19 +291,7 @@ public BitSetGroup applyFilter(RawBlockletColumnChunks rawBlockletColumnChunks, boolean isExclude = false; for (int i = 0; i < rawColumnChunk.getPagesCount(); i++) { if (rawColumnChunk.getMaxValues() != null) { - boolean scanRequired; - DataType dataType = dimColEvaluatorInfoList.get(0).getDimension().getDataType(); - // for no dictionary measure column comparison can be done - // on the original data as like measure column - if (DataTypeUtil.isPrimitiveColumn(dataType) && !dimColEvaluatorInfoList.get(0) - .getDimension().hasEncoding(Encoding.DICTIONARY)) { - scanRequired = - isScanRequired(rawColumnChunk.getMaxValues()[i], this.filterRangeValues, dataType); - } else { - scanRequired = isScanRequired(rawColumnChunk.getMaxValues()[i], - this.filterRangeValues, rawColumnChunk.getMinMaxFlagArray()[i]); - } - if (scanRequired) { + if (isScanRequired(rawColumnChunk, i)) { int compare = ByteUtil.UnsafeComparer.INSTANCE .compareTo(filterRangeValues[0], rawColumnChunk.getMinValues()[i]); if (compare < 0) { @@ -333,6 +376,22 @@ public BitSetGroup applyFilter(RawBlockletColumnChunks rawBlockletColumnChunks, return null; } + private boolean isScanRequired(DimensionRawColumnChunk rawColumnChunk, int i) { + boolean scanRequired; + DataType dataType = dimColEvaluatorInfoList.get(0).getDimension().getDataType(); + // for no dictionary measure column comparison can be done + // on the original data as like measure column + if (DataTypeUtil.isPrimitiveColumn(dataType) && !dimColEvaluatorInfoList.get(0) + .getDimension().hasEncoding(Encoding.DICTIONARY)) { + scanRequired = + isScanRequired(rawColumnChunk.getMaxValues()[i], this.filterRangeValues, dataType); + } else { + scanRequired = isScanRequired(rawColumnChunk.getMaxValues()[i], + this.filterRangeValues, rawColumnChunk.getMinMaxFlagArray()[i]); + } + return scanRequired; + } + @Override public boolean applyFilter(RowIntf value, int dimOrdinalMax) throws FilterUnsupportedException, IOException { diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java index afb646af076..e4c507d9493 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java @@ -234,19 +234,7 @@ public BitSetGroup applyFilter(RawBlockletColumnChunks rawBlockletColumnChunks, boolean isExclude = false; for (int i = 0; i < rawColumnChunk.getPagesCount(); i++) { if (rawColumnChunk.getMaxValues() != null) { - boolean scanRequired; - DataType dataType = dimColEvaluatorInfoList.get(0).getDimension().getDataType(); - // for no dictionary measure column comparison can be done - // on the original data as like measure column - if (DataTypeUtil.isPrimitiveColumn(dataType) && !dimColEvaluatorInfoList.get(0) - .getDimension().hasEncoding(Encoding.DICTIONARY)) { - scanRequired = - isScanRequired(rawColumnChunk.getMaxValues()[i], this.filterRangeValues, dataType); - } else { - scanRequired = isScanRequired(rawColumnChunk.getMaxValues()[i], this.filterRangeValues, - rawColumnChunk.getMinMaxFlagArray()[i]); - } - if (scanRequired) { + if (isScanRequired(rawColumnChunk, i)) { int compare = ByteUtil.UnsafeComparer.INSTANCE .compareTo(filterRangeValues[0], rawColumnChunk.getMinValues()[i]); if (compare <= 
0) { @@ -331,6 +319,80 @@ public BitSetGroup applyFilter(RawBlockletColumnChunks rawBlockletColumnChunks, } } + private boolean isScanRequired(DimensionRawColumnChunk rawColumnChunk, int i) { + boolean scanRequired; + DataType dataType = dimColEvaluatorInfoList.get(0).getDimension().getDataType(); + // for no dictionary measure column comparison can be done + // on the original data as like measure column + if (DataTypeUtil.isPrimitiveColumn(dataType) && !dimColEvaluatorInfoList.get(0) + .getDimension().hasEncoding(Encoding.DICTIONARY)) { + scanRequired = + isScanRequired(rawColumnChunk.getMaxValues()[i], this.filterRangeValues, dataType); + } else { + scanRequired = isScanRequired(rawColumnChunk.getMaxValues()[i], this.filterRangeValues, + rawColumnChunk.getMinMaxFlagArray()[i]); + } + return scanRequired; + } + + @Override + public BitSet prunePages(RawBlockletColumnChunks rawBlockletColumnChunks) + throws FilterUnsupportedException, IOException { + // select all rows if dimension does not exists in the current block + if (!isDimensionPresentInCurrentBlock[0] && !isMeasurePresentInCurrentBlock[0]) { + int numberOfPages = rawBlockletColumnChunks.getDataBlock().numberOfPages(); + BitSet bitSet = new BitSet(numberOfPages); + bitSet.set(0, numberOfPages); + return bitSet; + } + + if (isDimensionPresentInCurrentBlock[0]) { + int chunkIndex = + segmentProperties.getDimensionOrdinalToChunkMapping().get(dimensionChunkIndex[0]); + if (null == rawBlockletColumnChunks.getDimensionRawColumnChunks()[chunkIndex]) { + rawBlockletColumnChunks.getDimensionRawColumnChunks()[chunkIndex] = + rawBlockletColumnChunks.getDataBlock() + .readDimensionChunk(rawBlockletColumnChunks.getFileReader(), chunkIndex); + } + DimensionRawColumnChunk rawColumnChunk = + rawBlockletColumnChunks.getDimensionRawColumnChunks()[chunkIndex]; + BitSet bitSet = new BitSet(rawColumnChunk.getPagesCount()); + for (int i = 0; i < rawColumnChunk.getPagesCount(); i++) { + if (rawColumnChunk.getMaxValues() != null) { + if (isScanRequired(rawColumnChunk, i)) { + bitSet.set(i); + } + } else { + bitSet.set(i); + } + } + return bitSet; + } else { + int chunkIndex = segmentProperties.getMeasuresOrdinalToChunkMapping() + .get(msrColEvalutorInfoList.get(0).getColumnIndex()); + if (null == rawBlockletColumnChunks.getMeasureRawColumnChunks()[chunkIndex]) { + rawBlockletColumnChunks.getMeasureRawColumnChunks()[chunkIndex] = + rawBlockletColumnChunks.getDataBlock() + .readMeasureChunk(rawBlockletColumnChunks.getFileReader(), chunkIndex); + } + MeasureRawColumnChunk rawColumnChunk = + rawBlockletColumnChunks.getMeasureRawColumnChunks()[chunkIndex]; + BitSet bitSet = new BitSet(rawColumnChunk.getPagesCount()); + for (int i = 0; i < rawColumnChunk.getPagesCount(); i++) { + if (rawColumnChunk.getMaxValues() != null) { + if (isScanRequired(rawColumnChunk.getMaxValues()[i], this.msrFilterRangeValues, + msrColEvalutorInfoList.get(0).getType())) { + bitSet.set(i); + } + } else { + bitSet.set(i); + } + } + return bitSet; + } + + } + @Override public boolean applyFilter(RowIntf value, int dimOrdinalMax) throws FilterUnsupportedException, IOException { diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java index 647425d8c32..ac9661e23aa 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java +++ 
b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java @@ -235,19 +235,7 @@ public BitSetGroup applyFilter(RawBlockletColumnChunks rawBlockletColumnChunks, boolean isExclude = false; for (int i = 0; i < rawColumnChunk.getPagesCount(); i++) { if (rawColumnChunk.getMinValues() != null) { - boolean scanRequired; - DataType dataType = dimColEvaluatorInfoList.get(0).getDimension().getDataType(); - // for no dictionary measure column comparison can be done - // on the original data as like measure column - if (DataTypeUtil.isPrimitiveColumn(dataType) && !dimColEvaluatorInfoList.get(0) - .getDimension().hasEncoding(Encoding.DICTIONARY)) { - scanRequired = - isScanRequired(rawColumnChunk.getMinValues()[i], this.filterRangeValues, dataType); - } else { - scanRequired = isScanRequired(rawColumnChunk.getMinValues()[i], this.filterRangeValues, - rawColumnChunk.getMinMaxFlagArray()[i]); - } - if (scanRequired) { + if (isScanRequired(rawColumnChunk, i)) { BitSet bitSet; DimensionColumnPage dimensionColumnPage = rawColumnChunk.decodeColumnPage(i); if (null != rawColumnChunk.getLocalDictionary()) { @@ -314,6 +302,79 @@ public BitSetGroup applyFilter(RawBlockletColumnChunks rawBlockletColumnChunks, return null; } + private boolean isScanRequired(DimensionRawColumnChunk rawColumnChunk, int i) { + boolean scanRequired; + DataType dataType = dimColEvaluatorInfoList.get(0).getDimension().getDataType(); + // for no dictionary measure column comparison can be done + // on the original data as like measure column + if (DataTypeUtil.isPrimitiveColumn(dataType) && !dimColEvaluatorInfoList.get(0) + .getDimension().hasEncoding(Encoding.DICTIONARY)) { + scanRequired = + isScanRequired(rawColumnChunk.getMinValues()[i], this.filterRangeValues, dataType); + } else { + scanRequired = isScanRequired(rawColumnChunk.getMinValues()[i], this.filterRangeValues, + rawColumnChunk.getMinMaxFlagArray()[i]); + } + return scanRequired; + } + + @Override + public BitSet prunePages(RawBlockletColumnChunks rawBlockletColumnChunks) + throws FilterUnsupportedException, IOException { + // select all rows if dimension does not exists in the current block + if (!isDimensionPresentInCurrentBlock[0] && !isMeasurePresentInCurrentBlock[0]) { + int numberOfPages = rawBlockletColumnChunks.getDataBlock().numberOfPages(); + BitSet bitSet = new BitSet(numberOfPages); + bitSet.set(0, numberOfPages); + return bitSet; + } + if (isDimensionPresentInCurrentBlock[0]) { + int chunkIndex = + segmentProperties.getDimensionOrdinalToChunkMapping().get(dimensionChunkIndex[0]); + if (null == rawBlockletColumnChunks.getDimensionRawColumnChunks()[chunkIndex]) { + rawBlockletColumnChunks.getDimensionRawColumnChunks()[chunkIndex] = + rawBlockletColumnChunks.getDataBlock() + .readDimensionChunk(rawBlockletColumnChunks.getFileReader(), chunkIndex); + } + DimensionRawColumnChunk rawColumnChunk = + rawBlockletColumnChunks.getDimensionRawColumnChunks()[chunkIndex]; + BitSet bitSet = new BitSet(rawColumnChunk.getPagesCount()); + for (int i = 0; i < rawColumnChunk.getPagesCount(); i++) { + if (rawColumnChunk.getMinValues() != null) { + if (isScanRequired(rawColumnChunk, i)) { + bitSet.set(i); + } + } else { + bitSet.set(i); + } + } + return bitSet; + } else if (isMeasurePresentInCurrentBlock[0]) { + int chunkIndex = segmentProperties.getMeasuresOrdinalToChunkMapping() + .get(msrColEvalutorInfoList.get(0).getColumnIndex()); + if (null == rawBlockletColumnChunks.getMeasureRawColumnChunks()[chunkIndex]) { + 
rawBlockletColumnChunks.getMeasureRawColumnChunks()[chunkIndex] = + rawBlockletColumnChunks.getDataBlock() + .readMeasureChunk(rawBlockletColumnChunks.getFileReader(), chunkIndex); + } + MeasureRawColumnChunk rawColumnChunk = + rawBlockletColumnChunks.getMeasureRawColumnChunks()[chunkIndex]; + BitSet bitSet = new BitSet(rawColumnChunk.getPagesCount()); + for (int i = 0; i < rawColumnChunk.getPagesCount(); i++) { + if (rawColumnChunk.getMinValues() != null) { + if (isScanRequired(rawColumnChunk.getMinValues()[i], this.msrFilterRangeValues, + msrColEvalutorInfoList.get(0).getType())) { + bitSet.set(i); + } + } else { + bitSet.set(i); + } + } + return bitSet; + } + return null; + } + @Override public boolean applyFilter(RowIntf value, int dimOrdinalMax) throws FilterUnsupportedException, IOException { diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFilterExecuterImpl.java index 4ef99992177..644cf028fbf 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFilterExecuterImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFilterExecuterImpl.java @@ -233,19 +233,7 @@ public BitSetGroup applyFilter(RawBlockletColumnChunks rawBlockletColumnChunks, boolean isExclude = false; for (int i = 0; i < rawColumnChunk.getPagesCount(); i++) { if (rawColumnChunk.getMinValues() != null) { - boolean scanRequired; - DataType dataType = dimColEvaluatorInfoList.get(0).getDimension().getDataType(); - // for no dictionary measure column comparison can be done - // on the original data as like measure column - if (DataTypeUtil.isPrimitiveColumn(dataType) && !dimColEvaluatorInfoList.get(0) - .getDimension().hasEncoding(Encoding.DICTIONARY)) { - scanRequired = - isScanRequired(rawColumnChunk.getMinValues()[i], this.filterRangeValues, dataType); - } else { - scanRequired = isScanRequired(rawColumnChunk.getMinValues()[i], this.filterRangeValues, - rawColumnChunk.getMinMaxFlagArray()[i]); - } - if (scanRequired) { + if (isScanRequired(rawColumnChunk, i)) { BitSet bitSet; DimensionColumnPage dimensionColumnPage = rawColumnChunk.decodeColumnPage(i); if (null != rawColumnChunk.getLocalDictionary()) { @@ -311,6 +299,78 @@ public BitSetGroup applyFilter(RawBlockletColumnChunks rawBlockletColumnChunks, } } + private boolean isScanRequired(DimensionRawColumnChunk rawColumnChunk, int i) { + boolean scanRequired; + DataType dataType = dimColEvaluatorInfoList.get(0).getDimension().getDataType(); + // for no dictionary measure column comparison can be done + // on the original data as like measure column + if (DataTypeUtil.isPrimitiveColumn(dataType) && !dimColEvaluatorInfoList.get(0) + .getDimension().hasEncoding(Encoding.DICTIONARY)) { + scanRequired = + isScanRequired(rawColumnChunk.getMinValues()[i], this.filterRangeValues, dataType); + } else { + scanRequired = isScanRequired(rawColumnChunk.getMinValues()[i], this.filterRangeValues, + rawColumnChunk.getMinMaxFlagArray()[i]); + } + return scanRequired; + } + + @Override + public BitSet prunePages(RawBlockletColumnChunks rawBlockletColumnChunks) + throws FilterUnsupportedException, IOException { + // select all rows if dimension does not exists in the current block + if (!isDimensionPresentInCurrentBlock[0] && !isMeasurePresentInCurrentBlock[0]) { + int numberOfPages = 
rawBlockletColumnChunks.getDataBlock().numberOfPages(); + BitSet bitSet = new BitSet(numberOfPages); + bitSet.set(0, numberOfPages); + return bitSet; + } + if (isDimensionPresentInCurrentBlock[0]) { + int chunkIndex = + segmentProperties.getDimensionOrdinalToChunkMapping().get(dimensionChunkIndex[0]); + if (null == rawBlockletColumnChunks.getDimensionRawColumnChunks()[chunkIndex]) { + rawBlockletColumnChunks.getDimensionRawColumnChunks()[chunkIndex] = + rawBlockletColumnChunks.getDataBlock() + .readDimensionChunk(rawBlockletColumnChunks.getFileReader(), chunkIndex); + } + DimensionRawColumnChunk rawColumnChunk = + rawBlockletColumnChunks.getDimensionRawColumnChunks()[chunkIndex]; + BitSet bitSet = new BitSet(rawColumnChunk.getPagesCount()); + for (int i = 0; i < rawColumnChunk.getPagesCount(); i++) { + if (rawColumnChunk.getMinValues() != null) { + if (isScanRequired(rawColumnChunk, i)) { + bitSet.set(i); + } + } else { + bitSet.set(i); + } + } + return bitSet; + } else { + int chunkIndex = segmentProperties.getMeasuresOrdinalToChunkMapping() + .get(msrColEvalutorInfoList.get(0).getColumnIndex()); + if (null == rawBlockletColumnChunks.getMeasureRawColumnChunks()[chunkIndex]) { + rawBlockletColumnChunks.getMeasureRawColumnChunks()[chunkIndex] = + rawBlockletColumnChunks.getDataBlock() + .readMeasureChunk(rawBlockletColumnChunks.getFileReader(), chunkIndex); + } + MeasureRawColumnChunk rawColumnChunk = + rawBlockletColumnChunks.getMeasureRawColumnChunks()[chunkIndex]; + BitSet bitSet = new BitSet(rawColumnChunk.getPagesCount()); + for (int i = 0; i < rawColumnChunk.getPagesCount(); i++) { + if (rawColumnChunk.getMinValues() != null) { + if (isScanRequired(rawColumnChunk.getMinValues()[i], this.msrFilterRangeValues, + msrColEvalutorInfoList.get(0).getType())) { + bitSet.set(i); + } + } else { + bitSet.set(i); + } + } + return bitSet; + } + } + @Override public boolean applyFilter(RowIntf value, int dimOrdinalMax) throws FilterUnsupportedException, IOException { diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/TrueFilterExecutor.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/TrueFilterExecutor.java index 08831a40d78..3fd37165d6b 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/TrueFilterExecutor.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/TrueFilterExecutor.java @@ -45,6 +45,15 @@ public BitSetGroup applyFilter(RawBlockletColumnChunks rawBlockletColumnChunks, return group; } + @Override + public BitSet prunePages(RawBlockletColumnChunks rawBlockletColumnChunks) + throws FilterUnsupportedException, IOException { + int numberOfPages = rawBlockletColumnChunks.getDataBlock().numberOfPages(); + BitSet set = new BitSet(numberOfPages); + set.set(0, numberOfPages); + return set; + } + @Override public boolean applyFilter(RowIntf value, int dimOrdinalMax) { return true; diff --git a/core/src/main/java/org/apache/carbondata/core/scan/model/QueryModel.java b/core/src/main/java/org/apache/carbondata/core/scan/model/QueryModel.java index d90c35ea922..0951da0e716 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/model/QueryModel.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/model/QueryModel.java @@ -124,6 +124,11 @@ public class QueryModel { private boolean preFetchData = true; + /** + * It fills the vector directly from decoded column page with out any staging and conversions + */ + private boolean isDirectVectorFill; + private QueryModel(CarbonTable 
carbonTable) { tableBlockInfos = new ArrayList(); invalidSegmentIds = new ArrayList<>(); @@ -406,6 +411,14 @@ public void setPreFetchData(boolean preFetchData) { this.preFetchData = preFetchData; } + public boolean isDirectVectorFill() { + return isDirectVectorFill; + } + + public void setDirectVectorFill(boolean directVectorFill) { + isDirectVectorFill = directVectorFill; + } + @Override public String toString() { return String.format("scan on table %s.%s, %d projection columns with filter (%s)", diff --git a/core/src/main/java/org/apache/carbondata/core/scan/result/BlockletScannedResult.java b/core/src/main/java/org/apache/carbondata/core/scan/result/BlockletScannedResult.java index 9191d080bf5..3e58fd6232e 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/result/BlockletScannedResult.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/result/BlockletScannedResult.java @@ -72,6 +72,11 @@ public abstract class BlockletScannedResult { */ private int[] pageFilteredRowCount; + /** + * Filtered pages to be decoded and loaded to vector. + */ + private int[] pagesFiltered; + /** * to keep track of number of rows process */ @@ -304,7 +309,7 @@ public void fillColumnarImplicitBatch(ColumnVectorInfo[] vectorInfo) { j : pageFilteredRowId[pageCounter][j]); } - vector.putBytes(vectorOffset++, + vector.putByteArray(vectorOffset++, data.getBytes(Charset.forName(CarbonCommonConstants.DEFAULT_CHARSET))); } } @@ -341,6 +346,19 @@ public void incrementPageCounter() { } } + /** + * Just increment the page counter and reset the remaining counters. + */ + public void incrementPageCounter(ColumnVectorInfo[] vectorInfos) { + rowCounter = 0; + currentRow = -1; + pageCounter++; + if (null != deletedRecordMap && pageCounter < pagesFiltered.length) { + currentDeleteDeltaVo = + deletedRecordMap.get(blockletNumber + "_" + pagesFiltered[pageCounter]); + } + } + /** * This case is used only in case of compaction, since it does not use filter flow. 
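The page-level flow enabled by the new prunePages/setPagesFiltered plumbing can be summarised with a small standalone sketch: a min/max check per page yields a BitSet, and only the set bits are materialised as the pagesFiltered array that is later decoded straight into vectors. The class and method names below are illustrative only; they are not part of this patch.

import java.util.Arrays;
import java.util.BitSet;

// Simplified sketch of page-level pruning: keep a page only when the filter
// value can still match its [min, max] interval, then expose the surviving
// page numbers the way setPagesFiltered(int[]) expects them.
public final class PagePruningSketch {

  static BitSet prunePages(long[] pageMin, long[] pageMax, long filterValue) {
    BitSet pages = new BitSet(pageMin.length);
    for (int i = 0; i < pageMin.length; i++) {
      // "less than or equal" style check: the page needs scanning only if its
      // minimum does not already exceed the filter value.
      if (pageMin[i] <= filterValue) {
        pages.set(i);
      }
    }
    return pages;
  }

  static int[] toPagesFiltered(BitSet pages) {
    int[] pagesFiltered = new int[pages.cardinality()];
    int index = 0;
    for (int i = pages.nextSetBit(0); i >= 0; i = pages.nextSetBit(i + 1)) {
      pagesFiltered[index++] = i;
    }
    return pagesFiltered;
  }

  public static void main(String[] args) {
    long[] min = {0, 50, 120};
    long[] max = {49, 119, 200};
    BitSet pages = prunePages(min, max, 100L);     // pages 0 and 1 survive
    int[] pagesFiltered = toPagesFiltered(pages);  // -> [0, 1]
    System.out.println(Arrays.toString(pagesFiltered));
  }
}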
*/ @@ -369,6 +387,36 @@ public void fillDataChunks() { pageUncompressTime.getCount() + (System.currentTimeMillis() - startTime)); } + /** + * Fill all the vectors with data by decompressing/decoding the column page + */ + public void fillDataChunks(ColumnVectorInfo[] dictionaryInfo, ColumnVectorInfo[] noDictionaryInfo, + ColumnVectorInfo[] msrVectorInfo, int[] measuresOrdinal) { + freeDataChunkMemory(); + if (pageCounter >= pageFilteredRowCount.length) { + return; + } + long startTime = System.currentTimeMillis(); + + for (int i = 0; i < this.dictionaryColumnChunkIndexes.length; i++) { + dimRawColumnChunks[dictionaryColumnChunkIndexes[i]] + .convertToDimColDataChunkAndFillVector(pagesFiltered[pageCounter], dictionaryInfo[i]); + } + for (int i = 0; i < this.noDictionaryColumnChunkIndexes.length; i++) { + dimRawColumnChunks[noDictionaryColumnChunkIndexes[i]] + .convertToDimColDataChunkAndFillVector(pagesFiltered[pageCounter], noDictionaryInfo[i]); + } + + for (int i = 0; i < measuresOrdinal.length; i++) { + msrRawColumnChunks[measuresOrdinal[i]] + .convertToColumnPageAndFillVector(pagesFiltered[pageCounter], msrVectorInfo[i]); + } + QueryStatistic pageUncompressTime = queryStatisticsModel.getStatisticsTypeAndObjMap() + .get(QueryStatisticsConstants.PAGE_UNCOMPRESS_TIME); + pageUncompressTime.addCountStatistic(QueryStatisticsConstants.PAGE_UNCOMPRESS_TIME, + pageUncompressTime.getCount() + (System.currentTimeMillis() - startTime)); + } + // free the memory for the last page chunk private void freeDataChunkMemory() { for (int i = 0; i < dimensionColumnPages.length; i++) { @@ -390,6 +438,14 @@ public int numberOfpages() { return pageFilteredRowCount.length; } + public int[] getPagesFiltered() { + return pagesFiltered; + } + + public void setPagesFiltered(int[] pagesFiltered) { + this.pagesFiltered = pagesFiltered; + } + /** * Get total rows in the current page * @@ -513,7 +569,13 @@ public void setBlockletId(String blockletId) { // if deleted recors map is present for this block // then get the first page deleted vo if (null != deletedRecordMap) { - currentDeleteDeltaVo = deletedRecordMap.get(blockletNumber + '_' + pageCounter); + String key; + if (pagesFiltered != null) { + key = blockletNumber + '_' + pagesFiltered[pageCounter]; + } else { + key = blockletNumber + '_' + pageCounter; + } + currentDeleteDeltaVo = deletedRecordMap.get(key); } } @@ -616,6 +678,12 @@ public void freeMemory() { */ public void setPageFilteredRowCount(int[] pageFilteredRowCount) { this.pageFilteredRowCount = pageFilteredRowCount; + if (pagesFiltered == null) { + pagesFiltered = new int[pageFilteredRowCount.length]; + for (int i = 0; i < pagesFiltered.length; i++) { + pagesFiltered[i] = i; + } + } } /** @@ -714,6 +782,10 @@ public int markFilteredRows(CarbonColumnarBatch columnarBatch, int startRow, int return rowsFiltered; } + public DeleteDeltaVo getCurrentDeleteDeltaVo() { + return currentDeleteDeltaVo; + } + /** * Below method will be used to check row got deleted * diff --git a/core/src/main/java/org/apache/carbondata/core/scan/result/vector/CarbonColumnVector.java b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/CarbonColumnVector.java index dd0e8b9c2c1..f6708844895 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/result/vector/CarbonColumnVector.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/CarbonColumnVector.java @@ -27,18 +27,26 @@ public interface CarbonColumnVector { void putFloat(int rowId, float value); + void putFloats(int rowId, int count, 
float[] src, int srcIndex); + void putShort(int rowId, short value); void putShorts(int rowId, int count, short value); + void putShorts(int rowId, int count, short[] src, int srcIndex); + void putInt(int rowId, int value); void putInts(int rowId, int count, int value); + void putInts(int rowId, int count, int[] src, int srcIndex); + void putLong(int rowId, long value); void putLongs(int rowId, int count, long value); + void putLongs(int rowId, int count, long[] src, int srcIndex); + void putDecimal(int rowId, BigDecimal value, int precision); void putDecimals(int rowId, int count, BigDecimal value, int precision); @@ -47,14 +55,18 @@ public interface CarbonColumnVector { void putDoubles(int rowId, int count, double value); - void putBytes(int rowId, byte[] value); + void putDoubles(int rowId, int count, double[] src, int srcIndex); - void putBytes(int rowId, int count, byte[] value); + void putByteArray(int rowId, byte[] value); - void putBytes(int rowId, int offset, int length, byte[] value); + void putByteArray(int rowId, int offset, int length, byte[] value); void putByte(int rowId, byte value); + void putBytes(int rowId, int count, byte[] value); + + void putBytes(int rowId, int count, byte[] src, int srcIndex); + void putNull(int rowId); void putNulls(int rowId, int count); diff --git a/core/src/main/java/org/apache/carbondata/core/scan/result/vector/CarbonColumnarBatch.java b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/CarbonColumnarBatch.java index 803715cbd26..471f9b2f114 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/result/vector/CarbonColumnarBatch.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/CarbonColumnarBatch.java @@ -56,7 +56,9 @@ public void reset() { actualSize = 0; rowCounter = 0; rowsFiltered = 0; - Arrays.fill(filteredRows, false); + if (filteredRows != null) { + Arrays.fill(filteredRows, false); + } for (int i = 0; i < columnVectors.length; i++) { columnVectors[i].reset(); } diff --git a/core/src/main/java/org/apache/carbondata/core/scan/result/vector/CarbonDictionary.java b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/CarbonDictionary.java index 50d2ac50d86..2147c437be0 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/result/vector/CarbonDictionary.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/CarbonDictionary.java @@ -27,4 +27,6 @@ public interface CarbonDictionary { void setDictionaryUsed(); byte[] getDictionaryValue(int index); + + byte[][] getAllDictionaryValues(); } diff --git a/core/src/main/java/org/apache/carbondata/core/scan/result/vector/ColumnVectorInfo.java b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/ColumnVectorInfo.java index 59117ddc981..6a9b3b3eec3 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/result/vector/ColumnVectorInfo.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/ColumnVectorInfo.java @@ -16,7 +16,10 @@ */ package org.apache.carbondata.core.scan.result.vector; +import java.util.BitSet; + import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator; +import org.apache.carbondata.core.metadata.datatype.DecimalConverterFactory; import org.apache.carbondata.core.scan.filter.GenericQueryType; import org.apache.carbondata.core.scan.model.ProjectionDimension; import org.apache.carbondata.core.scan.model.ProjectionMeasure; @@ -32,6 +35,9 @@ public class ColumnVectorInfo implements Comparable { public 
DirectDictionaryGenerator directDictionaryGenerator; public MeasureDataVectorProcessor.MeasureVectorFiller measureVectorFiller; public GenericQueryType genericQueryType; + public int[] invertedIndex; + public BitSet deletedRows; + public DecimalConverterFactory.DecimalConverter decimalConverter; @Override public int compareTo(ColumnVectorInfo o) { return ordinal - o.ordinal; diff --git a/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/CarbonColumnVectorImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/CarbonColumnVectorImpl.java index f8f663f421e..5dfd6ca24b3 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/CarbonColumnVectorImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/CarbonColumnVectorImpl.java @@ -146,7 +146,7 @@ public CarbonColumnVectorImpl(int batchSize, DataType dataType) { } } - @Override public void putBytes(int rowId, byte[] value) { + @Override public void putByteArray(int rowId, byte[] value) { bytes[rowId] = value; } @@ -160,7 +160,7 @@ public CarbonColumnVectorImpl(int batchSize, DataType dataType) { } } - @Override public void putBytes(int rowId, int offset, int length, byte[] value) { + @Override public void putByteArray(int rowId, int offset, int length, byte[] value) { bytes[rowId] = new byte[length]; System.arraycopy(value, offset, bytes[rowId], 0, length); } @@ -227,6 +227,31 @@ public boolean isNullAt(int rowId) { } } + public Object getDataArray() { + if (dataType == DataTypes.BOOLEAN || dataType == DataTypes.BYTE) { + return byteArr; + } else if (dataType == DataTypes.SHORT) { + return shorts; + } else if (dataType == DataTypes.INT) { + return ints; + } else if (dataType == DataTypes.LONG || dataType == DataTypes.TIMESTAMP) { + return longs; + } else if (dataType == DataTypes.FLOAT) { + return floats; + } else if (dataType == DataTypes.DOUBLE) { + return doubles; + } else if (dataType instanceof DecimalType) { + return decimals; + } else if (dataType == DataTypes.STRING || dataType == DataTypes.BYTE_ARRAY) { + if (null != carbonDictionary) { + return ints; + } + return bytes; + } else { + return data; + } + } + @Override public void reset() { nullBytes.clear(); if (dataType == DataTypes.BOOLEAN || dataType == DataTypes.BYTE) { @@ -287,4 +312,42 @@ public void setBlockDataType(DataType blockDataType) { * as an optimization to prevent setting nulls. 
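getDataArray() exposes the staging vector's typed backing array so that a later flush can move a whole page in one pass instead of per-value calls. A minimal standalone sketch of that idea follows; the classes are illustrative, not CarbonColumnVectorImpl itself.

import java.util.Arrays;

// Illustrative staging vector: values are collected into a plain typed array,
// and getDataArray() hands the array back so a flush step can bulk-copy it.
final class StagingIntVector {
  private final int[] ints;

  StagingIntVector(int size) {
    ints = new int[size];
  }

  void putInt(int rowId, int value) {
    ints[rowId] = value;
  }

  Object getDataArray() {
    return ints;
  }
}

final class StagingVectorDemo {
  public static void main(String[] args) {
    StagingIntVector staging = new StagingIntVector(4);
    for (int i = 0; i < 4; i++) {
      staging.putInt(i, i * 10);
    }
    // Flush in one arraycopy instead of four per-value calls on the target.
    int[] target = new int[4];
    System.arraycopy((int[]) staging.getDataArray(), 0, target, 0, 4);
    System.out.println(Arrays.toString(target)); // [0, 10, 20, 30]
  }
}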
*/ public final boolean anyNullsSet() { return anyNullsSet; } + + @Override public void putFloats(int rowId, int count, float[] src, int srcIndex) { + for (int i = srcIndex; i < count; i++) { + floats[rowId ++] = src[i]; + } + } + + @Override public void putShorts(int rowId, int count, short[] src, int srcIndex) { + for (int i = srcIndex; i < count; i++) { + shorts[rowId ++] = src[i]; + } + } + + @Override public void putInts(int rowId, int count, int[] src, int srcIndex) { + for (int i = srcIndex; i < count; i++) { + ints[rowId ++] = src[i]; + } + } + + @Override public void putLongs(int rowId, int count, long[] src, int srcIndex) { + for (int i = srcIndex; i < count; i++) { + longs[rowId ++] = src[i]; + } + } + + @Override public void putDoubles(int rowId, int count, double[] src, int srcIndex) { + for (int i = srcIndex; i < count; i++) { + doubles[rowId ++] = src[i]; + } + } + + @Override public void putBytes(int rowId, int count, byte[] src, int srcIndex) { + for (int i = srcIndex; i < count; i++) { + byteArr[rowId ++] = src[i]; + } + } + + } diff --git a/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/CarbonDictionaryImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/CarbonDictionaryImpl.java index cc3a03cc8a9..c8fd57389b2 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/CarbonDictionaryImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/CarbonDictionaryImpl.java @@ -51,4 +51,7 @@ public CarbonDictionaryImpl(byte[][] dictionary, int actualSize) { return dictionary[index]; } + @Override public byte[][] getAllDictionaryValues() { + return dictionary; + } } diff --git a/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/directread/AbstractCarbonColumnarVector.java b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/directread/AbstractCarbonColumnarVector.java new file mode 100644 index 00000000000..437eee42bc1 --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/directread/AbstractCarbonColumnarVector.java @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.core.scan.result.vector.impl.directread; + +import java.math.BigDecimal; + +import org.apache.carbondata.core.metadata.datatype.DataType; +import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector; +import org.apache.carbondata.core.scan.result.vector.CarbonDictionary; + +public abstract class AbstractCarbonColumnarVector + implements CarbonColumnVector, ConvertableVector { + + @Override + public void putShorts(int rowId, int count, short value) { + throw new UnsupportedOperationException("Not allowed from here"); + } + + @Override + public void putInts(int rowId, int count, int value) { + throw new UnsupportedOperationException("Not allowed from here"); + } + + @Override + public void putLongs(int rowId, int count, long value) { + throw new UnsupportedOperationException("Not allowed from here"); + } + + @Override + public void putDecimals(int rowId, int count, BigDecimal value, int precision) { + throw new UnsupportedOperationException("Not allowed from here"); + } + + @Override + public void putDoubles(int rowId, int count, double value) { + throw new UnsupportedOperationException("Not allowed from here"); + } + + @Override + public void putBytes(int rowId, int count, byte[] value) { + throw new UnsupportedOperationException("Not allowed from here"); + } + + @Override + public void putNulls(int rowId, int count) { + throw new UnsupportedOperationException("Not allowed from here"); + } + + @Override + public void putNotNull(int rowId) { + throw new UnsupportedOperationException("Not allowed from here"); + } + + @Override + public void putNotNull(int rowId, int count) { + throw new UnsupportedOperationException("Not allowed from here"); + } + + @Override + public boolean isNull(int rowId) { + throw new UnsupportedOperationException("Not allowed from here"); + } + + @Override + public void putObject(int rowId, Object obj) { + throw new UnsupportedOperationException("Not allowed from here"); + } + + @Override + public Object getData(int rowId) { + throw new UnsupportedOperationException("Not allowed from here"); + } + + @Override + public void reset() { + throw new UnsupportedOperationException("Not allowed from here"); + } + + @Override + public DataType getType() { + throw new UnsupportedOperationException("Not allowed from here"); + } + + @Override + public DataType getBlockDataType() { + throw new UnsupportedOperationException("Not allowed from here"); + } + + @Override + public void setBlockDataType(DataType blockDataType) { + throw new UnsupportedOperationException("Not allowed from here"); + } + + @Override + public void setFilteredRowsExist(boolean filteredRowsExist) { + throw new UnsupportedOperationException("Not allowed from here"); + } + + @Override + public void setDictionary(CarbonDictionary dictionary) { + throw new UnsupportedOperationException("Not allowed from here"); + } + + @Override + public boolean hasDictionary() { + throw new UnsupportedOperationException("Not allowed from here"); + } + + @Override + public CarbonColumnVector getDictionaryVector() { + throw new UnsupportedOperationException("Not allowed from here"); + } + + @Override + public void convert() { + // Do nothing + } +} diff --git a/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/directread/ColumnarVectorWrapperDirectFactory.java b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/directread/ColumnarVectorWrapperDirectFactory.java new file mode 100644 index 00000000000..4884b4de232 --- /dev/null +++ 
b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/directread/ColumnarVectorWrapperDirectFactory.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.core.scan.result.vector.impl.directread; + +import java.util.BitSet; + +import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector; + +/** + * Factory to create ColumnarVectors for inverted index and delete delta queries. + */ +public final class ColumnarVectorWrapperDirectFactory { + + /** + * Gets carbon vector wrapper to fill the underlying vector based on inverted index and delete + * delta. + * + * @param columnVector Actual vector to be filled. + * @param invertedIndex Inverted index of column page + * @param nullBitset row locations of nulls in bitset + * @param deletedRows deleted rows locations in bitset. + * @param isnullBitsExists whether nullbitset present on this page, usually for dimension columns + * there is no null bitset. + * @return wrapped CarbonColumnVector + */ + public static CarbonColumnVector getDirectVectorWrapperFactory(CarbonColumnVector columnVector, + int[] invertedIndex, BitSet nullBitset, BitSet deletedRows, boolean isnullBitsExists) { + if ((invertedIndex != null && invertedIndex.length > 0) && (deletedRows == null || deletedRows + .isEmpty())) { + return new ColumnarVectorWrapperDirectWithInvertedIndex(columnVector, invertedIndex, + isnullBitsExists); + } else if ((invertedIndex == null || invertedIndex.length == 0) && (deletedRows != null + && !deletedRows.isEmpty())) { + return new ColumnarVectorWrapperDirectWithDeleteDelta(columnVector, deletedRows, nullBitset); + } else if ((invertedIndex != null && invertedIndex.length > 0) && (deletedRows != null + && !deletedRows.isEmpty())) { + return new ColumnarVectorWrapperDirectWithDeleteDeltaAndInvertedIndex(columnVector, + deletedRows, invertedIndex, nullBitset, isnullBitsExists); + } else { + return columnVector; + } + } + +} diff --git a/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/directread/ColumnarVectorWrapperDirectWithDeleteDelta.java b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/directread/ColumnarVectorWrapperDirectWithDeleteDelta.java new file mode 100644 index 00000000000..ccde63e87be --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/directread/ColumnarVectorWrapperDirectWithDeleteDelta.java @@ -0,0 +1,216 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
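Assuming the classes added by this patch are on the classpath, a decoder would obtain the appropriate wrapper from the factory before filling values; which wrapper is returned depends only on whether the page carries an inverted index and/or a delete delta. A hedged usage sketch:

import java.util.BitSet;

import org.apache.carbondata.core.metadata.datatype.DataTypes;
import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector;
import org.apache.carbondata.core.scan.result.vector.impl.CarbonColumnVectorImpl;
import org.apache.carbondata.core.scan.result.vector.impl.directread.ColumnarVectorWrapperDirectFactory;

// Sketch: pick the wrapper for a page that has an inverted index but no
// delete delta, then fill values in page (sorted) order and let the wrapper
// re-order them into row order on the target vector.
public final class WrapperSelectionSketch {
  public static void main(String[] args) {
    CarbonColumnVector target = new CarbonColumnVectorImpl(3, DataTypes.INT);

    int[] invertedIndex = new int[] {2, 0, 1};  // page position -> row position
    BitSet nullBits = new BitSet(3);            // no nulls in this page
    BitSet deletedRows = null;                  // no delete delta

    CarbonColumnVector vector = ColumnarVectorWrapperDirectFactory
        .getDirectVectorWrapperFactory(target, invertedIndex, nullBits, deletedRows, false);

    // Values arrive in page order; the wrapper writes them to rows 2, 0, 1.
    vector.putInt(0, 30);
    vector.putInt(1, 10);
    vector.putInt(2, 20);
    // target now holds 10, 20, 30 at rows 0, 1, 2.
  }
}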
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.core.scan.result.vector.impl.directread; + +import java.math.BigDecimal; +import java.util.BitSet; + +import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector; + +/** + * Column vector for column pages which has delete delta, so it uses delta biset to filter out + * data before filling to actual vector. + */ +class ColumnarVectorWrapperDirectWithDeleteDelta extends AbstractCarbonColumnarVector { + + private BitSet deletedRows; + + private BitSet nullBits; + + private int counter; + + private CarbonColumnVector columnVector; + + public ColumnarVectorWrapperDirectWithDeleteDelta(CarbonColumnVector vectorWrapper, + BitSet deletedRows, BitSet nullBits) { + this.deletedRows = deletedRows; + this.nullBits = nullBits; + this.columnVector = vectorWrapper; + } + + @Override + public void putBoolean(int rowId, boolean value) { + if (!deletedRows.get(rowId)) { + if (nullBits.get(rowId)) { + columnVector.putNull(counter++); + } else { + columnVector.putBoolean(counter++, value); + } + } + } + + @Override + public void putFloat(int rowId, float value) { + if (!deletedRows.get(rowId)) { + if (nullBits.get(rowId)) { + columnVector.putNull(counter++); + } else { + columnVector.putFloat(counter++, value); + } + } + } + + @Override + public void putShort(int rowId, short value) { + if (!deletedRows.get(rowId)) { + if (nullBits.get(rowId)) { + columnVector.putNull(counter++); + } else { + columnVector.putShort(counter++, value); + } + } + } + + @Override + public void putInt(int rowId, int value) { + if (!deletedRows.get(rowId)) { + if (nullBits.get(rowId)) { + columnVector.putNull(counter++); + } else { + columnVector.putInt(counter++, value); + } + } + } + + @Override + public void putLong(int rowId, long value) { + if (!deletedRows.get(rowId)) { + if (nullBits.get(rowId)) { + columnVector.putNull(counter++); + } else { + columnVector.putLong(counter++, value); + } + } + } + + @Override + public void putDecimal(int rowId, BigDecimal value, int precision) { + if (!deletedRows.get(rowId)) { + if (nullBits.get(rowId)) { + columnVector.putNull(counter++); + } else { + columnVector.putDecimal(counter++, value, precision); + } + } + } + + @Override + public void putDouble(int rowId, double value) { + if (!deletedRows.get(rowId)) { + if (nullBits.get(rowId)) { + columnVector.putNull(counter++); + } else { + columnVector.putDouble(counter++, value); + } + } + } + + @Override + public void putByteArray(int rowId, byte[] value) { + if (!deletedRows.get(rowId)) { + if (nullBits.get(rowId)) { + columnVector.putNull(counter++); + } else { + columnVector.putByteArray(counter++, value); + } + } + } + + @Override + public void putByteArray(int rowId, int offset, int length, byte[] value) { + if (!deletedRows.get(rowId)) { + if (nullBits.get(rowId)) { + columnVector.putNull(counter++); + } else { + columnVector.putByteArray(counter++, offset, length, value); + } + } + } + + @Override + public 
void putByte(int rowId, byte value) { + if (!deletedRows.get(rowId)) { + if (nullBits.get(rowId)) { + columnVector.putNull(counter++); + } else { + columnVector.putByte(counter++, value); + } + } + } + + @Override + public void putNull(int rowId) { + if (!deletedRows.get(rowId)) { + columnVector.putNull(counter++); + } + } + + @Override + public void putFloats(int rowId, int count, float[] src, int srcIndex) { + for (int i = srcIndex; i < count; i++) { + if (!deletedRows.get(rowId++)) { + columnVector.putFloat(counter++, src[i]); + } + } + } + + @Override + public void putShorts(int rowId, int count, short[] src, int srcIndex) { + for (int i = srcIndex; i < count; i++) { + if (!deletedRows.get(rowId++)) { + columnVector.putShort(counter++, src[i]); + } + } + } + + @Override + public void putInts(int rowId, int count, int[] src, int srcIndex) { + for (int i = srcIndex; i < count; i++) { + if (!deletedRows.get(rowId++)) { + columnVector.putInt(counter++, src[i]); + } + } + } + + @Override + public void putLongs(int rowId, int count, long[] src, int srcIndex) { + for (int i = srcIndex; i < count; i++) { + if (!deletedRows.get(rowId++)) { + columnVector.putLong(counter++, src[i]); + } + } + } + + @Override + public void putDoubles(int rowId, int count, double[] src, int srcIndex) { + for (int i = srcIndex; i < count; i++) { + if (!deletedRows.get(rowId++)) { + columnVector.putDouble(counter++, src[i]); + } + } + } + + @Override + public void putBytes(int rowId, int count, byte[] src, int srcIndex) { + for (int i = srcIndex; i < count; i++) { + if (!deletedRows.get(rowId++)) { + columnVector.putByte(counter++, src[i]); + } + } + } +} diff --git a/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/directread/ColumnarVectorWrapperDirectWithDeleteDeltaAndInvertedIndex.java b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/directread/ColumnarVectorWrapperDirectWithDeleteDeltaAndInvertedIndex.java new file mode 100644 index 00000000000..30be9527ad4 --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/directread/ColumnarVectorWrapperDirectWithDeleteDeltaAndInvertedIndex.java @@ -0,0 +1,258 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
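The delete-delta wrapper above simply drops values whose rowId is flagged in the delete bitset and writes the survivors to the next free slot of the target vector (the "counter" field). A standalone sketch of that compaction, using only JDK types:

import java.util.Arrays;
import java.util.BitSet;

// Standalone sketch of the delete-delta behaviour: rows set in the delete
// bitset are skipped, everything else is compacted to the front of the target.
public final class DeleteDeltaSketch {
  public static void main(String[] args) {
    int[] pageValues = {10, 20, 30, 40};
    BitSet deletedRows = new BitSet(pageValues.length);
    deletedRows.set(1); // row 1 was removed by a delete-delta file

    int[] target = new int[pageValues.length - deletedRows.cardinality()];
    int counter = 0;
    for (int rowId = 0; rowId < pageValues.length; rowId++) {
      if (!deletedRows.get(rowId)) {
        target[counter++] = pageValues[rowId];
      }
    }
    System.out.println(Arrays.toString(target)); // [10, 30, 40]
  }
}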
+ */ + +package org.apache.carbondata.core.scan.result.vector.impl.directread; + +import java.math.BigDecimal; +import java.util.BitSet; + +import org.apache.carbondata.core.metadata.datatype.DataType; +import org.apache.carbondata.core.metadata.datatype.DataTypes; +import org.apache.carbondata.core.metadata.datatype.DecimalType; +import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector; +import org.apache.carbondata.core.scan.result.vector.impl.CarbonColumnVectorImpl; + +/** + * Column vector for column pages which has delete delta and inverted index, so it uses delta biset + * to filter out data and use inverted index before filling to actual vector + */ +class ColumnarVectorWrapperDirectWithDeleteDeltaAndInvertedIndex + extends AbstractCarbonColumnarVector implements ConvertableVector { + + private BitSet deletedRows; + + private int[] invertedIndex; + + private CarbonColumnVectorImpl carbonColumnVector; + + private CarbonColumnVector columnVector; + + private int precision; + + private BitSet nullBits; + + private boolean isnullBitsExists; + + public ColumnarVectorWrapperDirectWithDeleteDeltaAndInvertedIndex( + CarbonColumnVector vectorWrapper, BitSet deletedRows, int[] invertedIndex, BitSet nullBits, + boolean isnullBitsExists) { + this.deletedRows = deletedRows; + this.invertedIndex = invertedIndex; + carbonColumnVector = new CarbonColumnVectorImpl(invertedIndex.length, vectorWrapper.getType()); + this.columnVector = vectorWrapper; + this.nullBits = nullBits; + this.isnullBitsExists = isnullBitsExists; + } + + @Override + public void putBoolean(int rowId, boolean value) { + carbonColumnVector.putBoolean(invertedIndex[rowId], value); + } + + @Override + public void putFloat(int rowId, float value) { + carbonColumnVector.putFloat(invertedIndex[rowId], value); + } + + @Override + public void putShort(int rowId, short value) { + carbonColumnVector.putShort(invertedIndex[rowId], value); + } + + @Override + public void putInt(int rowId, int value) { + carbonColumnVector.putInt(invertedIndex[rowId], value); + } + + @Override + public void putLong(int rowId, long value) { + carbonColumnVector.putLong(invertedIndex[rowId], value); + } + + @Override + public void putDecimal(int rowId, BigDecimal value, int precision) { + this.precision = precision; + carbonColumnVector.putDecimal(invertedIndex[rowId], value, precision); + } + + @Override + public void putDouble(int rowId, double value) { + carbonColumnVector.putDouble(invertedIndex[rowId], value); + } + + @Override + public void putByteArray(int rowId, byte[] value) { + carbonColumnVector.putByteArray(invertedIndex[rowId], value); + } + + @Override + public void putByteArray(int rowId, int offset, int length, byte[] value) { + carbonColumnVector.putByteArray(invertedIndex[rowId], offset, length, value); + } + + @Override + public void putByte(int rowId, byte value) { + carbonColumnVector.putByte(invertedIndex[rowId], value); + } + + @Override + public void putNull(int rowId) { + if (isnullBitsExists) { + nullBits.set(rowId); + } else { + nullBits.set(invertedIndex[rowId]); + } + } + + @Override + public void putFloats(int rowId, int count, float[] src, int srcIndex) { + for (int i = srcIndex; i < count; i++) { + carbonColumnVector.putFloat(invertedIndex[rowId++], src[i]); + } + } + + @Override + public void putShorts(int rowId, int count, short[] src, int srcIndex) { + for (int i = srcIndex; i < count; i++) { + carbonColumnVector.putShort(invertedIndex[rowId++], src[i]); + } + } + + @Override + public void putInts(int rowId, int 
count, int[] src, int srcIndex) { + for (int i = srcIndex; i < count; i++) { + carbonColumnVector.putInt(invertedIndex[rowId++], src[i]); + } + } + + @Override + public void putLongs(int rowId, int count, long[] src, int srcIndex) { + for (int i = srcIndex; i < count; i++) { + carbonColumnVector.putLong(invertedIndex[rowId++], src[i]); + } + } + + @Override + public void putDoubles(int rowId, int count, double[] src, int srcIndex) { + for (int i = srcIndex; i < count; i++) { + carbonColumnVector.putDouble(invertedIndex[rowId++], src[i]); + } + } + + @Override + public void putBytes(int rowId, int count, byte[] src, int srcIndex) { + for (int i = srcIndex; i < count; i++) { + carbonColumnVector.putByte(invertedIndex[rowId++], src[i]); + } + } + + @Override + public void convert() { + DataType dataType = columnVector.getType(); + int length = invertedIndex.length; + int counter = 0; + if (dataType == DataTypes.BOOLEAN || dataType == DataTypes.BYTE) { + byte[] dataArray = (byte[]) carbonColumnVector.getDataArray(); + for (int i = 0; i < length; i++) { + if (!deletedRows.get(i)) { + if (nullBits.get(i)) { + columnVector.putNull(counter++); + } else { + columnVector.putByte(counter++, dataArray[i]); + } + } + } + } else if (dataType == DataTypes.SHORT) { + short[] dataArray = (short[]) carbonColumnVector.getDataArray(); + for (int i = 0; i < length; i++) { + if (!deletedRows.get(i)) { + if (nullBits.get(i)) { + columnVector.putNull(counter++); + } else { + columnVector.putShort(counter++, dataArray[i]); + } + } + } + } else if (dataType == DataTypes.INT) { + int[] dataArray = (int[]) carbonColumnVector.getDataArray(); + for (int i = 0; i < length; i++) { + if (!deletedRows.get(i)) { + if (nullBits.get(i)) { + columnVector.putNull(counter++); + } else { + columnVector.putInt(counter++, dataArray[i]); + } + } + } + } else if (dataType == DataTypes.LONG || dataType == DataTypes.TIMESTAMP) { + long[] dataArray = (long[]) carbonColumnVector.getDataArray(); + for (int i = 0; i < length; i++) { + if (!deletedRows.get(i)) { + if (nullBits.get(i)) { + columnVector.putNull(counter++); + } else { + columnVector.putLong(counter++, dataArray[i]); + } + } + } + } else if (dataType == DataTypes.FLOAT) { + float[] dataArray = (float[]) carbonColumnVector.getDataArray(); + for (int i = 0; i < length; i++) { + if (!deletedRows.get(i)) { + if (nullBits.get(i)) { + columnVector.putNull(counter++); + } else { + columnVector.putFloat(counter++, dataArray[i]); + } + } + } + } else if (dataType == DataTypes.DOUBLE) { + double[] dataArray = (double[]) carbonColumnVector.getDataArray(); + for (int i = 0; i < length; i++) { + if (!deletedRows.get(i)) { + if (nullBits.get(i)) { + columnVector.putNull(counter++); + } else { + columnVector.putDouble(counter++, dataArray[i]); + } + } + } + } else if (dataType instanceof DecimalType) { + BigDecimal[] dataArray = (BigDecimal[]) carbonColumnVector.getDataArray(); + for (int i = 0; i < length; i++) { + if (!deletedRows.get(i)) { + if (nullBits.get(i)) { + columnVector.putNull(counter++); + } else { + columnVector.putDecimal(counter++, dataArray[i], precision); + } + } + } + } else if (dataType == DataTypes.STRING || dataType == DataTypes.BYTE_ARRAY) { + byte[][] dataArray = (byte[][]) carbonColumnVector.getDataArray(); + for (int i = 0; i < length; i++) { + if (!deletedRows.get(i)) { + if (nullBits.get(i)) { + columnVector.putNull(counter++); + } else { + columnVector.putByteArray(counter++, dataArray[i]); + } + } + } + } + } +} diff --git 
a/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/directread/ColumnarVectorWrapperDirectWithInvertedIndex.java b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/directread/ColumnarVectorWrapperDirectWithInvertedIndex.java new file mode 100644 index 00000000000..3bed91c0dce --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/directread/ColumnarVectorWrapperDirectWithInvertedIndex.java @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.core.scan.result.vector.impl.directread; + +import java.math.BigDecimal; + +import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector; + +/** + * Column vector for column pages which has inverted index, so it uses inverted index + * before filling to actual vector + */ +class ColumnarVectorWrapperDirectWithInvertedIndex extends AbstractCarbonColumnarVector { + + private int[] invertedIndex; + + private CarbonColumnVector columnVector; + + private boolean isnullBitsExists; + + public ColumnarVectorWrapperDirectWithInvertedIndex(CarbonColumnVector columnVector, + int[] invertedIndex, boolean isnullBitsExists) { + this.invertedIndex = invertedIndex; + this.columnVector = columnVector; + this.isnullBitsExists = isnullBitsExists; + } + + @Override + public void putBoolean(int rowId, boolean value) { + columnVector.putBoolean(invertedIndex[rowId], value); + } + + @Override + public void putFloat(int rowId, float value) { + columnVector.putFloat(invertedIndex[rowId], value); + } + + @Override + public void putShort(int rowId, short value) { + columnVector.putShort(invertedIndex[rowId], value); + } + + @Override + public void putInt(int rowId, int value) { + columnVector.putInt(invertedIndex[rowId], value); + } + + @Override + public void putLong(int rowId, long value) { + columnVector.putLong(invertedIndex[rowId], value); + } + + @Override + public void putDecimal(int rowId, BigDecimal value, int precision) { + columnVector.putDecimal(invertedIndex[rowId], value, precision); + } + + @Override + public void putDouble(int rowId, double value) { + columnVector.putDouble(invertedIndex[rowId], value); + } + + @Override + public void putByteArray(int rowId, byte[] value) { + columnVector.putByteArray(invertedIndex[rowId], value); + } + + @Override + public void putByteArray(int rowId, int offset, int length, byte[] value) { + columnVector.putByteArray(invertedIndex[rowId], offset, length, value); + } + + + @Override + public void putByte(int rowId, byte value) { + columnVector.putByte(invertedIndex[rowId], value); + } + + @Override + public void putNull(int rowId) { + if (isnullBitsExists) { + columnVector.putNull(rowId); + } else { + columnVector.putNull(invertedIndex[rowId]); + } 
+ } + + @Override + public void putFloats(int rowId, int count, float[] src, int srcIndex) { + for (int i = srcIndex; i < count; i++) { + columnVector.putFloat(invertedIndex[rowId++], src[i]); + } + } + + @Override + public void putShorts(int rowId, int count, short[] src, int srcIndex) { + for (int i = srcIndex; i < count; i++) { + columnVector.putShort(invertedIndex[rowId++], src[i]); + } + } + + @Override + public void putInts(int rowId, int count, int[] src, int srcIndex) { + for (int i = srcIndex; i < count; i++) { + columnVector.putInt(invertedIndex[rowId++], src[i]); + } + } + + @Override + public void putLongs(int rowId, int count, long[] src, int srcIndex) { + for (int i = srcIndex; i < count; i++) { + columnVector.putLong(invertedIndex[rowId++], src[i]); + } + } + + @Override + public void putDoubles(int rowId, int count, double[] src, int srcIndex) { + for (int i = srcIndex; i < count; i++) { + columnVector.putDouble(invertedIndex[rowId++], src[i]); + } + } + + @Override + public void putBytes(int rowId, int count, byte[] src, int srcIndex) { + for (int i = srcIndex; i < count; i++) { + columnVector.putByte(invertedIndex[rowId++], src[i]); + } + } +} diff --git a/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/directread/ConvertableVector.java b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/directread/ConvertableVector.java new file mode 100644 index 00000000000..7020c66ee45 --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/scan/result/vector/impl/directread/ConvertableVector.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.core.scan.result.vector.impl.directread; + +/** + * This interface provides method to convert the values by using inverted index and delete delta + * and fill to the underlying vector. + */ +public interface ConvertableVector { + + /** + * Convert the values and fill it to the underlying vector. 
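When a page has both an inverted index and a delete delta, the wrapper shown earlier stages values in row order first and only flushes them in convert(), dropping deleted rows and honouring the null bitset. A simplified, self-contained sketch of that two-phase pattern (not the wrapper class itself):

import java.util.Arrays;
import java.util.BitSet;

public final class ConvertSketch {
  public static void main(String[] args) {
    int[] invertedIndex = {2, 0, 1};
    int[] pageValues = {30, 10, 20};   // values in page (sorted) order
    BitSet deletedRows = new BitSet();
    deletedRows.set(1);                // row 1 is deleted
    BitSet nullBits = new BitSet();    // no nulls in this page

    // Phase 1: stage by inverted index (page order -> row order).
    int[] staged = new int[pageValues.length];
    for (int i = 0; i < pageValues.length; i++) {
      staged[invertedIndex[i]] = pageValues[i];
    }

    // Phase 2: "convert" - flush to the output, skipping deleted rows and
    // writing a placeholder where the wrapper would call putNull.
    int counter = 0;
    int[] output = new int[staged.length - deletedRows.cardinality()];
    for (int i = 0; i < staged.length; i++) {
      if (!deletedRows.get(i)) {
        output[counter++] = nullBits.get(i) ? Integer.MIN_VALUE : staged[i];
      }
    }
    System.out.println(Arrays.toString(output)); // [10, 30]
  }
}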
+ */ + void convert(); +} diff --git a/core/src/main/java/org/apache/carbondata/core/scan/scanner/impl/BlockletFilterScanner.java b/core/src/main/java/org/apache/carbondata/core/scan/scanner/impl/BlockletFilterScanner.java index 57849e2bd2b..2ad84831f44 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/scanner/impl/BlockletFilterScanner.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/scanner/impl/BlockletFilterScanner.java @@ -98,7 +98,11 @@ public BlockletFilterScanner(BlockExecutionInfo blockExecutionInfo, @Override public BlockletScannedResult scanBlocklet(RawBlockletColumnChunks rawBlockletColumnChunks) throws IOException, FilterUnsupportedException { - return executeFilter(rawBlockletColumnChunks); + if (blockExecutionInfo.isDirectVectorFill()) { + return executeFilterForPages(rawBlockletColumnChunks); + } else { + return executeFilter(rawBlockletColumnChunks); + } } @Override @@ -316,4 +320,165 @@ private BlockletScannedResult executeFilter(RawBlockletColumnChunks rawBlockletC readTime.getCount() + dimensionReadTime); return scannedResult; } + + /** + * This method will process the data in below order + * 1. first apply min max on the filter tree and check whether any of the filter + * is fall on the range of min max, if not then return empty result + * 2. If filter falls on min max range then apply filter on actual + * data and get the pruned pages. + * 3. if pruned pages are not empty then read only those blocks(measure or dimension) + * which was present in the query but not present in the filter, as while applying filter + * some of the blocks where already read and present in chunk holder so not need to + * read those blocks again, this is to avoid reading of same blocks which was already read + * 4. Set the blocks and filter pages to scanned result + * + * @param rawBlockletColumnChunks blocklet raw chunk of all columns + * @throws FilterUnsupportedException + */ + private BlockletScannedResult executeFilterForPages( + RawBlockletColumnChunks rawBlockletColumnChunks) + throws FilterUnsupportedException, IOException { + long startTime = System.currentTimeMillis(); + QueryStatistic totalBlockletStatistic = queryStatisticsModel.getStatisticsTypeAndObjMap() + .get(QueryStatisticsConstants.TOTAL_BLOCKLET_NUM); + totalBlockletStatistic.addCountStatistic(QueryStatisticsConstants.TOTAL_BLOCKLET_NUM, + totalBlockletStatistic.getCount() + 1); + // apply filter on actual data, for each page + BitSet pages = this.filterExecuter.prunePages(rawBlockletColumnChunks); + // if filter result is empty then return with empty result + if (pages.isEmpty()) { + CarbonUtil.freeMemory(rawBlockletColumnChunks.getDimensionRawColumnChunks(), + rawBlockletColumnChunks.getMeasureRawColumnChunks()); + + QueryStatistic scanTime = queryStatisticsModel.getStatisticsTypeAndObjMap() + .get(QueryStatisticsConstants.SCAN_BLOCKlET_TIME); + scanTime.addCountStatistic(QueryStatisticsConstants.SCAN_BLOCKlET_TIME, + scanTime.getCount() + (System.currentTimeMillis() - startTime)); + + QueryStatistic scannedPages = queryStatisticsModel.getStatisticsTypeAndObjMap() + .get(QueryStatisticsConstants.PAGE_SCANNED); + scannedPages.addCountStatistic(QueryStatisticsConstants.PAGE_SCANNED, + scannedPages.getCount()); + return createEmptyResult(); + } + + BlockletScannedResult scannedResult = + new FilterQueryScannedResult(blockExecutionInfo, queryStatisticsModel); + + // valid scanned blocklet + QueryStatistic validScannedBlockletStatistic = queryStatisticsModel.getStatisticsTypeAndObjMap() + 
.get(QueryStatisticsConstants.VALID_SCAN_BLOCKLET_NUM); + validScannedBlockletStatistic + .addCountStatistic(QueryStatisticsConstants.VALID_SCAN_BLOCKLET_NUM, + validScannedBlockletStatistic.getCount() + 1); + // adding statistics for valid number of pages + QueryStatistic validPages = queryStatisticsModel.getStatisticsTypeAndObjMap() + .get(QueryStatisticsConstants.VALID_PAGE_SCANNED); + validPages.addCountStatistic(QueryStatisticsConstants.VALID_PAGE_SCANNED, + validPages.getCount() + pages.cardinality()); + QueryStatistic scannedPages = queryStatisticsModel.getStatisticsTypeAndObjMap() + .get(QueryStatisticsConstants.PAGE_SCANNED); + scannedPages.addCountStatistic(QueryStatisticsConstants.PAGE_SCANNED, + scannedPages.getCount() + pages.cardinality()); + // get the row indexes from bit set for each page + int[] pageFilteredPages = new int[pages.cardinality()]; + int[] numberOfRows = new int[pages.cardinality()]; + int index = 0; + for (int i = pages.nextSetBit(0); i >= 0; i = pages.nextSetBit(i + 1)) { + pageFilteredPages[index] = i; + numberOfRows[index++] = rawBlockletColumnChunks.getDataBlock().getPageRowCount(i); + } + // count(*) case there would not be any dimensions are measures selected. + long dimensionReadTime = System.currentTimeMillis(); + dimensionReadTime = System.currentTimeMillis() - dimensionReadTime; + + FileReader fileReader = rawBlockletColumnChunks.getFileReader(); + + + DimensionRawColumnChunk[] dimensionRawColumnChunks = + new DimensionRawColumnChunk[blockExecutionInfo.getTotalNumberDimensionToRead()]; + int numDimensionChunks = dimensionRawColumnChunks.length; + // read dimension chunk blocks from file which is not present + for (int chunkIndex = 0; chunkIndex < numDimensionChunks; chunkIndex++) { + dimensionRawColumnChunks[chunkIndex] = + rawBlockletColumnChunks.getDimensionRawColumnChunks()[chunkIndex]; + } + int[][] allSelectedDimensionColumnIndexRange = + blockExecutionInfo.getAllSelectedDimensionColumnIndexRange(); + DimensionRawColumnChunk[] projectionListDimensionChunk = rawBlockletColumnChunks.getDataBlock() + .readDimensionChunks(fileReader, allSelectedDimensionColumnIndexRange); + for (int[] columnIndexRange : allSelectedDimensionColumnIndexRange) { + System.arraycopy(projectionListDimensionChunk, columnIndexRange[0], + dimensionRawColumnChunks, columnIndexRange[0], + columnIndexRange[1] + 1 - columnIndexRange[0]); + } + + /* + * in case projection if the projected dimension are not loaded in the dimensionColumnDataChunk + * then loading them + */ + int[] projectionListDimensionIndexes = blockExecutionInfo.getProjectionListDimensionIndexes(); + for (int projectionListDimensionIndex : projectionListDimensionIndexes) { + if (null == dimensionRawColumnChunks[projectionListDimensionIndex]) { + dimensionRawColumnChunks[projectionListDimensionIndex] = + rawBlockletColumnChunks.getDataBlock().readDimensionChunk( + fileReader, projectionListDimensionIndex); + } + } + + DimensionColumnPage[][] dimensionColumnPages = + new DimensionColumnPage[numDimensionChunks][pages.cardinality()]; + MeasureRawColumnChunk[] measureRawColumnChunks = + new MeasureRawColumnChunk[blockExecutionInfo.getTotalNumberOfMeasureToRead()]; + int numMeasureChunks = measureRawColumnChunks.length; + + // read the measure chunk blocks which is not present + for (int chunkIndex = 0; chunkIndex < numMeasureChunks; chunkIndex++) { + if (null != rawBlockletColumnChunks.getMeasureRawColumnChunks()[chunkIndex]) { + measureRawColumnChunks[chunkIndex] = + 
rawBlockletColumnChunks.getMeasureRawColumnChunks()[chunkIndex]; + } + } + + int[][] allSelectedMeasureColumnIndexRange = + blockExecutionInfo.getAllSelectedMeasureIndexRange(); + MeasureRawColumnChunk[] projectionListMeasureChunk = rawBlockletColumnChunks.getDataBlock() + .readMeasureChunks(fileReader, allSelectedMeasureColumnIndexRange); + for (int[] columnIndexRange : allSelectedMeasureColumnIndexRange) { + System.arraycopy(projectionListMeasureChunk, columnIndexRange[0], measureRawColumnChunks, + columnIndexRange[0], columnIndexRange[1] + 1 - columnIndexRange[0]); + } + /* + * in case projection if the projected measure are not loaded in the ColumnPage + * then loading them + */ + int[] projectionListMeasureIndexes = blockExecutionInfo.getProjectionListMeasureIndexes(); + for (int projectionListMeasureIndex : projectionListMeasureIndexes) { + if (null == measureRawColumnChunks[projectionListMeasureIndex]) { + measureRawColumnChunks[projectionListMeasureIndex] = rawBlockletColumnChunks.getDataBlock() + .readMeasureChunk(fileReader, projectionListMeasureIndex); + } + } + ColumnPage[][] measureColumnPages = new ColumnPage[numMeasureChunks][pages.cardinality()]; + scannedResult.setDimensionColumnPages(dimensionColumnPages); + scannedResult.setMeasureColumnPages(measureColumnPages); + scannedResult.setDimRawColumnChunks(dimensionRawColumnChunks); + scannedResult.setMsrRawColumnChunks(measureRawColumnChunks); + scannedResult.setPageFilteredRowCount(numberOfRows); + scannedResult.setPagesFiltered(pageFilteredPages); + scannedResult.setBlockletId( + blockExecutionInfo.getBlockIdString() + CarbonCommonConstants.FILE_SEPARATOR + + rawBlockletColumnChunks.getDataBlock().blockletIndex()); + // adding statistics for carbon scan time + QueryStatistic scanTime = queryStatisticsModel.getStatisticsTypeAndObjMap() + .get(QueryStatisticsConstants.SCAN_BLOCKlET_TIME); + scanTime.addCountStatistic(QueryStatisticsConstants.SCAN_BLOCKlET_TIME, + scanTime.getCount() + (System.currentTimeMillis() - startTime - dimensionReadTime)); + QueryStatistic readTime = queryStatisticsModel.getStatisticsTypeAndObjMap() + .get(QueryStatisticsConstants.READ_BLOCKlET_TIME); + readTime.addCountStatistic(QueryStatisticsConstants.READ_BLOCKlET_TIME, + readTime.getCount() + dimensionReadTime); + return scannedResult; + } } diff --git a/core/src/main/java/org/apache/carbondata/core/scan/scanner/impl/BlockletFullScanner.java b/core/src/main/java/org/apache/carbondata/core/scan/scanner/impl/BlockletFullScanner.java index 4ec8cb690a2..62674bc8a08 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/scanner/impl/BlockletFullScanner.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/scanner/impl/BlockletFullScanner.java @@ -123,7 +123,9 @@ public BlockletScannedResult scanBlocklet( } } scannedResult.setPageFilteredRowCount(numberOfRows); - scannedResult.fillDataChunks(); + if (!blockExecutionInfo.isDirectVectorFill()) { + scannedResult.fillDataChunks(); + } // adding statistics for carbon scan time QueryStatistic scanTime = queryStatisticsModel.getStatisticsTypeAndObjMap() .get(QueryStatisticsConstants.SCAN_BLOCKlET_TIME); diff --git a/core/src/main/java/org/apache/carbondata/core/stats/QueryStatisticsModel.java b/core/src/main/java/org/apache/carbondata/core/stats/QueryStatisticsModel.java index 9635896e86a..2a294ae607a 100644 --- a/core/src/main/java/org/apache/carbondata/core/stats/QueryStatisticsModel.java +++ b/core/src/main/java/org/apache/carbondata/core/stats/QueryStatisticsModel.java @@ -20,11 +20,20 @@ 
import java.util.HashMap; import java.util.Map; +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.util.CarbonProperties; + public class QueryStatisticsModel { + private QueryStatisticsRecorder recorder; + private Map statisticsTypeAndObjMap = new HashMap(); + private boolean isEnabled = Boolean.parseBoolean(CarbonProperties.getInstance() + .getProperty(CarbonCommonConstants.ENABLE_QUERY_STATISTICS, + CarbonCommonConstants.ENABLE_QUERY_STATISTICS_DEFAULT)); + public QueryStatisticsRecorder getRecorder() { return recorder; } @@ -36,4 +45,8 @@ public void setRecorder(QueryStatisticsRecorder recorder) { public Map getStatisticsTypeAndObjMap() { return statisticsTypeAndObjMap; } + + public boolean isEnabled() { + return isEnabled; + } } diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java b/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java index a32ad52567e..4c4dac60949 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java +++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonProperties.java @@ -1531,6 +1531,12 @@ public int getSortMemorySpillPercentage() { return spillPercentage; } + public boolean getPushRowFiltersForVector() { + String pushFilters = getProperty(CarbonCommonConstants.CARBON_PUSH_ROW_FILTERS_FOR_VECTOR, + CarbonCommonConstants.CARBON_PUSH_ROW_FILTERS_FOR_VECTOR_DEFAULT); + return Boolean.parseBoolean(pushFilters); + } + private void validateSortMemorySpillPercentage() { String spillPercentageStr = carbonProperties.getProperty( CARBON_LOAD_SORT_MEMORY_SPILL_PERCENTAGE, @@ -1589,4 +1595,6 @@ private void validateStringCharacterLimit() { CarbonCommonConstants.CARBON_MINMAX_ALLOWED_BYTE_COUNT_DEFAULT); } } + + } diff --git a/core/src/test/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImplTest.java b/core/src/test/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImplTest.java index b9e90d6c6be..2662cee6678 100644 --- a/core/src/test/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImplTest.java +++ b/core/src/test/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImplTest.java @@ -270,11 +270,11 @@ public void testRangBinarySearch() { long newTime = 0; long start; long end; - + // dimension's data number in a blocklet, usually default is 32000 - int dataChunkSize = 32000; + int dataChunkSize = 32000; // repeat query times in the test - int queryTimes = 10000; + int queryTimes = 10000; // repeated times for a dictionary value int repeatTimes = 200; //filtered value count in a blocklet diff --git a/core/src/test/java/org/apache/carbondata/core/util/CarbonUtilTest.java b/core/src/test/java/org/apache/carbondata/core/util/CarbonUtilTest.java index a4abc613a69..ecd61bd6e5a 100644 --- a/core/src/test/java/org/apache/carbondata/core/util/CarbonUtilTest.java +++ b/core/src/test/java/org/apache/carbondata/core/util/CarbonUtilTest.java @@ -807,7 +807,7 @@ public DataInputStream getDataInputStream(String path, FileFactory.FileType file .getFirstIndexUsingBinarySearch(fixedLengthDimensionDataChunk, 1, 3, compareValue, true); assertEquals(2, result); } - + @Test public void testBinaryRangeSearch() { diff --git a/integration/presto/src/main/java/org/apache/carbondata/presto/CarbonColumnVectorWrapper.java b/integration/presto/src/main/java/org/apache/carbondata/presto/CarbonColumnVectorWrapper.java index b843709f954..7d6eda03678 100644 --- 
a/integration/presto/src/main/java/org/apache/carbondata/presto/CarbonColumnVectorWrapper.java +++ b/integration/presto/src/main/java/org/apache/carbondata/presto/CarbonColumnVectorWrapper.java @@ -150,24 +150,24 @@ public CarbonColumnVectorWrapper(CarbonColumnVectorImpl columnVector, boolean[] } } - @Override public void putBytes(int rowId, byte[] value) { + @Override public void putByteArray(int rowId, byte[] value) { if (!filteredRows[rowId]) { - columnVector.putBytes(counter++, value); + columnVector.putByteArray(counter++, value); } } @Override public void putBytes(int rowId, int count, byte[] value) { for (int i = 0; i < count; i++) { if (!filteredRows[rowId]) { - columnVector.putBytes(counter++, value); + columnVector.putByteArray(counter++, value); } rowId++; } } - @Override public void putBytes(int rowId, int offset, int length, byte[] value) { + @Override public void putByteArray(int rowId, int offset, int length, byte[] value) { if (!filteredRows[rowId]) { - columnVector.putBytes(counter++, offset, length, value); + columnVector.putByteArray(counter++, offset, length, value); } } @@ -246,4 +246,59 @@ public void setBlockDataType(DataType blockDataType) { return this.columnVector; } + @Override public void putFloats(int rowId, int count, float[] src, int srcIndex) { + for (int i = srcIndex; i < count; i++) { + if (!filteredRows[rowId]) { + columnVector.putFloat(counter++, src[i]); + } + rowId++; + } + } + + @Override public void putShorts(int rowId, int count, short[] src, int srcIndex) { + for (int i = srcIndex; i < count; i++) { + if (!filteredRows[rowId]) { + columnVector.putShort(counter++, src[i]); + } + rowId++; + } + } + + @Override public void putInts(int rowId, int count, int[] src, int srcIndex) { + for (int i = srcIndex; i < count; i++) { + if (!filteredRows[rowId]) { + columnVector.putInt(counter++, src[i]); + } + rowId++; + } + } + + @Override public void putLongs(int rowId, int count, long[] src, int srcIndex) { + for (int i = srcIndex; i < count; i++) { + if (!filteredRows[rowId]) { + columnVector.putLong(counter++, src[i]); + } + rowId++; + } + } + + @Override public void putDoubles(int rowId, int count, double[] src, int srcIndex) { + for (int i = srcIndex; i < count; i++) { + if (!filteredRows[rowId]) { + columnVector.putDouble(counter++, src[i]); + } + rowId++; + } + } + + @Override public void putBytes(int rowId, int count, byte[] src, int srcIndex) { + for (int i = srcIndex; i < count; i++) { + if (!filteredRows[rowId]) { + columnVector.putByte(counter++, src[i]); + } + rowId++; + } + } + + } diff --git a/integration/presto/src/main/java/org/apache/carbondata/presto/readers/SliceStreamReader.java b/integration/presto/src/main/java/org/apache/carbondata/presto/readers/SliceStreamReader.java index 39fd19a0d0d..ab270fcc685 100644 --- a/integration/presto/src/main/java/org/apache/carbondata/presto/readers/SliceStreamReader.java +++ b/integration/presto/src/main/java/org/apache/carbondata/presto/readers/SliceStreamReader.java @@ -71,11 +71,11 @@ public SliceStreamReader(int batchSize, DataType dataType, values[rowId] = value; } - @Override public void putBytes(int rowId, byte[] value) { + @Override public void putByteArray(int rowId, byte[] value) { type.writeSlice(builder, wrappedBuffer(value)); } - @Override public void putBytes(int rowId, int offset, int length, byte[] value) { + @Override public void putByteArray(int rowId, int offset, int length, byte[] value) { byte[] byteArr = new byte[length]; System.arraycopy(value, offset, byteArr, 0, length); 
type.writeSlice(builder, wrappedBuffer(byteArr)); diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/detailquery/CastColumnTestCase.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/detailquery/CastColumnTestCase.scala index 24524b808d5..a98923014c3 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/detailquery/CastColumnTestCase.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/detailquery/CastColumnTestCase.scala @@ -224,7 +224,7 @@ class CastColumnTestCase extends QueryTest with BeforeAndAfterAll { test("Dictionary INT In to implicit Int") { checkAnswer( - sql("select empno,empname,workgroupcategory from DICTIONARY_CARBON_1 where workgroupcategory in ('1', '2')"), + sql("select empno,empname,workgroupcategory from DICTIONARY_CARBON_1 where workgroupcategory in (1, 2)"), sql("select empno,empname,workgroupcategory from DICTIONARY_HIVE_1 where workgroupcategory in ('1', '2')") ) } diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/filterexpr/AllDataTypesTestCaseFilter.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/filterexpr/AllDataTypesTestCaseFilter.scala index 73786c837dc..c96e6435690 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/filterexpr/AllDataTypesTestCaseFilter.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/filterexpr/AllDataTypesTestCaseFilter.scala @@ -57,15 +57,25 @@ class AllDataTypesTestCaseFilter extends QueryTest with BeforeAndAfterAll { test("verify like query ends with filter push down") { val df = sql("select * from alldatatypestableFilter where empname like '%nandh'").queryExecution .sparkPlan - assert(df.asInstanceOf[CarbonDataSourceScan].metadata - .get("PushedFilters").get.contains("CarbonEndsWith")) + if (df.isInstanceOf[CarbonDataSourceScan]) { + assert(df.asInstanceOf[CarbonDataSourceScan].metadata + .get("PushedFilters").get.contains("CarbonEndsWith")) + } else { + assert(df.children.head.asInstanceOf[CarbonDataSourceScan].metadata + .get("PushedFilters").get.contains("CarbonEndsWith")) + } } test("verify like query contains with filter push down") { val df = sql("select * from alldatatypestableFilter where empname like '%nand%'").queryExecution .sparkPlan - assert(df.asInstanceOf[CarbonDataSourceScan].metadata - .get("PushedFilters").get.contains("CarbonContainsWith")) + if (df.isInstanceOf[CarbonDataSourceScan]) { + assert(df.asInstanceOf[CarbonDataSourceScan].metadata + .get("PushedFilters").get.contains("CarbonContainsWith")) + } else { + assert(df.children.head.asInstanceOf[CarbonDataSourceScan].metadata + .get("PushedFilters").get.contains("CarbonContainsWith")) + } } override def afterAll { diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonScanRDD.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonScanRDD.scala index 1a7eae2d7d2..25550bde787 100644 --- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonScanRDD.scala +++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonScanRDD.scala @@ -90,6 +90,8 @@ class CarbonScanRDD[T: ClassTag]( } private var vectorReader = false + private var directScan = false + private val bucketedTable = 
tableInfo.getFactTable.getBucketingInfo @transient val LOGGER = LogServiceFactory.getLogService(this.getClass.getName) @@ -228,9 +230,12 @@ class CarbonScanRDD[T: ClassTag]( statistic.addStatistics(QueryStatisticsConstants.BLOCK_ALLOCATION, System.currentTimeMillis) statisticRecorder.recordStatisticsForDriver(statistic, queryId) statistic = new QueryStatistic() - val carbonDistribution = CarbonProperties.getInstance().getProperty( + var carbonDistribution = CarbonProperties.getInstance().getProperty( CarbonCommonConstants.CARBON_TASK_DISTRIBUTION, CarbonCommonConstants.CARBON_TASK_DISTRIBUTION_DEFAULT) + if (directScan) { + carbonDistribution = CarbonCommonConstants.CARBON_TASK_DISTRIBUTION_MERGE_FILES + } // If bucketing is enabled on table then partitions should be grouped based on buckets. if (bucketedTable != null) { var i = 0 @@ -437,6 +442,7 @@ class CarbonScanRDD[T: ClassTag]( case _ => // create record reader for CarbonData file format if (vectorReader) { + model.setDirectVectorFill(directScan) val carbonRecordReader = createVectorizedCarbonRecordReader(model, inputMetricsStats, "true") @@ -748,4 +754,8 @@ class CarbonScanRDD[T: ClassTag]( vectorReader = boolean } + // TODO find a better way to set it. + def setDirectScanSupport(boolean: Boolean): Unit = { + directScan = boolean + } } diff --git a/integration/spark-datasource/src/main/scala/org/apache/carbondata/spark/vectorreader/ColumnarVectorWrapper.java b/integration/spark-datasource/src/main/scala/org/apache/carbondata/spark/vectorreader/ColumnarVectorWrapper.java index 51210276840..a605134730b 100644 --- a/integration/spark-datasource/src/main/scala/org/apache/carbondata/spark/vectorreader/ColumnarVectorWrapper.java +++ b/integration/spark-datasource/src/main/scala/org/apache/carbondata/spark/vectorreader/ColumnarVectorWrapper.java @@ -29,7 +29,9 @@ class ColumnarVectorWrapper implements CarbonColumnVector { - private CarbonVectorProxy sparkColumnVectorProxy; + private CarbonVectorProxy.ColumnVectorProxy sparkColumnVectorProxy; + + private CarbonVectorProxy carbonVectorProxy; private boolean[] filteredRows; @@ -47,8 +49,9 @@ class ColumnarVectorWrapper implements CarbonColumnVector { ColumnarVectorWrapper(CarbonVectorProxy writableColumnVector, boolean[] filteredRows, int ordinal) { - this.sparkColumnVectorProxy = writableColumnVector; + this.sparkColumnVectorProxy = writableColumnVector.getColumnVector(ordinal); this.filteredRows = filteredRows; + this.carbonVectorProxy = writableColumnVector; this.ordinal = ordinal; } @@ -167,7 +170,7 @@ class ColumnarVectorWrapper implements CarbonColumnVector { } } - @Override public void putBytes(int rowId, byte[] value) { + @Override public void putByteArray(int rowId, byte[] value) { if (!filteredRows[rowId]) { sparkColumnVectorProxy.putByteArray(counter++, value, ordinal); } @@ -182,7 +185,7 @@ class ColumnarVectorWrapper implements CarbonColumnVector { } } - @Override public void putBytes(int rowId, int offset, int length, byte[] value) { + @Override public void putByteArray(int rowId, int offset, int length, byte[] value) { if (!filteredRows[rowId]) { sparkColumnVectorProxy.putByteArray(counter++, value, offset, length, ordinal); } @@ -276,12 +279,67 @@ public void setFilteredRowsExist(boolean filteredRowsExist) { } public void reserveDictionaryIds() { - sparkColumnVectorProxy.reserveDictionaryIds(sparkColumnVectorProxy.numRows(), ordinal); - dictionaryVector = new ColumnarVectorWrapper(sparkColumnVectorProxy, filteredRows, ordinal); + 
sparkColumnVectorProxy.reserveDictionaryIds(carbonVectorProxy.numRows(), ordinal); + dictionaryVector = new ColumnarVectorWrapper(carbonVectorProxy, filteredRows, ordinal); ((ColumnarVectorWrapper) dictionaryVector).isDictionary = true; } @Override public CarbonColumnVector getDictionaryVector() { return dictionaryVector; } + + @Override public void putFloats(int rowId, int count, float[] src, int srcIndex) { + for (int i = srcIndex; i < count; i++) { + if (!filteredRows[rowId]) { + sparkColumnVectorProxy.putFloat(counter++, src[i], ordinal); + } + rowId++; + } + } + + @Override public void putShorts(int rowId, int count, short[] src, int srcIndex) { + for (int i = srcIndex; i < count; i++) { + if (!filteredRows[rowId]) { + sparkColumnVectorProxy.putShort(counter++, src[i], ordinal); + } + rowId++; + } + } + + @Override public void putInts(int rowId, int count, int[] src, int srcIndex) { + for (int i = srcIndex; i < count; i++) { + if (!filteredRows[rowId]) { + sparkColumnVectorProxy.putInt(counter++, src[i], ordinal); + } + rowId++; + } + } + + @Override public void putLongs(int rowId, int count, long[] src, int srcIndex) { + for (int i = srcIndex; i < count; i++) { + if (!filteredRows[rowId]) { + sparkColumnVectorProxy.putLong(counter++, src[i], ordinal); + } + rowId++; + } + } + + @Override public void putDoubles(int rowId, int count, double[] src, int srcIndex) { + for (int i = srcIndex; i < count; i++) { + if (!filteredRows[rowId]) { + sparkColumnVectorProxy.putDouble(counter++, src[i], ordinal); + } + rowId++; + } + } + + @Override public void putBytes(int rowId, int count, byte[] src, int srcIndex) { + for (int i = srcIndex; i < count; i++) { + if (!filteredRows[rowId]) { + sparkColumnVectorProxy.putByte(counter++, src[i], ordinal); + } + rowId++; + } + } + } diff --git a/integration/spark-datasource/src/main/scala/org/apache/carbondata/spark/vectorreader/ColumnarVectorWrapperDirect.java b/integration/spark-datasource/src/main/scala/org/apache/carbondata/spark/vectorreader/ColumnarVectorWrapperDirect.java new file mode 100644 index 00000000000..f6a90d27bd9 --- /dev/null +++ b/integration/spark-datasource/src/main/scala/org/apache/carbondata/spark/vectorreader/ColumnarVectorWrapperDirect.java @@ -0,0 +1,223 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.carbondata.spark.vectorreader; + +import java.math.BigDecimal; + +import org.apache.carbondata.core.metadata.datatype.DataType; +import org.apache.carbondata.core.scan.result.vector.CarbonColumnVector; +import org.apache.carbondata.core.scan.result.vector.CarbonDictionary; + +import org.apache.spark.sql.CarbonVectorProxy; +import org.apache.spark.sql.carbondata.execution.datasources.CarbonSparkDataSourceUtil; +import org.apache.spark.sql.types.Decimal; + +/** + * Fills the vector directly without considering any deleted rows. + */ +class ColumnarVectorWrapperDirect implements CarbonColumnVector { + + protected CarbonVectorProxy.ColumnVectorProxy sparkColumnVectorProxy; + + protected CarbonVectorProxy carbonVectorProxy; + + protected int ordinal; + + protected boolean isDictionary; + + private DataType blockDataType; + + private CarbonColumnVector dictionaryVector; + + ColumnarVectorWrapperDirect(CarbonVectorProxy writableColumnVector, int ordinal) { + this.sparkColumnVectorProxy = writableColumnVector.getColumnVector(ordinal); + this.carbonVectorProxy = writableColumnVector; + this.ordinal = ordinal; + } + + @Override public void putBoolean(int rowId, boolean value) { + sparkColumnVectorProxy.putBoolean(rowId, value, ordinal); + } + + @Override public void putFloat(int rowId, float value) { + sparkColumnVectorProxy.putFloat(rowId, value, ordinal); + } + + @Override public void putShort(int rowId, short value) { + sparkColumnVectorProxy.putShort(rowId, value, ordinal); + } + + @Override public void putShorts(int rowId, int count, short value) { + sparkColumnVectorProxy.putShorts(rowId, count, value, ordinal); + } + + @Override public void putInt(int rowId, int value) { + if (isDictionary) { + sparkColumnVectorProxy.putDictionaryInt(rowId, value, ordinal); + } else { + sparkColumnVectorProxy.putInt(rowId, value, ordinal); + } + } + + @Override public void putInts(int rowId, int count, int value) { + sparkColumnVectorProxy.putInts(rowId, count, value, ordinal); + } + + @Override public void putLong(int rowId, long value) { + sparkColumnVectorProxy.putLong(rowId, value, ordinal); + } + + @Override public void putLongs(int rowId, int count, long value) { + sparkColumnVectorProxy.putLongs(rowId, count, value, ordinal); + } + + @Override public void putDecimal(int rowId, BigDecimal value, int precision) { + Decimal toDecimal = Decimal.apply(value); + sparkColumnVectorProxy.putDecimal(rowId, toDecimal, precision, ordinal); + } + + @Override public void putDecimals(int rowId, int count, BigDecimal value, int precision) { + Decimal decimal = Decimal.apply(value); + for (int i = 0; i < count; i++) { + sparkColumnVectorProxy.putDecimal(rowId, decimal, precision, ordinal); + rowId++; + } + } + + @Override public void putDouble(int rowId, double value) { + sparkColumnVectorProxy.putDouble(rowId, value, ordinal); + } + + @Override public void putDoubles(int rowId, int count, double value) { + sparkColumnVectorProxy.putDoubles(rowId, count, value, ordinal); + } + + @Override public void putByteArray(int rowId, byte[] value) { + sparkColumnVectorProxy.putByteArray(rowId, value, ordinal); + } + + @Override + public void putBytes(int rowId, int count, byte[] value) { + for (int i = 0; i < count; i++) { + sparkColumnVectorProxy.putByteArray(rowId, value, ordinal); + rowId++; + } + } + + @Override public void putByteArray(int rowId, int offset, int length, byte[] value) { + sparkColumnVectorProxy.putByteArray(rowId, value, offset, length, ordinal); + } + + @Override public void 
putNull(int rowId) { + sparkColumnVectorProxy.putNull(rowId, ordinal); + } + + @Override public void putNulls(int rowId, int count) { + sparkColumnVectorProxy.putNulls(rowId, count, ordinal); + } + + @Override public void putNotNull(int rowId) { + sparkColumnVectorProxy.putNotNull(rowId, ordinal); + } + + @Override public void putNotNull(int rowId, int count) { + sparkColumnVectorProxy.putNotNulls(rowId, count, ordinal); + } + + @Override public boolean isNull(int rowId) { + return sparkColumnVectorProxy.isNullAt(rowId, ordinal); + } + + @Override public void putObject(int rowId, Object obj) { + //TODO handle complex types + } + + @Override public Object getData(int rowId) { + //TODO handle complex types + return null; + } + + @Override public void reset() { + if (null != dictionaryVector) { + dictionaryVector.reset(); + } + } + + @Override public DataType getType() { + return CarbonSparkDataSourceUtil + .convertSparkToCarbonDataType(sparkColumnVectorProxy.dataType(ordinal)); + } + + @Override public DataType getBlockDataType() { + return blockDataType; + } + + @Override public void setBlockDataType(DataType blockDataType) { + this.blockDataType = blockDataType; + } + + @Override public void setDictionary(CarbonDictionary dictionary) { + sparkColumnVectorProxy.setDictionary(dictionary, ordinal); + } + + @Override public boolean hasDictionary() { + return sparkColumnVectorProxy.hasDictionary(ordinal); + } + + public void reserveDictionaryIds() { + sparkColumnVectorProxy.reserveDictionaryIds(carbonVectorProxy.numRows(), ordinal); + dictionaryVector = new ColumnarVectorWrapperDirect(carbonVectorProxy, ordinal); + ((ColumnarVectorWrapperDirect) dictionaryVector).isDictionary = true; + } + + @Override public CarbonColumnVector getDictionaryVector() { + return dictionaryVector; + } + + @Override public void putByte(int rowId, byte value) { + sparkColumnVectorProxy.putByte(rowId, value, ordinal); + } + + @Override public void setFilteredRowsExist(boolean filteredRowsExist) { + + } + + @Override public void putFloats(int rowId, int count, float[] src, int srcIndex) { + sparkColumnVectorProxy.putFloats(rowId, count, src, srcIndex); + } + + @Override public void putShorts(int rowId, int count, short[] src, int srcIndex) { + sparkColumnVectorProxy.putShorts(rowId, count, src, srcIndex); + } + + @Override public void putInts(int rowId, int count, int[] src, int srcIndex) { + sparkColumnVectorProxy.putInts(rowId, count, src, srcIndex); + } + + @Override public void putLongs(int rowId, int count, long[] src, int srcIndex) { + sparkColumnVectorProxy.putLongs(rowId, count, src, srcIndex); + } + + @Override public void putDoubles(int rowId, int count, double[] src, int srcIndex) { + sparkColumnVectorProxy.putDoubles(rowId, count, src, srcIndex); + } + + @Override public void putBytes(int rowId, int count, byte[] src, int srcIndex) { + sparkColumnVectorProxy.putBytes(rowId, count, src, srcIndex); + } +} diff --git a/integration/spark-datasource/src/main/scala/org/apache/carbondata/spark/vectorreader/VectorizedCarbonRecordReader.java b/integration/spark-datasource/src/main/scala/org/apache/carbondata/spark/vectorreader/VectorizedCarbonRecordReader.java index 779c62f9ddc..77d867dc182 100644 --- a/integration/spark-datasource/src/main/scala/org/apache/carbondata/spark/vectorreader/VectorizedCarbonRecordReader.java +++ b/integration/spark-datasource/src/main/scala/org/apache/carbondata/spark/vectorreader/VectorizedCarbonRecordReader.java @@ -26,6 +26,7 @@ import org.apache.log4j.Logger; import 
org.apache.carbondata.common.logging.LogServiceFactory; import org.apache.carbondata.core.cache.dictionary.Dictionary; +import org.apache.carbondata.core.constants.CarbonV3DataFormatConstants; import org.apache.carbondata.core.datastore.block.TableBlockInfo; import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator; import org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryKeyGeneratorFactory; @@ -281,7 +282,12 @@ public void initBatch(MemoryMode memMode, StructType partitionColumns, schema = schema.add(field); } } - vectorProxy = new CarbonVectorProxy(DEFAULT_MEMORY_MODE,schema,DEFAULT_BATCH_SIZE); + short batchSize = DEFAULT_BATCH_SIZE; + if (queryModel.isDirectVectorFill()) { + batchSize = CarbonV3DataFormatConstants.NUMBER_OF_ROWS_PER_BLOCKLET_COLUMN_PAGE_DEFAULT; + } + vectorProxy = new CarbonVectorProxy(DEFAULT_MEMORY_MODE, schema, batchSize); + if (partitionColumns != null) { int partitionIdx = fields.length; for (int i = 0; i < partitionColumns.fields().length; i++) { @@ -290,12 +296,24 @@ public void initBatch(MemoryMode memMode, StructType partitionColumns, } } CarbonColumnVector[] vectors = new CarbonColumnVector[fields.length]; - boolean[] filteredRows = new boolean[vectorProxy.numRows()]; - for (int i = 0; i < fields.length; i++) { - vectors[i] = new ColumnarVectorWrapper(vectorProxy, filteredRows, i); - if (isNoDictStringField[i]) { - if (vectors[i] instanceof ColumnarVectorWrapper) { - ((ColumnarVectorWrapper) vectors[i]).reserveDictionaryIds(); + boolean[] filteredRows = null; + if (queryModel.isDirectVectorFill()) { + for (int i = 0; i < fields.length; i++) { + vectors[i] = new ColumnarVectorWrapperDirect(vectorProxy, i); + if (isNoDictStringField[i]) { + if (vectors[i] instanceof ColumnarVectorWrapperDirect) { + ((ColumnarVectorWrapperDirect) vectors[i]).reserveDictionaryIds(); + } + } + } + } else { + filteredRows = new boolean[vectorProxy.numRows()]; + for (int i = 0; i < fields.length; i++) { + vectors[i] = new ColumnarVectorWrapper(vectorProxy, filteredRows, i); + if (isNoDictStringField[i]) { + if (vectors[i] instanceof ColumnarVectorWrapper) { + ((ColumnarVectorWrapper) vectors[i]).reserveDictionaryIds(); + } } } } diff --git a/integration/spark-datasource/src/main/scala/org/apache/spark/sql/carbondata/execution/datasources/SparkCarbonFileFormat.scala b/integration/spark-datasource/src/main/scala/org/apache/spark/sql/carbondata/execution/datasources/SparkCarbonFileFormat.scala index 53b1bb12a7f..66eca1b0fd8 100644 --- a/integration/spark-datasource/src/main/scala/org/apache/spark/sql/carbondata/execution/datasources/SparkCarbonFileFormat.scala +++ b/integration/spark-datasource/src/main/scala/org/apache/spark/sql/carbondata/execution/datasources/SparkCarbonFileFormat.scala @@ -404,6 +404,7 @@ class SparkCarbonFileFormat extends FileFormat model.setFreeUnsafeMemory(!isAdded) } val carbonReader = if (readVector) { + model.setDirectVectorFill(true) val vectorizedReader = new VectorizedCarbonRecordReader(model, null, supportBatchValue.toString) diff --git a/integration/spark-datasource/src/main/spark2.1andspark2.2/org/apache/spark/sql/CarbonVectorProxy.java b/integration/spark-datasource/src/main/spark2.1andspark2.2/org/apache/spark/sql/CarbonVectorProxy.java index 80e6dbd946c..03466cc9354 100644 --- a/integration/spark-datasource/src/main/spark2.1andspark2.2/org/apache/spark/sql/CarbonVectorProxy.java +++ b/integration/spark-datasource/src/main/spark2.1andspark2.2/org/apache/spark/sql/CarbonVectorProxy.java @@ -45,6 
+45,8 @@ public class CarbonVectorProxy { private ColumnarBatch columnarBatch; + private ColumnVectorProxy[] columnVectorProxies; + /** * Adapter class which handles the columnar vector reading of the carbondata * based on the spark ColumnVector and ColumnarBatch API. This proxy class @@ -57,14 +59,22 @@ public class CarbonVectorProxy { */ public CarbonVectorProxy(MemoryMode memMode, int rowNum, StructField[] structFileds) { columnarBatch = ColumnarBatch.allocate(new StructType(structFileds), memMode, rowNum); + columnVectorProxies = new ColumnVectorProxy[columnarBatch.numCols()]; + for (int i = 0; i < columnVectorProxies.length; i++) { + columnVectorProxies[i] = new ColumnVectorProxy(columnarBatch, i); + } } public CarbonVectorProxy(MemoryMode memMode, StructType outputSchema, int rowNum) { columnarBatch = ColumnarBatch.allocate(outputSchema, memMode, rowNum); + columnVectorProxies = new ColumnVectorProxy[columnarBatch.numCols()]; + for (int i = 0; i < columnVectorProxies.length; i++) { + columnVectorProxies[i] = new ColumnVectorProxy(columnarBatch, i); + } } - public ColumnVector getColumnVector(int ordinal) { - return columnarBatch.column(ordinal); + public ColumnVectorProxy getColumnVector(int ordinal) { + return columnVectorProxies[ordinal]; } /** @@ -74,9 +84,6 @@ public void setNumRows(int numRows) { columnarBatch.setNumRows(numRows); } - public Object reserveDictionaryIds(int capacity , int ordinal) { - return columnarBatch.column(ordinal).reserveDictionaryIds(capacity); - } /** * Returns the number of rows for read, including filtered rows. @@ -85,22 +92,6 @@ public int numRows() { return columnarBatch.capacity(); } - public void setDictionary(CarbonDictionary dictionary, int ordinal) { - if (null != dictionary) { - columnarBatch.column(ordinal) - .setDictionary(new CarbonDictionaryWrapper(Encoding.PLAIN, dictionary)); - } else { - columnarBatch.column(ordinal).setDictionary(null); - } - } - - public void putNull(int rowId, int ordinal) { - columnarBatch.column(ordinal).putNull(rowId); - } - - public void putNulls(int rowId, int count, int ordinal) { - columnarBatch.column(ordinal).putNulls(rowId, count); - } /** * Called to close all the columns in this batch. It is not valid to access the data after @@ -139,9 +130,7 @@ public ColumnVector column(int ordinal) { return columnarBatch.column(ordinal); } - public boolean hasDictionary(int ordinal) { - return columnarBatch.column(ordinal).hasDictionary(); - } + /** * Resets this column for writing. The currently stored values are no longer accessible. 
@@ -150,127 +139,187 @@ public void reset() { columnarBatch.reset(); } - public void putRowToColumnBatch(int rowId, Object value, int offset) { - org.apache.spark.sql.types.DataType t = dataType(offset); - if (null == value) { - putNull(rowId, offset); - } else { - if (t == org.apache.spark.sql.types.DataTypes.BooleanType) { - putBoolean(rowId, (boolean) value, offset); - } else if (t == org.apache.spark.sql.types.DataTypes.ByteType) { - putByte(rowId, (byte) value, offset); - } else if (t == org.apache.spark.sql.types.DataTypes.ShortType) { - putShort(rowId, (short) value, offset); - } else if (t == org.apache.spark.sql.types.DataTypes.IntegerType) { - putInt(rowId, (int) value, offset); - } else if (t == org.apache.spark.sql.types.DataTypes.LongType) { - putLong(rowId, (long) value, offset); - } else if (t == org.apache.spark.sql.types.DataTypes.FloatType) { - putFloat(rowId, (float) value, offset); - } else if (t == org.apache.spark.sql.types.DataTypes.DoubleType) { - putDouble(rowId, (double) value, offset); - } else if (t == org.apache.spark.sql.types.DataTypes.StringType) { - UTF8String v = (UTF8String) value; - putByteArray(rowId, v.getBytes(), offset); - } else if (t instanceof org.apache.spark.sql.types.DecimalType) { - DecimalType dt = (DecimalType) t; - Decimal d = Decimal.fromDecimal(value); - if (dt.precision() <= Decimal.MAX_INT_DIGITS()) { - putInt(rowId, (int) d.toUnscaledLong(), offset); - } else if (dt.precision() <= Decimal.MAX_LONG_DIGITS()) { - putLong(rowId, d.toUnscaledLong(), offset); - } else { - final BigInteger integer = d.toJavaBigDecimal().unscaledValue(); - byte[] bytes = integer.toByteArray(); - putByteArray(rowId, bytes, 0, bytes.length, offset); + + public static class ColumnVectorProxy { + + private ColumnVector vector; + + public ColumnVectorProxy(ColumnarBatch columnarBatch, int ordinal) { + this.vector = columnarBatch.column(ordinal); + } + + public void putRowToColumnBatch(int rowId, Object value, int offset) { + org.apache.spark.sql.types.DataType t = dataType(offset); + if (null == value) { + putNull(rowId, offset); + } else { + if (t == org.apache.spark.sql.types.DataTypes.BooleanType) { + putBoolean(rowId, (boolean) value, offset); + } else if (t == org.apache.spark.sql.types.DataTypes.ByteType) { + putByte(rowId, (byte) value, offset); + } else if (t == org.apache.spark.sql.types.DataTypes.ShortType) { + putShort(rowId, (short) value, offset); + } else if (t == org.apache.spark.sql.types.DataTypes.IntegerType) { + putInt(rowId, (int) value, offset); + } else if (t == org.apache.spark.sql.types.DataTypes.LongType) { + putLong(rowId, (long) value, offset); + } else if (t == org.apache.spark.sql.types.DataTypes.FloatType) { + putFloat(rowId, (float) value, offset); + } else if (t == org.apache.spark.sql.types.DataTypes.DoubleType) { + putDouble(rowId, (double) value, offset); + } else if (t == org.apache.spark.sql.types.DataTypes.StringType) { + UTF8String v = (UTF8String) value; + putByteArray(rowId, v.getBytes(), offset); + } else if (t instanceof org.apache.spark.sql.types.DecimalType) { + DecimalType dt = (DecimalType) t; + Decimal d = Decimal.fromDecimal(value); + if (dt.precision() <= Decimal.MAX_INT_DIGITS()) { + putInt(rowId, (int) d.toUnscaledLong(), offset); + } else if (dt.precision() <= Decimal.MAX_LONG_DIGITS()) { + putLong(rowId, d.toUnscaledLong(), offset); + } else { + final BigInteger integer = d.toJavaBigDecimal().unscaledValue(); + byte[] bytes = integer.toByteArray(); + putByteArray(rowId, bytes, 0, bytes.length, offset); + } + } 
else if (t instanceof CalendarIntervalType) { + CalendarInterval c = (CalendarInterval) value; + vector.getChildColumn(0).putInt(rowId, c.months); + vector.getChildColumn(1).putLong(rowId, c.microseconds); + } else if (t instanceof org.apache.spark.sql.types.DateType) { + putInt(rowId, (int) value, offset); + } else if (t instanceof org.apache.spark.sql.types.TimestampType) { + putLong(rowId, (long) value, offset); } - } else if (t instanceof CalendarIntervalType) { - CalendarInterval c = (CalendarInterval) value; - columnarBatch.column(offset).getChildColumn(0).putInt(rowId, c.months); - columnarBatch.column(offset).getChildColumn(1).putLong(rowId, c.microseconds); - } else if (t instanceof org.apache.spark.sql.types.DateType) { - putInt(rowId, (int) value, offset); - } else if (t instanceof org.apache.spark.sql.types.TimestampType) { - putLong(rowId, (long) value, offset); } } - } - public void putBoolean(int rowId, boolean value, int ordinal) { - columnarBatch.column(ordinal).putBoolean(rowId, (boolean) value); - } + public void putBoolean(int rowId, boolean value, int ordinal) { + vector.putBoolean(rowId, value); + } - public void putByte(int rowId, byte value, int ordinal) { - columnarBatch.column(ordinal).putByte(rowId, (byte) value); - } + public void putByte(int rowId, byte value, int ordinal) { + vector.putByte(rowId, value); + } - public void putShort(int rowId, short value, int ordinal) { - columnarBatch.column(ordinal).putShort(rowId, (short) value); - } + public void putBytes(int rowId, int count, byte[] src, int srcIndex) { + vector.putBytes(rowId, count, src, srcIndex); + } - public void putInt(int rowId, int value, int ordinal) { - columnarBatch.column(ordinal).putInt(rowId, (int) value); - } + public void putShort(int rowId, short value, int ordinal) { + vector.putShort(rowId, value); + } - public void putFloat(int rowId, float value, int ordinal) { - columnarBatch.column(ordinal).putFloat(rowId, (float) value); - } + public void putInt(int rowId, int value, int ordinal) { + vector.putInt(rowId, value); + } - public void putLong(int rowId, long value, int ordinal) { - columnarBatch.column(ordinal).putLong(rowId, (long) value); - } + public void putFloat(int rowId, float value, int ordinal) { + vector.putFloat(rowId, value); + } - public void putDouble(int rowId, double value, int ordinal) { - columnarBatch.column(ordinal).putDouble(rowId, (double) value); - } + public void putFloats(int rowId, int count, float[] src, int srcIndex) { + vector.putFloats(rowId, count, src, srcIndex); + } - public void putByteArray(int rowId, byte[] value, int ordinal) { - columnarBatch.column(ordinal).putByteArray(rowId, (byte[]) value); - } + public void putLong(int rowId, long value, int ordinal) { + vector.putLong(rowId, value); + } - public void putInts(int rowId, int count, int value, int ordinal) { - columnarBatch.column(ordinal).putInts(rowId, count, value); - } + public void putDouble(int rowId, double value, int ordinal) { + vector.putDouble(rowId, value); + } - public void putShorts(int rowId, int count, short value, int ordinal) { - columnarBatch.column(ordinal).putShorts(rowId, count, value); - } + public void putByteArray(int rowId, byte[] value, int ordinal) { + vector.putByteArray(rowId, value); + } - public void putLongs(int rowId, int count, long value, int ordinal) { - columnarBatch.column(ordinal).putLongs(rowId, count, value); - } + public void putInts(int rowId, int count, int value, int ordinal) { + vector.putInts(rowId, count, value); + } - public void putDecimal(int 
rowId, Decimal value, int precision, int ordinal) { - columnarBatch.column(ordinal).putDecimal(rowId, value, precision); + public void putInts(int rowId, int count, int[] src, int srcIndex) { + vector.putInts(rowId, count, src, srcIndex); + } - } + public void putShorts(int rowId, int count, short value, int ordinal) { + vector.putShorts(rowId, count, value); + } - public void putDoubles(int rowId, int count, double value, int ordinal) { - columnarBatch.column(ordinal).putDoubles(rowId, count, value); - } + public void putShorts(int rowId, int count, short[] src, int srcIndex) { + vector.putShorts(rowId, count, src, srcIndex); + } - public void putByteArray(int rowId, byte[] value, int offset, int length, int ordinal) { - columnarBatch.column(ordinal).putByteArray(rowId, (byte[]) value, offset, length); - } + public void putLongs(int rowId, int count, long value, int ordinal) { + vector.putLongs(rowId, count, value); + } - public boolean isNullAt(int rowId, int ordinal) { - return columnarBatch - .column(ordinal).isNullAt(rowId); - } + public void putLongs(int rowId, int count, long[] src, int srcIndex) { + vector.putLongs(rowId, count, src, srcIndex); + } - public DataType dataType(int ordinal) { - return columnarBatch.column(ordinal).dataType(); - } + public void putDecimal(int rowId, Decimal value, int precision, int ordinal) { + vector.putDecimal(rowId, value, precision); - public void putNotNull(int rowId, int ordinal) { - columnarBatch.column(ordinal).putNotNull(rowId); - } + } - public void putNotNulls(int rowId, int count, int ordinal) { - columnarBatch.column(ordinal).putNotNulls(rowId, count); - } + public void putDoubles(int rowId, int count, double value, int ordinal) { + vector.putDoubles(rowId, count, value); + } + + public void putDoubles(int rowId, int count, double[] src, int srcIndex) { + vector.putDoubles(rowId, count, src, srcIndex); + } + + public void putByteArray(int rowId, byte[] value, int offset, int length, int ordinal) { + vector.putByteArray(rowId, value, offset, length); + } + + public boolean isNullAt(int rowId, int ordinal) { + return vector.isNullAt(rowId); + } + + public DataType dataType(int ordinal) { + return vector.dataType(); + } + + public void putNotNull(int rowId, int ordinal) { + vector.putNotNull(rowId); + } + + public void putNotNulls(int rowId, int count, int ordinal) { + vector.putNotNulls(rowId, count); + } + + public void putDictionaryInt(int rowId, int value, int ordinal) { + vector.getDictionaryIds().putInt(rowId, value); + } + + public void setDictionary(CarbonDictionary dictionary, int ordinal) { + if (null != dictionary) { + vector.setDictionary(new CarbonDictionaryWrapper(Encoding.PLAIN, dictionary)); + } else { + vector.setDictionary(null); + } + } + + public void putNull(int rowId, int ordinal) { + vector.putNull(rowId); + } + + public void putNulls(int rowId, int count, int ordinal) { + vector.putNulls(rowId, count); + } + + public boolean hasDictionary(int ordinal) { + return vector.hasDictionary(); + } + + public Object reserveDictionaryIds(int capacity , int ordinal) { + return vector.reserveDictionaryIds(capacity); + } + + - public void putDictionaryInt(int rowId, int value, int ordinal) { - columnarBatch.column(ordinal).getDictionaryIds().putInt(rowId, (int) value); } } diff --git a/integration/spark-datasource/src/main/spark2.3plus/org/apache/spark/sql/CarbonDictionaryWrapper.java b/integration/spark-datasource/src/main/spark2.3plus/org/apache/spark/sql/CarbonDictionaryWrapper.java index 5a99c682263..bd8c57ca4d7 100644 --- 
a/integration/spark-datasource/src/main/spark2.3plus/org/apache/spark/sql/CarbonDictionaryWrapper.java +++ b/integration/spark-datasource/src/main/spark2.3plus/org/apache/spark/sql/CarbonDictionaryWrapper.java @@ -28,10 +28,7 @@ public class CarbonDictionaryWrapper implements Dictionary { private byte[][] binaries; CarbonDictionaryWrapper(CarbonDictionary dictionary) { - binaries = new byte[dictionary.getDictionarySize()][]; - for (int i = 0; i < binaries.length; i++) { - binaries[i] = dictionary.getDictionaryValue(i); - } + binaries = dictionary.getAllDictionaryValues(); } @Override public int decodeToInt(int id) { diff --git a/integration/spark-datasource/src/main/spark2.3plus/org/apache/spark/sql/CarbonVectorProxy.java b/integration/spark-datasource/src/main/spark2.3plus/org/apache/spark/sql/CarbonVectorProxy.java index 4a0fb9eb4cf..bd74b056470 100644 --- a/integration/spark-datasource/src/main/spark2.3plus/org/apache/spark/sql/CarbonVectorProxy.java +++ b/integration/spark-datasource/src/main/spark2.3plus/org/apache/spark/sql/CarbonVectorProxy.java @@ -22,7 +22,6 @@ import org.apache.spark.memory.MemoryMode; import org.apache.spark.sql.catalyst.InternalRow; -import org.apache.spark.sql.execution.vectorized.Dictionary; import org.apache.spark.sql.execution.vectorized.WritableColumnVector; import org.apache.spark.sql.types.*; import org.apache.spark.sql.vectorized.ColumnarBatch; @@ -38,7 +37,7 @@ public class CarbonVectorProxy { private ColumnarBatch columnarBatch; - private WritableColumnVector[] columnVectors; + private ColumnVectorProxy[] columnVectorProxies; /** * Adapter class which handles the columnar vector reading of the carbondata @@ -51,17 +50,25 @@ public class CarbonVectorProxy { * @param structFileds, metadata related to current schema of table. 
*/ public CarbonVectorProxy(MemoryMode memMode, int rowNum, StructField[] structFileds) { - columnVectors = ColumnVectorFactory - .getColumnVector(memMode, new StructType(structFileds), rowNum); + WritableColumnVector[] columnVectors = + ColumnVectorFactory.getColumnVector(memMode, new StructType(structFileds), rowNum); columnarBatch = new ColumnarBatch(columnVectors); columnarBatch.setNumRows(rowNum); + columnVectorProxies = new ColumnVectorProxy[columnarBatch.numCols()]; + for (int i = 0; i < columnVectorProxies.length; i++) { + columnVectorProxies[i] = new ColumnVectorProxy(columnarBatch, i); + } } public CarbonVectorProxy(MemoryMode memMode, StructType outputSchema, int rowNum) { - columnVectors = ColumnVectorFactory + WritableColumnVector[] columnVectors = ColumnVectorFactory .getColumnVector(memMode, outputSchema, rowNum); columnarBatch = new ColumnarBatch(columnVectors); columnarBatch.setNumRows(rowNum); + columnVectorProxies = new ColumnVectorProxy[columnarBatch.numCols()]; + for (int i = 0; i < columnVectorProxies.length; i++) { + columnVectorProxies[i] = new ColumnVectorProxy(columnarBatch, i); + } } /** @@ -71,10 +78,6 @@ public int numRows() { return columnarBatch.numRows(); } - public Object reserveDictionaryIds(int capacity, int ordinal) { - return columnVectors[ordinal].reserveDictionaryIds(capacity); - } - /** * This API will return a columnvector from a batch of column vector rows * based on the ordinal @@ -86,21 +89,20 @@ public WritableColumnVector column(int ordinal) { return (WritableColumnVector) columnarBatch.column(ordinal); } - public WritableColumnVector getColumnVector(int ordinal) { - return columnVectors[ordinal]; + public ColumnVectorProxy getColumnVector(int ordinal) { + return columnVectorProxies[ordinal]; } - /** * Resets this column for writing. The currently stored values are no longer accessible. 
*/ public void reset() { - for (WritableColumnVector col : columnVectors) { - col.reset(); + for (int i = 0; i < columnarBatch.numCols(); i++) { + ((WritableColumnVector)columnarBatch.column(i)).reset(); } } public void resetDictionaryIds(int ordinal) { - columnVectors[ordinal].getDictionaryIds().reset(); + ((WritableColumnVector)columnarBatch.column(ordinal)).getDictionaryIds().reset(); } /** @@ -133,146 +135,189 @@ public void setNumRows(int numRows) { columnarBatch.setNumRows(numRows); } - public void putRowToColumnBatch(int rowId, Object value, int offset) { - org.apache.spark.sql.types.DataType t = dataType(offset); - if (null == value) { - putNull(rowId, offset); - } else { - if (t == org.apache.spark.sql.types.DataTypes.BooleanType) { - putBoolean(rowId, (boolean) value, offset); - } else if (t == org.apache.spark.sql.types.DataTypes.ByteType) { - putByte(rowId, (byte) value, offset); - } else if (t == org.apache.spark.sql.types.DataTypes.ShortType) { - putShort(rowId, (short) value, offset); - } else if (t == org.apache.spark.sql.types.DataTypes.IntegerType) { - putInt(rowId, (int) value, offset); - } else if (t == org.apache.spark.sql.types.DataTypes.LongType) { - putLong(rowId, (long) value, offset); - } else if (t == org.apache.spark.sql.types.DataTypes.FloatType) { - putFloat(rowId, (float) value, offset); - } else if (t == org.apache.spark.sql.types.DataTypes.DoubleType) { - putDouble(rowId, (double) value, offset); - } else if (t == org.apache.spark.sql.types.DataTypes.StringType) { - UTF8String v = (UTF8String) value; - putByteArray(rowId, v.getBytes(), offset); - } else if (t instanceof DecimalType) { - DecimalType dt = (DecimalType) t; - Decimal d = Decimal.fromDecimal(value); - if (dt.precision() <= Decimal.MAX_INT_DIGITS()) { - putInt(rowId, (int) d.toUnscaledLong(), offset); - } else if (dt.precision() <= Decimal.MAX_LONG_DIGITS()) { - putLong(rowId, d.toUnscaledLong(), offset); - } else { - final BigInteger integer = d.toJavaBigDecimal().unscaledValue(); - byte[] bytes = integer.toByteArray(); - putByteArray(rowId, bytes, 0, bytes.length, offset); + + public DataType dataType(int ordinal) { + return columnarBatch.column(ordinal).dataType(); + } + + public static class ColumnVectorProxy { + + private WritableColumnVector vector; + + public ColumnVectorProxy(ColumnarBatch columnarBatch, int ordinal) { + vector = (WritableColumnVector) columnarBatch.column(ordinal); + } + + public void putRowToColumnBatch(int rowId, Object value, int offset) { + DataType t = dataType(offset); + if (null == value) { + putNull(rowId, offset); + } else { + if (t == DataTypes.BooleanType) { + putBoolean(rowId, (boolean) value, offset); + } else if (t == DataTypes.ByteType) { + putByte(rowId, (byte) value, offset); + } else if (t == DataTypes.ShortType) { + putShort(rowId, (short) value, offset); + } else if (t == DataTypes.IntegerType) { + putInt(rowId, (int) value, offset); + } else if (t == DataTypes.LongType) { + putLong(rowId, (long) value, offset); + } else if (t == DataTypes.FloatType) { + putFloat(rowId, (float) value, offset); + } else if (t == DataTypes.DoubleType) { + putDouble(rowId, (double) value, offset); + } else if (t == DataTypes.StringType) { + UTF8String v = (UTF8String) value; + putByteArray(rowId, v.getBytes(), offset); + } else if (t instanceof DecimalType) { + DecimalType dt = (DecimalType) t; + Decimal d = Decimal.fromDecimal(value); + if (dt.precision() <= Decimal.MAX_INT_DIGITS()) { + putInt(rowId, (int) d.toUnscaledLong(), offset); + } else if (dt.precision() <= 
Decimal.MAX_LONG_DIGITS()) { + putLong(rowId, d.toUnscaledLong(), offset); + } else { + final BigInteger integer = d.toJavaBigDecimal().unscaledValue(); + byte[] bytes = integer.toByteArray(); + putByteArray(rowId, bytes, 0, bytes.length, offset); + } + } else if (t instanceof CalendarIntervalType) { + CalendarInterval c = (CalendarInterval) value; + vector.getChild(0).putInt(rowId, c.months); + vector.getChild(1).putLong(rowId, c.microseconds); + } else if (t instanceof DateType) { + putInt(rowId, (int) value, offset); + } else if (t instanceof TimestampType) { + putLong(rowId, (long) value, offset); } - } else if (t instanceof CalendarIntervalType) { - CalendarInterval c = (CalendarInterval) value; - columnVectors[offset].getChild(0).putInt(rowId, c.months); - columnVectors[offset].getChild(1).putLong(rowId, c.microseconds); - } else if (t instanceof org.apache.spark.sql.types.DateType) { - putInt(rowId, (int) value, offset); - } else if (t instanceof org.apache.spark.sql.types.TimestampType) { - putLong(rowId, (long) value, offset); } } - } - public void putBoolean(int rowId, boolean value, int ordinal) { - columnVectors[ordinal].putBoolean(rowId, (boolean) value); - } + public void putBoolean(int rowId, boolean value, int ordinal) { + vector.putBoolean(rowId, value); + } - public void putByte(int rowId, byte value, int ordinal) { - columnVectors[ordinal].putByte(rowId, (byte) value); - } + public void putByte(int rowId, byte value, int ordinal) { + vector.putByte(rowId, value); + } - public void putShort(int rowId, short value, int ordinal) { - columnVectors[ordinal].putShort(rowId, (short) value); - } + public void putBytes(int rowId, int count, byte[] src, int srcIndex) { + vector.putBytes(rowId, count, src, srcIndex); + } - public void putInt(int rowId, int value, int ordinal) { - columnVectors[ordinal].putInt(rowId, (int) value); - } + public void putShort(int rowId, short value, int ordinal) { + vector.putShort(rowId, value); + } - public void putDictionaryInt(int rowId, int value, int ordinal) { - columnVectors[ordinal].getDictionaryIds().putInt(rowId, (int) value); - } + public void putInt(int rowId, int value, int ordinal) { + vector.putInt(rowId, value); + } - public void putFloat(int rowId, float value, int ordinal) { - columnVectors[ordinal].putFloat(rowId, (float) value); - } + public void putFloat(int rowId, float value, int ordinal) { + vector.putFloat(rowId, value); + } - public void putLong(int rowId, long value, int ordinal) { - columnVectors[ordinal].putLong(rowId, (long) value); - } + public void putFloats(int rowId, int count, float[] src, int srcIndex) { + vector.putFloats(rowId, count, src, srcIndex); + } - public void putDouble(int rowId, double value, int ordinal) { - columnVectors[ordinal].putDouble(rowId, (double) value); - } + public void putLong(int rowId, long value, int ordinal) { + vector.putLong(rowId, value); + } - public void putByteArray(int rowId, byte[] value, int ordinal) { - columnVectors[ordinal].putByteArray(rowId, (byte[]) value); - } + public void putDouble(int rowId, double value, int ordinal) { + vector.putDouble(rowId, value); + } - public void putInts(int rowId, int count, int value, int ordinal) { - columnVectors[ordinal].putInts(rowId, count, value); - } + public void putByteArray(int rowId, byte[] value, int ordinal) { + vector.putByteArray(rowId, value); + } - public void putShorts(int rowId, int count, short value, int ordinal) { - columnVectors[ordinal].putShorts(rowId, count, value); - } + public void putInts(int rowId, int count, 
int value, int ordinal) { + vector.putInts(rowId, count, value); + } - public void putLongs(int rowId, int count, long value, int ordinal) { - columnVectors[ordinal].putLongs(rowId, count, value); - } + public void putInts(int rowId, int count, int[] src, int srcIndex) { + vector.putInts(rowId, count, src, srcIndex); + } - public void putDecimal(int rowId, Decimal value, int precision, int ordinal) { - columnVectors[ordinal].putDecimal(rowId, value, precision); + public void putShorts(int rowId, int count, short value, int ordinal) { + vector.putShorts(rowId, count, value); + } - } + public void putShorts(int rowId, int count, short[] src, int srcIndex) { + vector.putShorts(rowId, count, src, srcIndex); + } - public void putDoubles(int rowId, int count, double value, int ordinal) { - columnVectors[ordinal].putDoubles(rowId, count, value); - } + public void putLongs(int rowId, int count, long value, int ordinal) { + vector.putLongs(rowId, count, value); + } - public void putByteArray(int rowId, byte[] value, int offset, int length, int ordinal) { - columnVectors[ordinal].putByteArray(rowId, (byte[]) value, offset, length); - } + public void putLongs(int rowId, int count, long[] src, int srcIndex) { + vector.putLongs(rowId, count, src, srcIndex); + } - public void putNull(int rowId, int ordinal) { - columnVectors[ordinal].putNull(rowId); - } + public void putDecimal(int rowId, Decimal value, int precision, int ordinal) { + vector.putDecimal(rowId, value, precision); - public void putNulls(int rowId, int count, int ordinal) { - columnVectors[ordinal].putNulls(rowId, count); - } + } - public void putNotNull(int rowId, int ordinal) { - columnVectors[ordinal].putNotNull(rowId); - } + public void putDoubles(int rowId, int count, double value, int ordinal) { + vector.putDoubles(rowId, count, value); + } - public void putNotNulls(int rowId, int count, int ordinal) { - columnVectors[ordinal].putNotNulls(rowId, count); - } + public void putDoubles(int rowId, int count, double[] src, int srcIndex) { + vector.putDoubles(rowId, count, src, srcIndex); + } - public boolean isNullAt(int rowId, int ordinal) { - return columnVectors[ordinal].isNullAt(rowId); - } + public void putByteArray(int rowId, byte[] value, int offset, int length, int ordinal) { + vector.putByteArray(rowId, value, offset, length); + } - public boolean hasDictionary(int ordinal) { - return columnVectors[ordinal].hasDictionary(); - } + public boolean isNullAt(int rowId, int ordinal) { + return vector.isNullAt(rowId); + } - public void setDictionary(CarbonDictionary dictionary, int ordinal) { + public DataType dataType(int ordinal) { + return vector.dataType(); + } + + public void putNotNull(int rowId, int ordinal) { + vector.putNotNull(rowId); + } + + public void putNotNulls(int rowId, int count, int ordinal) { + vector.putNotNulls(rowId, count); + } + + public void putDictionaryInt(int rowId, int value, int ordinal) { + vector.getDictionaryIds().putInt(rowId, value); + } + + public void setDictionary(CarbonDictionary dictionary, int ordinal) { if (null != dictionary) { - columnVectors[ordinal].setDictionary(new CarbonDictionaryWrapper(dictionary)); + vector.setDictionary(new CarbonDictionaryWrapper(dictionary)); } else { - columnVectors[ordinal].setDictionary(null); + vector.setDictionary(null); + } + } + + public void putNull(int rowId, int ordinal) { + vector.putNull(rowId); + } + + public void putNulls(int rowId, int count, int ordinal) { + vector.putNulls(rowId, count); + } + + public boolean hasDictionary(int ordinal) { + return 
vector.hasDictionary(); + } + + public Object reserveDictionaryIds(int capacity, int ordinal) { + return vector.reserveDictionaryIds(capacity); } - } - public DataType dataType(int ordinal) { - return columnVectors[ordinal].dataType(); } } diff --git a/integration/spark2/src/main/scala/org/apache/carbondata/stream/CarbonStreamRecordReader.java b/integration/spark2/src/main/scala/org/apache/carbondata/stream/CarbonStreamRecordReader.java index 6c652856b99..3330e8b8fac 100644 --- a/integration/spark2/src/main/scala/org/apache/carbondata/stream/CarbonStreamRecordReader.java +++ b/integration/spark2/src/main/scala/org/apache/carbondata/stream/CarbonStreamRecordReader.java @@ -705,7 +705,7 @@ private void readRawRowFromStream() { private void putRowToColumnBatch(int rowId) { for (int i = 0; i < projection.length; i++) { Object value = outputValues[i]; - vectorProxy.putRowToColumnBatch(rowId,value,i); + vectorProxy.getColumnVector(i).putRowToColumnBatch(rowId,value,i); } } diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonLateDecodeStrategy.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonLateDecodeStrategy.scala index f0184cddb44..2a2a988155e 100644 --- a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonLateDecodeStrategy.scala +++ b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/strategy/CarbonLateDecodeStrategy.scala @@ -57,6 +57,8 @@ import org.apache.carbondata.spark.util.CarbonScalaUtil private[sql] class CarbonLateDecodeStrategy extends SparkStrategy { val PUSHED_FILTERS = "PushedFilters" + val vectorPushRowFilters = CarbonProperties.getInstance().getPushRowFiltersForVector + /* Spark 2.3.1 plan there can be case of multiple projections like below Project [substring(name, 1, 2)#124, name#123, tupleId#117, cast(rand(-6778822102499951904)#125 @@ -213,11 +215,12 @@ private[sql] class CarbonLateDecodeStrategy extends SparkStrategy { rdd: RDD[InternalRow], needDecode: ArrayBuffer[AttributeReference]): RDD[InternalRow] = { + val scanRdd = rdd.asInstanceOf[CarbonScanRDD[InternalRow]] if (needDecode.nonEmpty) { - rdd.asInstanceOf[CarbonScanRDD[InternalRow]].setVectorReaderSupport(false) + scanRdd.setVectorReaderSupport(false) getDecoderRDD(relation, needDecode, rdd, output) } else { - rdd.asInstanceOf[CarbonScanRDD[InternalRow]] + scanRdd .setVectorReaderSupport(supportBatchedDataSource(relation.relation.sqlContext, output)) rdd } @@ -300,7 +303,7 @@ private[sql] class CarbonLateDecodeStrategy extends SparkStrategy { } } } - + val hasDictionaryFilterCols = hasFilterOnDictionaryColumn(filterSet, table) if (projects.map(_.toAttribute) == projects && projectSet.size == projects.size && filterSet.subsetOf(projectSet)) { @@ -337,19 +340,35 @@ private[sql] class CarbonLateDecodeStrategy extends SparkStrategy { metadata, needDecoder, updateRequestedColumns.asInstanceOf[Seq[Attribute]]) - filterCondition.map(execution.FilterExec(_, scan)).getOrElse(scan) + // Check whether spark should handle row filters in case of vector flow. + if (!vectorPushRowFilters && scan.isInstanceOf[CarbonDataSourceScan] + && !hasDictionaryFilterCols) { + // Here carbon only does page pruning and row level pruning will be done by spark. 
+ scan.inputRDDs().head match { + case rdd: CarbonScanRDD[InternalRow] => + rdd.setDirectScanSupport(true) + case _ => + } + filterPredicates.reduceLeftOption(expressions.And).map(execution.FilterExec(_, scan)) + .getOrElse(scan) + } else { + filterCondition.map(execution.FilterExec(_, scan)).getOrElse(scan) + } } else { var newProjectList: Seq[Attribute] = Seq.empty + var implictsExisted = false val updatedProjects = projects.map { case a@Alias(s: ScalaUDF, name) if name.equalsIgnoreCase(CarbonCommonConstants.POSITION_ID) || name.equalsIgnoreCase(CarbonCommonConstants.CARBON_IMPLICIT_COLUMN_TUPLEID) => val reference = AttributeReference(name, StringType, true)().withExprId(a.exprId) newProjectList :+= reference + implictsExisted = true reference case a@Alias(s: ScalaUDF, name) if name.equalsIgnoreCase(CarbonCommonConstants.CARBON_IMPLICIT_COLUMN_SEGMENTID) => + implictsExisted = true val reference = AttributeReference(CarbonCommonConstants.CARBON_IMPLICIT_COLUMN_TUPLEID, StringType, true)().withExprId(a.exprId) @@ -363,7 +382,24 @@ private[sql] class CarbonLateDecodeStrategy extends SparkStrategy { // Don't request columns that are only referenced by pushed filters. val requestedColumns = (projectSet ++ filterSet -- handledSet).map(relation.attributeMap).toSeq ++ newProjectList - val updateRequestedColumns = updateRequestedColumnsFunc(requestedColumns, table, needDecoder) + + var updateRequestedColumns = + if (!vectorPushRowFilters && !implictsExisted && !hasDictionaryFilterCols) { + updateRequestedColumnsFunc( + (projectSet ++ filterSet).map(relation.attributeMap).toSeq, + table, + needDecoder) + } else { + updateRequestedColumnsFunc(requestedColumns, table, needDecoder) + } + val supportBatch = + supportBatchedDataSource(relation.relation.sqlContext, + updateRequestedColumns.asInstanceOf[Seq[Attribute]]) && + needDecoder.isEmpty + if (!vectorPushRowFilters && !supportBatch && !implictsExisted && !hasDictionaryFilterCols) { + // revert for row scan + updateRequestedColumns = updateRequestedColumnsFunc(requestedColumns, table, needDecoder) + } val scan = getDataSourceScan(relation, updateRequestedColumns.asInstanceOf[Seq[Attribute]], partitions, @@ -374,10 +410,27 @@ private[sql] class CarbonLateDecodeStrategy extends SparkStrategy { metadata, needDecoder, updateRequestedColumns.asInstanceOf[Seq[Attribute]]) - execution.ProjectExec( - updateRequestedColumnsFunc(updatedProjects, table, - needDecoder).asInstanceOf[Seq[NamedExpression]], - filterCondition.map(execution.FilterExec(_, scan)).getOrElse(scan)) + // Check whether spark should handle row filters in case of vector flow. + if (!vectorPushRowFilters && scan.isInstanceOf[CarbonDataSourceScan] + && !implictsExisted && !hasDictionaryFilterCols) { + // Here carbon only does page pruning and row level pruning will be done by spark. 
+ scan.inputRDDs().head match { + case rdd: CarbonScanRDD[InternalRow] => + rdd.setDirectScanSupport(true) + case _ => + } + execution.ProjectExec( + updateRequestedColumnsFunc(updatedProjects, table, + needDecoder).asInstanceOf[Seq[NamedExpression]], + filterPredicates.reduceLeftOption(expressions.And).map( + execution.FilterExec(_, scan)).getOrElse(scan)) + } else { + execution.ProjectExec( + updateRequestedColumnsFunc(updatedProjects, table, + needDecoder).asInstanceOf[Seq[NamedExpression]], + filterCondition.map(execution.FilterExec(_, scan)).getOrElse(scan)) + } + } } @@ -457,6 +510,12 @@ private[sql] class CarbonLateDecodeStrategy extends SparkStrategy { } } + private def hasFilterOnDictionaryColumn(filterColumns: AttributeSet, + relation: CarbonDatasourceHadoopRelation): Boolean = { + val map = relation.carbonRelation.metaData.dictionaryMap + filterColumns.exists(c => map.get(c.name).getOrElse(false)) + } + private def getPartitioning(carbonTable: CarbonTable, output: Seq[Attribute]): Partitioning = { val info: BucketingInfo = carbonTable.getBucketingInfo(carbonTable.getTableName)