From 7cf48d9a9b8bd7814c90c61172e06295fd3a1064 Mon Sep 17 00:00:00 2001 From: ravipesala Date: Sat, 17 Jun 2017 22:53:57 +0530 Subject: [PATCH 1/4] Added blocklet index implementation in datamap --- .../core/datastore/block/TableBlockInfo.java | 8 + ...DataMap.java => AbstractTableDataMap.java} | 46 ++- .../carbondata/core/indexstore/Blocklet.java | 55 +++ .../core/indexstore/DataMapStoreManager.java | 16 +- .../core/indexstore/UnsafeMemoryDMStore.java | 207 ++++++++++ .../blockletindex/BlockletDMComparator.java | 134 +++++++ .../blockletindex/BlockletDataMap.java | 369 ++++++++++++++++++ .../blockletindex/BlockletTableMap.java | 85 ++++ .../core/indexstore/row/DataMapRow.java | 86 ++++ .../core/indexstore/row/DataMapRowImpl.java | 85 ++++ .../core/indexstore/row/UnsafeDataMapRow.java | 111 ++++++ .../core/indexstore/schema/DataMapSchema.java | 124 ++++++ .../core/metadata/blocklet/BlockletInfo.java | 61 +++ .../core/metadata/index/BlockIndexInfo.java | 27 ++ .../util/AbstractDataFileFooterConverter.java | 51 +++ .../src/main/thrift/carbondata_index.thrift | 1 + 16 files changed, 1448 insertions(+), 18 deletions(-) rename core/src/main/java/org/apache/carbondata/core/indexstore/{TableDataMap.java => AbstractTableDataMap.java} (61%) create mode 100644 core/src/main/java/org/apache/carbondata/core/indexstore/UnsafeMemoryDMStore.java create mode 100644 core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDMComparator.java create mode 100644 core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java create mode 100644 core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletTableMap.java create mode 100644 core/src/main/java/org/apache/carbondata/core/indexstore/row/DataMapRow.java create mode 100644 core/src/main/java/org/apache/carbondata/core/indexstore/row/DataMapRowImpl.java create mode 100644 core/src/main/java/org/apache/carbondata/core/indexstore/row/UnsafeDataMapRow.java create mode 100644 core/src/main/java/org/apache/carbondata/core/indexstore/schema/DataMapSchema.java diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java index 44347cf23df..6aba13c8c34 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java @@ -88,6 +88,10 @@ public TableBlockInfo(String filePath, long blockOffset, String segmentId, Strin this.deletedDeltaFilePath = deletedDeltaFilePath; } + public TableBlockInfo() { + + } + /** * constructor to initialize the TbaleBlockInfo with BlockletInfos * @@ -319,4 +323,8 @@ public void setBlockStorageIdMap(Map blockStorageIdMap) { public String[] getDeletedDeltaFilePath() { return deletedDeltaFilePath; } + + public void setFilePath(String filePath) { + this.filePath = filePath; + } } diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/TableDataMap.java b/core/src/main/java/org/apache/carbondata/core/indexstore/AbstractTableDataMap.java similarity index 61% rename from core/src/main/java/org/apache/carbondata/core/indexstore/TableDataMap.java rename to core/src/main/java/org/apache/carbondata/core/indexstore/AbstractTableDataMap.java index e1532c82b82..1f97977f900 100644 --- a/core/src/main/java/org/apache/carbondata/core/indexstore/TableDataMap.java +++ 
b/core/src/main/java/org/apache/carbondata/core/indexstore/AbstractTableDataMap.java
@@ -16,6 +16,7 @@
  */
 package org.apache.carbondata.core.indexstore;
 
+import java.util.ArrayList;
 import java.util.List;
 
 import org.apache.carbondata.core.events.EventListener;
@@ -26,28 +27,28 @@
  * DataMap at the table level, user can add any number of datamaps for one table. Depends
  * on the filter condition it can prune the blocklets.
  */
-public interface TableDataMap extends EventListener {
+public abstract class AbstractTableDataMap implements EventListener {
 
   /**
    * It is called to initialize and load the required table datamap metadata.
    */
-  void init(AbsoluteTableIdentifier identifier, String dataMapName);
+  public abstract void init(AbsoluteTableIdentifier identifier, String dataMapName);
 
   /**
    * Gives the writer to write the metadata information of this datamap at table level.
    *
    * @return
    */
-  DataMapWriter getWriter();
+  public abstract DataMapWriter getMetaDataWriter();
 
   /**
-   * Create the datamap using the segmentid and name.
+   * Get the datamap writer for each segmentid.
    *
    * @param identifier
    * @param segmentId
    * @return
    */
-  DataMap createDataMap(AbsoluteTableIdentifier identifier, String segmentId);
+  public abstract DataMapWriter getDataMapWriter(AbsoluteTableIdentifier identifier,
+      String segmentId);
 
   /**
    * Pass the valid segments and prune the datamap using filter expression
    *
@@ -56,7 +57,23 @@ public interface TableDataMap extends EventListener {
    * @param filterExp
    * @return
    */
-  List<Blocklet> prune(List<String> segmentIds, FilterResolverIntf filterExp);
+  public List<Blocklet> prune(List<String> segmentIds, FilterResolverIntf filterExp) {
+    List<Blocklet> blocklets = new ArrayList<>();
+    for (String segmentId : segmentIds) {
+      List<DataMap> dataMaps = getDataMaps(segmentId);
+      for (DataMap dataMap : dataMaps) {
+        blocklets.addAll(dataMap.prune(filterExp));
+      }
+    }
+    return blocklets;
+  }
+
+  /**
+   * Get the datamaps of a segment.
+   * @param segmentId
+   * @return
+   */
+  protected abstract List<DataMap> getDataMaps(String segmentId);
 
   /**
    * This is used for making the datamap distributable.
    *
@@ -65,7 +82,7 @@ public interface TableDataMap extends EventListener {
    *
    * @return
    */
-  List<DataMapDistributable> toDistributable(List<String> segmentIds);
+  public abstract List<DataMapDistributable> toDistributable(List<String> segmentIds);
 
   /**
    * This method is used from any machine after it is distributed. It takes the distributable object
    *
@@ -75,7 +92,16 @@ public interface TableDataMap extends EventListener {
    * @param filterExp
    * @return
    */
-  List<Blocklet> prune(DataMapDistributable distributable, FilterResolverIntf filterExp);
+  public List<Blocklet> prune(DataMapDistributable distributable, FilterResolverIntf filterExp) {
+    return getDataMap(distributable).prune(filterExp);
+  }
+
+  /**
+   * Get datamap for distributable object.
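+   * The distributable is expected to carry enough information (for example the
+   * index file location) to load the datamap again on the executor side.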
+   * @param distributable
+   * @return
+   */
+  protected abstract DataMap getDataMap(DataMapDistributable distributable);
 
   /**
    * This method checks whether the columns and the type of filters supported
    *
@@ -84,11 +110,11 @@ public interface TableDataMap extends EventListener {
    * @param filterExp
    * @return
    */
-  boolean isFiltersSupported(FilterResolverIntf filterExp);
+  public abstract boolean isFiltersSupported(FilterResolverIntf filterExp);
 
   /**
    * Clears table level datamap
    */
-  void clear();
+  public abstract void clear();
 
 }
diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/Blocklet.java b/core/src/main/java/org/apache/carbondata/core/indexstore/Blocklet.java
index 597c46c0562..0de21a3abb2 100644
--- a/core/src/main/java/org/apache/carbondata/core/indexstore/Blocklet.java
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/Blocklet.java
@@ -18,6 +18,8 @@
 
 import java.io.Serializable;
 
+import org.apache.carbondata.core.metadata.blocklet.BlockletInfo;
+
 /**
  * Blocklet
  */
@@ -27,6 +29,8 @@ public class Blocklet implements Serializable {
 
   private String blockletId;
 
+  private BlockletDetailInfo detailInfo;
+
   public Blocklet(String path, String blockletId) {
     this.path = path;
     this.blockletId = blockletId;
@@ -39,4 +43,55 @@ public String getPath() {
   public String getBlockletId() {
     return blockletId;
   }
+
+  public BlockletDetailInfo getDetailInfo() {
+    return detailInfo;
+  }
+
+  public void setDetailInfo(BlockletDetailInfo detailInfo) {
+    this.detailInfo = detailInfo;
+  }
+
+  public static class BlockletDetailInfo implements Serializable {
+
+    private int rowCount;
+
+    private int pagesCount;
+
+    private int versionNumber;
+
+    private BlockletInfo blockletInfo;
+
+    public int getRowCount() {
+      return rowCount;
+    }
+
+    public void setRowCount(int rowCount) {
+      this.rowCount = rowCount;
+    }
+
+    public int getPagesCount() {
+      return pagesCount;
+    }
+
+    public void setPagesCount(int pagesCount) {
+      this.pagesCount = pagesCount;
+    }
+
+    public int getVersionNumber() {
+      return versionNumber;
+    }
+
+    public void setVersionNumber(int versionNumber) {
+      this.versionNumber = versionNumber;
+    }
+
+    public BlockletInfo getBlockletInfo() {
+      return blockletInfo;
+    }
+
+    public void setBlockletInfo(BlockletInfo blockletInfo) {
+      this.blockletInfo = blockletInfo;
+    }
+  }
 }
diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/DataMapStoreManager.java b/core/src/main/java/org/apache/carbondata/core/indexstore/DataMapStoreManager.java
index 06638ad46d3..de31581a2c9 100644
--- a/core/src/main/java/org/apache/carbondata/core/indexstore/DataMapStoreManager.java
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/DataMapStoreManager.java
@@ -30,7 +30,7 @@ public class DataMapStoreManager {
 
   private static DataMapStoreManager instance = new DataMapStoreManager();
 
-  private Map<DataMapType, Map<String, TableDataMap>> dataMapMappping = new HashMap<>();
+  private Map<DataMapType, Map<String, AbstractTableDataMap>> dataMapMappping = new HashMap<>();
 
   private static final LogService LOGGER =
       LogServiceFactory.getLogService(DataMapStoreManager.class.getName());
@@ -46,10 +46,10 @@ private DataMapStoreManager() {
    * @param mapType
    * @return
    */
-  public TableDataMap getDataMap(AbsoluteTableIdentifier identifier, String dataMapName,
+  public AbstractTableDataMap getDataMap(AbsoluteTableIdentifier identifier, String dataMapName,
       DataMapType mapType) {
-    Map<String, TableDataMap> map = dataMapMappping.get(mapType);
-    TableDataMap dataMap = null;
+    Map<String, AbstractTableDataMap> map = dataMapMappping.get(mapType);
+    AbstractTableDataMap dataMap = null;
     if (map == null) {
not exist"); } else { @@ -69,14 +69,14 @@ public TableDataMap getDataMap(AbsoluteTableIdentifier identifier, String dataMa * @param mapType * @return */ - public TableDataMap createTableDataMap(AbsoluteTableIdentifier identifier, DataMapType mapType, + public AbstractTableDataMap createTableDataMap(AbsoluteTableIdentifier identifier, DataMapType mapType, String dataMapName) { - Map map = dataMapMappping.get(mapType); + Map map = dataMapMappping.get(mapType); if (map == null) { map = new HashMap<>(); dataMapMappping.put(mapType, map); } - TableDataMap dataMap = map.get(dataMapName); + AbstractTableDataMap dataMap = map.get(dataMapName); if (dataMap != null) { throw new RuntimeException("Already datamap exists in that path with type " + mapType); } @@ -92,7 +92,7 @@ public TableDataMap createTableDataMap(AbsoluteTableIdentifier identifier, DataM } public void clearDataMap(String dataMapName, DataMapType mapType) { - Map map = dataMapMappping.get(mapType); + Map map = dataMapMappping.get(mapType); if (map != null && map.get(dataMapName) != null) { map.remove(dataMapName).clear(); } diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/UnsafeMemoryDMStore.java b/core/src/main/java/org/apache/carbondata/core/indexstore/UnsafeMemoryDMStore.java new file mode 100644 index 00000000000..49e9d34151a --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/indexstore/UnsafeMemoryDMStore.java @@ -0,0 +1,207 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.carbondata.core.indexstore;
+
+import org.apache.carbondata.core.indexstore.row.DataMapRow;
+import org.apache.carbondata.core.indexstore.row.UnsafeDataMapRow;
+import org.apache.carbondata.core.indexstore.schema.DataMapSchema;
+import org.apache.carbondata.core.memory.MemoryAllocator;
+import org.apache.carbondata.core.memory.MemoryAllocatorFactory;
+import org.apache.carbondata.core.memory.MemoryBlock;
+
+import static org.apache.carbondata.core.memory.CarbonUnsafe.BYTE_ARRAY_OFFSET;
+import static org.apache.carbondata.core.memory.CarbonUnsafe.unsafe;
+
+/**
+ * Stores the data map rows in unsafe (off-heap) memory.
+ */
+public class UnsafeMemoryDMStore {
+
+  private MemoryBlock memoryBlock;
+
+  private static int capacity = 8 * 1024 * 1024;
+
+  private int allocatedSize;
+
+  private int runningLength;
+
+  private MemoryAllocator memoryAllocator;
+
+  private boolean isMemoryFreed;
+
+  private DataMapSchema[] schema;
+
+  private int[] pointers;
+
+  private int rowCount;
+
+  public UnsafeMemoryDMStore(DataMapSchema[] schema) {
+    this.schema = schema;
+    this.memoryAllocator = MemoryAllocatorFactory.INSATANCE.getMemoryAllocator();
+    this.allocatedSize = capacity;
+    this.memoryBlock = memoryAllocator.allocate(allocatedSize);
+    this.pointers = new int[1000];
+  }
+
+  /**
+   * Check whether the memory is sufficient or not; if not, allocate more memory
+   * and copy the old data to the new block.
+   *
+   * @param rowSize
+   */
+  private void ensureSize(int rowSize) {
+    if (runningLength + rowSize >= allocatedSize) {
+      MemoryBlock allocate =
+          MemoryAllocatorFactory.INSATANCE.getMemoryAllocator().allocate(allocatedSize + capacity);
+      unsafe.copyMemory(memoryBlock.getBaseObject(), memoryBlock.getBaseOffset(),
+          allocate.getBaseObject(), allocate.getBaseOffset(), runningLength);
+      memoryAllocator.free(memoryBlock);
+      allocatedSize = allocatedSize + capacity;
+      memoryBlock = allocate;
+    }
+    if (this.pointers.length <= rowCount + 1) {
+      int[] newPointer = new int[pointers.length + 1000];
+      System.arraycopy(pointers, 0, newPointer, 0, pointers.length);
+      this.pointers = newPointer;
+    }
+  }
+
+  /**
+   * Add the index row to unsafe memory.
+   *
+   * @param indexRow
+   */
+  public void addIndexRowToUnsafe(DataMapRow indexRow) {
+    // First calculate the required memory to keep the row in unsafe
+    int rowSize = indexRow.getTotalSizeInBytes();
+    // Check whether allocated memory is sufficient or not.
+    ensureSize(rowSize);
+    int pointer = runningLength;
+
+    for (int i = 0; i < schema.length; i++) {
+      addToUnsafe(schema[i], indexRow, i);
+    }
+    pointers[rowCount++] = pointer;
+  }
+
+  private void addToUnsafe(DataMapSchema schema, DataMapRow row, int index) {
+    switch (schema.getSchemaType()) {
+      case FIXED:
+        switch (schema.getDataType()) {
+          case BYTE:
+            unsafe.putByte(memoryBlock.getBaseObject(), memoryBlock.getBaseOffset() + runningLength,
+                row.getByte(index));
+            runningLength += row.getSizeInBytes(index);
+            break;
+          case SHORT:
+            unsafe
+                .putShort(memoryBlock.getBaseObject(), memoryBlock.getBaseOffset() + runningLength,
+                    row.getShort(index));
+            runningLength += row.getSizeInBytes(index);
+            break;
+          case INT:
+            unsafe.putInt(memoryBlock.getBaseObject(), memoryBlock.getBaseOffset() + runningLength,
+                row.getInt(index));
+            runningLength += row.getSizeInBytes(index);
+            break;
+          case LONG:
+            unsafe.putLong(memoryBlock.getBaseObject(), memoryBlock.getBaseOffset() + runningLength,
+                row.getLong(index));
+            runningLength += row.getSizeInBytes(index);
+            break;
+          case FLOAT:
+            unsafe
+                .putFloat(memoryBlock.getBaseObject(), memoryBlock.getBaseOffset() + runningLength,
+                    row.getFloat(index));
+            runningLength += row.getSizeInBytes(index);
+            break;
+          case DOUBLE:
+            unsafe
+                .putDouble(memoryBlock.getBaseObject(), memoryBlock.getBaseOffset() + runningLength,
+                    row.getDouble(index));
+            runningLength += row.getSizeInBytes(index);
+            break;
+          case BYTE_ARRAY:
+            byte[] data = row.getByteArray(index);
+            unsafe.copyMemory(data, BYTE_ARRAY_OFFSET, memoryBlock.getBaseObject(),
+                memoryBlock.getBaseOffset() + runningLength, data.length);
+            runningLength += row.getSizeInBytes(index);
+            break;
+        }
+        break;
+      case VARIABLE:
+        byte[] data = row.getByteArray(index);
+        // write the length of the variable value first, then the value itself
+        unsafe.putShort(memoryBlock.getBaseObject(), memoryBlock.getBaseOffset() + runningLength,
+            (short) data.length);
+        runningLength += 2;
+        unsafe.copyMemory(data, BYTE_ARRAY_OFFSET, memoryBlock.getBaseObject(),
+            memoryBlock.getBaseOffset() + runningLength, data.length);
+        runningLength += data.length;
+        break;
+      case STRUCT:
+        DataMapSchema[] childSchemas =
+            ((DataMapSchema.StructDataMapSchema) schema).getChildSchemas();
+        DataMapRow struct = row.getRow(index);
+        for (int i = 0; i < childSchemas.length; i++) {
+          addToUnsafe(childSchemas[i], struct, i);
+        }
+        break;
+    }
+  }
+
+  public DataMapRow getUnsafeRow(int index) {
+    assert (index < rowCount);
+    return new UnsafeDataMapRow(schema, memoryBlock, pointers[index]);
+  }
+
+  public void finishWriting() {
+    if (runningLength < allocatedSize) {
+      MemoryBlock allocate =
+          MemoryAllocatorFactory.INSATANCE.getMemoryAllocator().allocate(runningLength);
+      unsafe.copyMemory(memoryBlock.getBaseObject(), memoryBlock.getBaseOffset(),
+          allocate.getBaseObject(), allocate.getBaseOffset(), runningLength);
+      memoryAllocator.free(memoryBlock);
+      memoryBlock = allocate;
+    }
+    // Compact pointers.
+    if (rowCount < pointers.length) {
+      int[] newPointer = new int[rowCount];
+      System.arraycopy(pointers, 0, newPointer, 0, rowCount);
+      this.pointers = newPointer;
+    }
+  }
+
+  public void freeMemory() {
+    if (!isMemoryFreed) {
+      memoryAllocator.free(memoryBlock);
+      isMemoryFreed = true;
+    }
+  }
+
+  public int getMemoryUsed() {
+    return runningLength;
+  }
+
+  public DataMapSchema[] getSchema() {
+    return schema;
+  }
+
+  public int getRowCount() {
+    return rowCount;
+  }
+
+}
diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDMComparator.java b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDMComparator.java
new file mode 100644
index 00000000000..9a5060023b9
--- /dev/null
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDMComparator.java
@@ -0,0 +1,134 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.carbondata.core.indexstore.blockletindex;
+
+import java.nio.ByteBuffer;
+import java.util.Comparator;
+
+import org.apache.carbondata.core.indexstore.row.DataMapRow;
+import org.apache.carbondata.core.util.ByteUtil;
+
+/**
+ * Data map comparator
+ */
+public class BlockletDMComparator implements Comparator<DataMapRow> {
+
+  /**
+   * A no dictionary column value has variable length, so its entry in
+   * eachColumnValueSize is -1.
+   */
+  private static final int NO_DICTIONARY_COLUMN_VALUE = -1;
+
+  /**
+   * size of a short value in bytes
+   */
+  private static final short SHORT_SIZE_IN_BYTES = 2;
+
+  private int[] eachColumnValueSize;
+
+  /**
+   * the number of no dictionary columns in SORT_COLUMNS
+   */
+  private int numberOfNoDictSortColumns;
+
+  /**
+   * the number of columns in SORT_COLUMNS
+   */
+  private int numberOfSortColumns;
+
+  public BlockletDMComparator(int[] eachColumnValueSize, int numberOfSortColumns,
+      int numberOfNoDictSortColumns) {
+    this.eachColumnValueSize = eachColumnValueSize;
+    this.numberOfNoDictSortColumns = numberOfNoDictSortColumns;
+    this.numberOfSortColumns = numberOfSortColumns;
+  }
+
+  @Override public int compare(DataMapRow first, DataMapRow second) {
+    int dictionaryKeyOffset = 0;
+    int nonDictionaryKeyOffset = 0;
+    int compareResult = 0;
+    int processedNoDictionaryColumn = numberOfNoDictSortColumns;
+    byte[][] firstBytes = splitKey(first.getByteArray(0));
+    byte[][] secondBytes = splitKey(second.getByteArray(0));
+    byte[] firstNoDictionaryKeys = firstBytes[1];
+    ByteBuffer firstNoDictionaryKeyBuffer = ByteBuffer.wrap(firstNoDictionaryKeys);
+    byte[] secondNoDictionaryKeys = secondBytes[1];
+    ByteBuffer secondNoDictionaryKeyBuffer = ByteBuffer.wrap(secondNoDictionaryKeys);
+    int actualOffset = 0;
+    int actualOffset1 = 0;
+    int firstNoDictionaryLength = 0;
+    int secondNoDictionaryLength = 0;
+
+    for (int i = 0; i < numberOfSortColumns; i++) {
+      if (eachColumnValueSize[i] != NO_DICTIONARY_COLUMN_VALUE) {
+        byte[] firstDictionaryKeys = firstBytes[0];
+        byte[] secondDictionaryKeys = secondBytes[0];
+        compareResult = ByteUtil.UnsafeComparer.INSTANCE
+            .compareTo(firstDictionaryKeys, dictionaryKeyOffset, eachColumnValueSize[i],
+                secondDictionaryKeys, dictionaryKeyOffset, eachColumnValueSize[i]);
+        dictionaryKeyOffset += eachColumnValueSize[i];
+      } else {
+        if (processedNoDictionaryColumn > 1) {
+          actualOffset = firstNoDictionaryKeyBuffer.getShort(nonDictionaryKeyOffset);
+          firstNoDictionaryLength =
+              firstNoDictionaryKeyBuffer.getShort(nonDictionaryKeyOffset + SHORT_SIZE_IN_BYTES)
+                  - actualOffset;
+          actualOffset1 = secondNoDictionaryKeyBuffer.getShort(nonDictionaryKeyOffset);
+          secondNoDictionaryLength =
+              secondNoDictionaryKeyBuffer.getShort(nonDictionaryKeyOffset + SHORT_SIZE_IN_BYTES)
+                  - actualOffset1;
+          compareResult = ByteUtil.UnsafeComparer.INSTANCE
+              .compareTo(firstNoDictionaryKeys, actualOffset, firstNoDictionaryLength,
+                  secondNoDictionaryKeys, actualOffset1, secondNoDictionaryLength);
+          nonDictionaryKeyOffset += SHORT_SIZE_IN_BYTES;
+          processedNoDictionaryColumn--;
+        } else {
+          actualOffset = firstNoDictionaryKeyBuffer.getShort(nonDictionaryKeyOffset);
+          actualOffset1 = secondNoDictionaryKeyBuffer.getShort(nonDictionaryKeyOffset);
+          firstNoDictionaryLength = firstNoDictionaryKeys.length - actualOffset;
+          secondNoDictionaryLength = secondNoDictionaryKeys.length - actualOffset1;
+          compareResult = ByteUtil.UnsafeComparer.INSTANCE
+              .compareTo(firstNoDictionaryKeys, actualOffset, firstNoDictionaryLength,
+                  secondNoDictionaryKeys, actualOffset1, secondNoDictionaryLength);
+        }
+      }
+      if (compareResult != 0) {
+        return compareResult;
+      }
+    }
+
+    return 0;
+  }
+
+  /**
+   * Split the index key to dictionary and no dictionary.
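+   * The key layout is [dictionary key length (int)][no dictionary key length (int)]
+   * [dictionary key bytes][no dictionary key bytes], matching the layout written
+   * by BlockletDataMap#convertToRow.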
+   * @param startKey
+   * @return
+   */
+  private byte[][] splitKey(byte[] startKey) {
+    ByteBuffer buffer = ByteBuffer.wrap(startKey);
+    buffer.rewind();
+    int dictionaryKeySize = buffer.getInt();
+    int nonDictionaryKeySize = buffer.getInt();
+    byte[] dictionaryKey = new byte[dictionaryKeySize];
+    buffer.get(dictionaryKey);
+    byte[] nonDictionaryKey = new byte[nonDictionaryKeySize];
+    buffer.get(nonDictionaryKey);
+    return new byte[][] {dictionaryKey, nonDictionaryKey};
+  }
+}
diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java
new file mode 100644
index 00000000000..5894bac2e60
--- /dev/null
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java
@@ -0,0 +1,369 @@
+package org.apache.carbondata.core.indexstore.blockletindex;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.Comparator;
+import java.util.List;
+
+import org.apache.carbondata.common.logging.LogService;
+import org.apache.carbondata.common.logging.LogServiceFactory;
+import org.apache.carbondata.core.datastore.IndexKey;
+import org.apache.carbondata.core.datastore.block.SegmentProperties;
+import org.apache.carbondata.core.indexstore.Blocklet;
+import org.apache.carbondata.core.indexstore.DataMap;
+import org.apache.carbondata.core.indexstore.DataMapWriter;
+import org.apache.carbondata.core.indexstore.UnsafeMemoryDMStore;
+import org.apache.carbondata.core.indexstore.row.DataMapRow;
+import org.apache.carbondata.core.indexstore.row.DataMapRowImpl;
+import org.apache.carbondata.core.indexstore.schema.DataMapSchema;
+import org.apache.carbondata.core.keygenerator.KeyGenException;
+import org.apache.carbondata.core.metadata.blocklet.BlockletInfo;
+import org.apache.carbondata.core.metadata.blocklet.DataFileFooter;
+import org.apache.carbondata.core.metadata.blocklet.index.BlockletMinMaxIndex;
+import org.apache.carbondata.core.metadata.datatype.DataType;
+import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema;
+import org.apache.carbondata.core.scan.filter.FilterUtil;
+import org.apache.carbondata.core.scan.filter.executer.FilterExecuter;
+import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf;
+import org.apache.carbondata.core.util.CarbonUtil;
+import org.apache.carbondata.core.util.DataFileFooterConverter;
+
+/**
+ * Datamap implementation for blocklet.
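+ * One index row is kept per blocklet with the layout: start key, min values,
+ * max values, row count, file path, page count, version and the serialized
+ * BlockletInfo, all stored through UnsafeMemoryDMStore.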
+ */
+public class BlockletDataMap implements DataMap {
+
+  private static final LogService LOGGER =
+      LogServiceFactory.getLogService(BlockletDataMap.class.getName());
+
+  private static int KEY_INDEX = 0;
+
+  private static int MIN_VALUES_INDEX = 1;
+
+  private static int MAX_VALUES_INDEX = 2;
+
+  private static int ROW_COUNT_INDEX = 3;
+
+  private static int FILE_PATH_INDEX = 4;
+
+  private static int PAGE_COUNT_INDEX = 5;
+
+  private static int VERSION_INDEX = 6;
+
+  private static int BLOCK_INFO_INDEX = 7;
+
+  private UnsafeMemoryDMStore unsafeMemoryDMStore;
+
+  private SegmentProperties segmentProperties;
+
+  @Override public DataMapWriter getWriter() {
+    return null;
+  }
+
+  @Override public void init(String path) {
+    DataFileFooterConverter fileFooterConverter = new DataFileFooterConverter();
+    try {
+      List<DataFileFooter> indexInfo = fileFooterConverter.getIndexInfo(path);
+      for (DataFileFooter fileFooter : indexInfo) {
+        List<ColumnSchema> columnInTable = fileFooter.getColumnInTable();
+        if (segmentProperties == null) {
+          segmentProperties = new SegmentProperties(columnInTable,
+              fileFooter.getSegmentInfo().getColumnCardinality());
+          createSchema(segmentProperties);
+        }
+        fileFooter = CarbonUtil.readMetadatFile(fileFooter.getBlockInfo().getTableBlockInfo());
+        loadToUnsafe(fileFooter, segmentProperties);
+      }
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  private void loadToUnsafe(DataFileFooter fileFooter, SegmentProperties segmentProperties) {
+    int[] minMaxLen = segmentProperties.getEachDimColumnValueSize();
+    List<BlockletInfo> blockletList = fileFooter.getBlockletList();
+    DataMapSchema[] schema = unsafeMemoryDMStore.getSchema();
+    for (int index = 0; index < blockletList.size(); index++) {
+      DataMapRow row = new DataMapRowImpl(schema);
+      int ordinal = 0;
+      BlockletInfo blockletInfo = blockletList.get(index);
+
+      // add start key as index key
+      row.setByteArray(blockletInfo.getBlockletIndex().getBtreeIndex().getStartKey(), ordinal++);
+
+      // add min and max values of the dimension columns as struct rows
+      BlockletMinMaxIndex minMaxIndex = blockletInfo.getBlockletIndex().getMinMaxIndex();
+      row.setRow(addMinMax(minMaxLen, schema[ordinal], minMaxIndex.getMinValues()), ordinal);
+      ordinal++;
+      row.setRow(addMinMax(minMaxLen, schema[ordinal], minMaxIndex.getMaxValues()), ordinal);
+      ordinal++;
+
+      row.setInt(blockletInfo.getNumberOfRows(), ordinal++);
+
+      // add file path
+      byte[] filePath = fileFooter.getBlockInfo().getTableBlockInfo().getFilePath().getBytes();
+      row.setByteArray(filePath, ordinal++);
+
+      // add pages
+      row.setShort((short) blockletInfo.getNumberOfPages(), ordinal++);
+
+      // add version number
+      row.setShort(fileFooter.getVersionId().number(), ordinal++);
+
+      // add blocklet info
+      byte[] serializedData;
+      try {
+        serializedData = blockletInfo.getSerializedData();
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+      row.setByteArray(serializedData, ordinal);
+      unsafeMemoryDMStore.addIndexRowToUnsafe(row);
+    }
+    unsafeMemoryDMStore.finishWriting();
+  }
+
+  private DataMapRow addMinMax(int[] minMaxLen, DataMapSchema dataMapSchema, byte[][] minValues) {
+    DataMapSchema[] minSchemas =
+        ((DataMapSchema.StructDataMapSchema) dataMapSchema).getChildSchemas();
+    DataMapRow minRow = new DataMapRowImpl(minSchemas);
+    int minOrdinal = 0;
+    // min value adding
+    for (int i = 0; i < minMaxLen.length; i++) {
+      minRow.setByteArray(minValues[i], minOrdinal++);
+    }
+    return minRow;
+  }
+
+  private void createSchema(SegmentProperties segmentProperties) {
+    List<DataMapSchema> indexSchemas = new ArrayList<>();
+
+    // Index key
+    indexSchemas.add(new DataMapSchema.VariableDataMapSchema(DataType.BYTE_ARRAY));
+    int[] minMaxLen = segmentProperties.getEachDimColumnValueSize();
+    // do it 2 times, one for min and one for max.
+    for (int k = 0; k < 2; k++) {
+      DataMapSchema[] mapSchemas = new DataMapSchema[minMaxLen.length];
+      for (int i = 0; i < minMaxLen.length; i++) {
+        if (minMaxLen[i] <= 0) {
+          mapSchemas[i] = new DataMapSchema.VariableDataMapSchema(DataType.BYTE_ARRAY);
+        } else {
+          mapSchemas[i] = new DataMapSchema.FixedDataMapSchema(DataType.BYTE_ARRAY, minMaxLen[i]);
+        }
+      }
+      DataMapSchema mapSchema = new DataMapSchema.StructDataMapSchema(DataType.STRUCT, mapSchemas);
+      indexSchemas.add(mapSchema);
+    }
+
+    // for number of rows.
+    indexSchemas.add(new DataMapSchema.FixedDataMapSchema(DataType.INT));
+
+    // for table block path
+    indexSchemas.add(new DataMapSchema.VariableDataMapSchema(DataType.BYTE_ARRAY));
+
+    // for number of pages.
+    indexSchemas.add(new DataMapSchema.FixedDataMapSchema(DataType.SHORT));
+
+    // for version number.
+    indexSchemas.add(new DataMapSchema.FixedDataMapSchema(DataType.SHORT));
+
+    // for blocklet info
+    indexSchemas.add(new DataMapSchema.VariableDataMapSchema(DataType.BYTE_ARRAY));
+
+    unsafeMemoryDMStore =
+        new UnsafeMemoryDMStore(indexSchemas.toArray(new DataMapSchema[indexSchemas.size()]));
+  }
+
+  @Override public List<Blocklet> prune(FilterResolverIntf filterExp) {
+    // getting the start and end index key based on filter for hitting the
+    // selected block reference nodes based on filter resolver tree.
+    if (LOGGER.isDebugEnabled()) {
+      LOGGER.debug("preparing the start and end key for finding "
+          + "start and end block as per filter resolver");
+    }
+    List<Blocklet> blocklets = new ArrayList<>();
+    Comparator<DataMapRow> comparator =
+        new BlockletDMComparator(segmentProperties.getEachDimColumnValueSize(),
+            segmentProperties.getNumberOfSortColumns(),
+            segmentProperties.getNumberOfNoDictSortColumns());
+    List<IndexKey> listOfStartEndKeys = new ArrayList<IndexKey>(2);
+    FilterUtil
+        .traverseResolverTreeAndGetStartAndEndKey(segmentProperties, filterExp, listOfStartEndKeys);
+    // reading the first value from list which has start key
+    IndexKey searchStartKey = listOfStartEndKeys.get(0);
+    // reading the last value from list which has end key
+    IndexKey searchEndKey = listOfStartEndKeys.get(1);
+    if (null == searchStartKey && null == searchEndKey) {
+      try {
+        // TODO need to handle for no dictionary dimensions
+        searchStartKey = FilterUtil.prepareDefaultStartIndexKey(segmentProperties);
+        // TODO need to handle for no dictionary dimensions
+        searchEndKey = FilterUtil.prepareDefaultEndIndexKey(segmentProperties);
+      } catch (KeyGenException e) {
+        return null;
+      }
+    }
+    if (LOGGER.isDebugEnabled()) {
+      LOGGER.debug(
+          "Successfully retrieved the start and end key. Dictionary Start Key: " + searchStartKey
+              .getDictionaryKeys() + " No Dictionary Start Key " + searchStartKey
+              .getNoDictionaryKeys() + " Dictionary End Key: " + searchEndKey.getDictionaryKeys()
+              + " No Dictionary End Key " + searchEndKey.getNoDictionaryKeys());
+    }
+    int startIndex = findStartIndex(convertToRow(searchStartKey), comparator);
+    int endIndex = findEndIndex(convertToRow(searchEndKey), comparator);
+    FilterExecuter filterExecuter =
+        FilterUtil.getFilterExecuterTree(filterExp, segmentProperties, null);
+    while (startIndex <= endIndex) {
+      DataMapRow unsafeRow = unsafeMemoryDMStore.getUnsafeRow(startIndex);
+      BitSet bitSet = filterExecuter.isScanRequired(getMinMaxValue(unsafeRow, MAX_VALUES_INDEX),
+          getMinMaxValue(unsafeRow, MIN_VALUES_INDEX));
+      if (!bitSet.isEmpty()) {
+        blocklets.add(createBlocklet(unsafeRow, startIndex));
+      }
+      startIndex++;
+    }
+
+    return blocklets;
+  }
+
+  private byte[][] getMinMaxValue(DataMapRow row, int index) {
+    DataMapRow minMaxRow = row.getRow(index);
+    byte[][] minMax = new byte[minMaxRow.getColumnCount()][];
+    for (int i = 0; i < minMax.length; i++) {
+      minMax[i] = minMaxRow.getByteArray(i);
+    }
+    return minMax;
+  }
+
+  private Blocklet createBlocklet(DataMapRow row, int blockletId) {
+    Blocklet blocklet =
+        new Blocklet(new String(row.getByteArray(FILE_PATH_INDEX)), blockletId + "");
+    Blocklet.BlockletDetailInfo detailInfo = new Blocklet.BlockletDetailInfo();
+    detailInfo.setRowCount(row.getInt(ROW_COUNT_INDEX));
+    detailInfo.setPagesCount(row.getShort(PAGE_COUNT_INDEX));
+    detailInfo.setVersionNumber(row.getShort(VERSION_INDEX));
+    BlockletInfo blockletInfo = new BlockletInfo();
+    try {
+      blockletInfo.writeSerializedData(row.getByteArray(BLOCK_INFO_INDEX));
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+    detailInfo.setBlockletInfo(blockletInfo);
+    blocklet.setDetailInfo(detailInfo);
+    return blocklet;
+  }
+
+  /**
+   * Binary search used to get the first tentative index row based on
+   * search key
+   *
+   * @param key search key
+   * @return first tentative block
+   */
+  private int findStartIndex(DataMapRow key, Comparator<DataMapRow> comparator) {
+    int childNodeIndex;
+    int low = 0;
+    int high = unsafeMemoryDMStore.getRowCount() - 1;
+    int mid = 0;
+    int compareRes = -1;
+    while (low <= high) {
+      mid = (low + high) >>> 1;
+      // compare the entries
+      compareRes = comparator.compare(key, unsafeMemoryDMStore.getUnsafeRow(mid));
+      if (compareRes < 0) {
+        high = mid - 1;
+      } else if (compareRes > 0) {
+        low = mid + 1;
+      } else {
+        // if key is matched then get the first entry
+        int currentPos = mid;
+        while (currentPos - 1 >= 0
+            && comparator.compare(key, unsafeMemoryDMStore.getUnsafeRow(currentPos - 1)) == 0) {
+          currentPos--;
+        }
+        mid = currentPos;
+        break;
+      }
+    }
+    // if the compare result is less than zero and mid is more than 0, then
+    // move to the previous block as duplicate records can be present
+    if (compareRes < 0) {
+      if (mid > 0) {
+        mid--;
+      }
+      childNodeIndex = mid;
+    } else {
+      childNodeIndex = mid;
+    }
+    // get the leaf child
+    return childNodeIndex;
+  }
+
+  /**
+   * Binary search used to get the last tentative index row based on
+   * search key
+   *
+   * @param key search key
+   * @return last tentative block
+   */
+  private int findEndIndex(DataMapRow key, Comparator<DataMapRow> comparator) {
+    int childNodeIndex;
+    int low = 0;
+    int high = unsafeMemoryDMStore.getRowCount() - 1;
+    int mid = 0;
+    int compareRes = -1;
+    while (low <= high) {
+      mid = (low + high) >>> 1;
+      // compare the entries
+      compareRes = comparator.compare(key, unsafeMemoryDMStore.getUnsafeRow(mid));
+      if (compareRes < 0) {
+        high = mid - 1;
+      } else if (compareRes > 0) {
+        low = mid + 1;
+      } else {
+        int currentPos = mid;
+        // if key is matched then get the last entry
+        while (currentPos + 1 < unsafeMemoryDMStore.getRowCount()
+            && comparator.compare(key, unsafeMemoryDMStore.getUnsafeRow(currentPos + 1)) == 0) {
+          currentPos++;
+        }
+        mid = currentPos;
+        break;
+      }
+    }
+    // if the compare result is less than zero and mid is more than 0, then
+    // move to the previous block as duplicate records can be present
+    if (compareRes < 0) {
+      if (mid > 0) {
+        mid--;
+      }
+      childNodeIndex = mid;
+    } else {
+      childNodeIndex = mid;
+    }
+    return childNodeIndex;
+  }
+
+  private DataMapRow convertToRow(IndexKey key) {
+    ByteBuffer buffer =
+        ByteBuffer.allocate(key.getDictionaryKeys().length + key.getNoDictionaryKeys().length + 8);
+    buffer.putInt(key.getDictionaryKeys().length);
+    buffer.putInt(key.getNoDictionaryKeys().length);
+    buffer.put(key.getDictionaryKeys());
+    buffer.put(key.getNoDictionaryKeys());
+    DataMapRowImpl dataMapRow = new DataMapRowImpl(unsafeMemoryDMStore.getSchema());
+    dataMapRow.setByteArray(buffer.array(), 0);
+    return dataMapRow;
+  }
+
+  @Override public void clear() {
+    unsafeMemoryDMStore.freeMemory();
+    unsafeMemoryDMStore = null;
+    segmentProperties = null;
+  }
+}
diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletTableMap.java b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletTableMap.java
new file mode 100644
index 00000000000..823973cfc4a
--- /dev/null
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletTableMap.java
@@ -0,0 +1,85 @@
+package org.apache.carbondata.core.indexstore.blockletindex;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.carbondata.core.datastore.filesystem.CarbonFile;
+import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter;
+import org.apache.carbondata.core.datastore.impl.FileFactory;
+import org.apache.carbondata.core.events.ChangeEvent;
+import org.apache.carbondata.core.indexstore.AbstractTableDataMap;
+import org.apache.carbondata.core.indexstore.Blocklet;
+import org.apache.carbondata.core.indexstore.DataMap;
+import org.apache.carbondata.core.indexstore.DataMapDistributable;
+import org.apache.carbondata.core.indexstore.DataMapWriter;
+import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
+import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf;
+
+/**
+ * Table level datamap for the blocklet index. It loads one BlockletDataMap per
+ * carbonindex file of a segment.
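+ * The loaded datamaps are cached per segment id so that the index files of a
+ * segment are read only once.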
+ */
+public class BlockletTableMap extends AbstractTableDataMap {
+
+  private String dataMapName;
+
+  private AbsoluteTableIdentifier identifier;
+
+  private Map<String, List<DataMap>> map = new HashMap<>();
+
+  @Override public void init(AbsoluteTableIdentifier identifier, String dataMapName) {
+    this.identifier = identifier;
+    this.dataMapName = dataMapName;
+  }
+
+  @Override public DataMapWriter getMetaDataWriter() {
+    return null;
+  }
+
+  @Override
+  public DataMapWriter getDataMapWriter(AbsoluteTableIdentifier identifier, String segmentId) {
+    return null;
+  }
+
+  @Override protected List<DataMap> getDataMaps(String segmentId) {
+    List<DataMap> dataMaps = map.get(segmentId);
+    if (dataMaps == null) {
+      dataMaps = new ArrayList<>();
+      String path = identifier.getTablePath() + "/Part0/Segment_" + segmentId;
+      FileFactory.FileType fileType = FileFactory.getFileType(path);
+      CarbonFile carbonFile = FileFactory.getCarbonFile(path, fileType);
+      CarbonFile[] listFiles = carbonFile.listFiles(new CarbonFileFilter() {
+        @Override public boolean accept(CarbonFile file) {
+          return file.getName().endsWith(".carbonindex");
+        }
+      });
+      for (int i = 0; i < listFiles.length; i++) {
+        BlockletDataMap dataMap = new BlockletDataMap();
+        dataMap.init(listFiles[i].getAbsolutePath());
+        dataMaps.add(dataMap);
+      }
+      // cache the datamaps of this segment so the index files are read only once
+      map.put(segmentId, dataMaps);
+    }
+    return dataMaps;
+  }
+
+  @Override public List<DataMapDistributable> toDistributable(List<String> segmentIds) {
+    return null;
+  }
+
+  @Override protected DataMap getDataMap(DataMapDistributable distributable) {
+    return null;
+  }
+
+  @Override public boolean isFiltersSupported(FilterResolverIntf filterExp) {
+    return false;
+  }
+
+  @Override public void clear() {
+
+  }
+
+  @Override public void fireEvent(ChangeEvent event) {
+
+  }
+}
diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/row/DataMapRow.java b/core/src/main/java/org/apache/carbondata/core/indexstore/row/DataMapRow.java
new file mode 100644
index 00000000000..853e4e8221c
--- /dev/null
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/row/DataMapRow.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.carbondata.core.indexstore.row;
+
+import org.apache.carbondata.core.indexstore.schema.DataMapSchema;
+
+/**
+ * Index row
+ */
+public abstract class DataMapRow {
+
+  protected DataMapSchema[] schemas;
+
+  public DataMapRow(DataMapSchema[] schemas) {
+    this.schemas = schemas;
+  }
+
+  public abstract byte[] getByteArray(int ordinal);
+
+  public abstract DataMapRow getRow(int ordinal);
+
+  public abstract void setRow(DataMapRow row, int ordinal);
+
+  public abstract void setByteArray(byte[] byteArray, int ordinal);
+
+  public abstract int getInt(int ordinal);
+
+  public abstract void setInt(int value, int ordinal);
+
+  public abstract void setByte(byte value, int ordinal);
+
+  public abstract byte getByte(int ordinal);
+
+  public abstract void setShort(short value, int ordinal);
+
+  public abstract short getShort(int ordinal);
+
+  public abstract void setLong(long value, int ordinal);
+
+  public abstract long getLong(int ordinal);
+
+  public abstract void setFloat(float value, int ordinal);
+
+  public abstract float getFloat(int ordinal);
+
+  public abstract void setDouble(double value, int ordinal);
+
+  public abstract double getDouble(int ordinal);
+
+  public int getTotalSizeInBytes() {
+    int len = 0;
+    for (int i = 0; i < schemas.length; i++) {
+      len += getSizeInBytes(i);
+    }
+    return len;
+  }
+
+  public int getSizeInBytes(int ordinal) {
+    switch (schemas[ordinal].getSchemaType()) {
+      case FIXED:
+        return schemas[ordinal].getLength();
+      case VARIABLE:
+        return getByteArray(ordinal).length + 2;
+      case STRUCT:
+        return getRow(ordinal).getTotalSizeInBytes();
+      default:
+        throw new UnsupportedOperationException("wrong type");
+    }
+  }
+
+  public int getColumnCount() {
+    return schemas.length;
+  }
+}
diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/row/DataMapRowImpl.java b/core/src/main/java/org/apache/carbondata/core/indexstore/row/DataMapRowImpl.java
new file mode 100644
index 00000000000..033ea34c89c
--- /dev/null
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/row/DataMapRowImpl.java
@@ -0,0 +1,85 @@
+package org.apache.carbondata.core.indexstore.row;
+
+import org.apache.carbondata.core.indexstore.schema.DataMapSchema;
+import org.apache.carbondata.core.metadata.datatype.DataType;
+
+/**
+ * On-heap implementation of DataMapRow which keeps every column value as an object.
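+ * It is used to build an index row before the row is copied into unsafe memory
+ * by UnsafeMemoryDMStore.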
+ */
+public class DataMapRowImpl extends DataMapRow {
+
+  private Object[] data;
+
+  public DataMapRowImpl(DataMapSchema[] schemas) {
+    super(schemas);
+    this.data = new Object[schemas.length];
+  }
+
+  @Override public byte[] getByteArray(int ordinal) {
+    return (byte[]) data[ordinal];
+  }
+
+  @Override public DataMapRow getRow(int ordinal) {
+    return (DataMapRow) data[ordinal];
+  }
+
+  @Override public void setRow(DataMapRow row, int ordinal) {
+    assert (schemas[ordinal].getDataType() == DataType.STRUCT);
+    data[ordinal] = row;
+  }
+
+  @Override public void setByteArray(byte[] byteArray, int ordinal) {
+    assert (schemas[ordinal].getDataType() == DataType.BYTE_ARRAY);
+    data[ordinal] = byteArray;
+  }
+
+  @Override public int getInt(int ordinal) {
+    return (Integer) data[ordinal];
+  }
+
+  @Override public void setInt(int value, int ordinal) {
+    assert (schemas[ordinal].getDataType() == DataType.INT);
+    data[ordinal] = value;
+  }
+
+  @Override public void setByte(byte value, int ordinal) {
+    assert (schemas[ordinal].getDataType() == DataType.BYTE);
+    data[ordinal] = value;
+  }
+
+  @Override public byte getByte(int ordinal) {
+    return (Byte) data[ordinal];
+  }
+
+  @Override public void setShort(short value, int ordinal) {
+    assert (schemas[ordinal].getDataType() == DataType.SHORT);
+    data[ordinal] = value;
+  }
+
+  @Override public short getShort(int ordinal) {
+    return (Short) data[ordinal];
+  }
+
+  @Override public void setLong(long value, int ordinal) {
+    assert (schemas[ordinal].getDataType() == DataType.LONG);
+    data[ordinal] = value;
+  }
+
+  @Override public long getLong(int ordinal) {
+    return (Long) data[ordinal];
+  }
+
+  @Override public void setFloat(float value, int ordinal) {
+    assert (schemas[ordinal].getDataType() == DataType.FLOAT);
+    data[ordinal] = value;
+  }
+
+  @Override public float getFloat(int ordinal) {
+    return (Float) data[ordinal];
+  }
+
+  @Override public void setDouble(double value, int ordinal) {
+    assert (schemas[ordinal].getDataType() == DataType.DOUBLE);
+    data[ordinal] = value;
+  }
+
+  @Override public double getDouble(int ordinal) {
+    return (Double) data[ordinal];
+  }
+
+}
diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/row/UnsafeDataMapRow.java b/core/src/main/java/org/apache/carbondata/core/indexstore/row/UnsafeDataMapRow.java
new file mode 100644
index 00000000000..e0503727412
--- /dev/null
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/row/UnsafeDataMapRow.java
@@ -0,0 +1,111 @@
+package org.apache.carbondata.core.indexstore.row;
+
+import org.apache.carbondata.core.indexstore.schema.DataMapSchema;
+import org.apache.carbondata.core.memory.MemoryBlock;
+
+import static org.apache.carbondata.core.memory.CarbonUnsafe.BYTE_ARRAY_OFFSET;
+import static org.apache.carbondata.core.memory.CarbonUnsafe.unsafe;
+
+/**
+ * DataMapRow implementation which reads its values directly from unsafe memory.
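+ * Column offsets are computed from the schema on every access, so nothing is
+ * copied until a getter is called.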
+ */
+public class UnsafeDataMapRow extends DataMapRow {
+
+  private MemoryBlock block;
+
+  private int pointer;
+
+  public UnsafeDataMapRow(DataMapSchema[] schemas, MemoryBlock block, int pointer) {
+    super(schemas);
+    this.block = block;
+    this.pointer = pointer;
+  }
+
+  @Override public byte[] getByteArray(int ordinal) {
+    int length;
+    int position = getPosition(ordinal);
+    switch (schemas[ordinal].getSchemaType()) {
+      case VARIABLE:
+        length = unsafe.getShort(block.getBaseObject(), block.getBaseOffset() + pointer + position);
+        position += 2;
+        break;
+      default:
+        length = schemas[ordinal].getLength();
+    }
+    byte[] data = new byte[length];
+    unsafe.copyMemory(block.getBaseObject(), block.getBaseOffset() + pointer + position, data,
+        BYTE_ARRAY_OFFSET, data.length);
+    return data;
+  }
+
+  @Override public DataMapRow getRow(int ordinal) {
+    DataMapSchema[] childSchemas =
+        ((DataMapSchema.StructDataMapSchema) schemas[ordinal]).getChildSchemas();
+    return new UnsafeDataMapRow(childSchemas, block, pointer + getPosition(ordinal));
+  }
+
+  @Override public void setRow(DataMapRow row, int ordinal) {
+    throw new UnsupportedOperationException("Not supported to set on unsafe row");
+  }
+
+  @Override public void setByteArray(byte[] byteArray, int ordinal) {
+    throw new UnsupportedOperationException("Not supported to set on unsafe row");
+  }
+
+  @Override public int getInt(int ordinal) {
+    return unsafe
+        .getInt(block.getBaseObject(), block.getBaseOffset() + pointer + getPosition(ordinal));
+  }
+
+  @Override public void setInt(int value, int ordinal) {
+    throw new UnsupportedOperationException("Not supported to set on unsafe row");
+  }
+
+  @Override public void setByte(byte value, int ordinal) {
+    throw new UnsupportedOperationException("Not supported to set on unsafe row");
+  }
+
+  @Override public byte getByte(int ordinal) {
+    return unsafe
+        .getByte(block.getBaseObject(), block.getBaseOffset() + pointer + getPosition(ordinal));
+  }
+
+  @Override public void setShort(short value, int ordinal) {
+    throw new UnsupportedOperationException("Not supported to set on unsafe row");
+  }
+
+  @Override public short getShort(int ordinal) {
+    return unsafe
+        .getShort(block.getBaseObject(), block.getBaseOffset() + pointer + getPosition(ordinal));
+  }
+
+  @Override public void setLong(long value, int ordinal) {
+    throw new UnsupportedOperationException("Not supported to set on unsafe row");
+  }
+
+  @Override public long getLong(int ordinal) {
+    return unsafe
+        .getLong(block.getBaseObject(), block.getBaseOffset() + pointer + getPosition(ordinal));
+  }
+
+  @Override public void setFloat(float value, int ordinal) {
+    throw new UnsupportedOperationException("Not supported to set on unsafe row");
+  }
+
+  @Override public float getFloat(int ordinal) {
+    return unsafe
+        .getFloat(block.getBaseObject(), block.getBaseOffset() + pointer + getPosition(ordinal));
+  }
+
+  @Override public void setDouble(double value, int ordinal) {
+    throw new UnsupportedOperationException("Not supported to set on unsafe row");
+  }
+
+  @Override public double getDouble(int ordinal) {
+    return unsafe
+        .getDouble(block.getBaseObject(), block.getBaseOffset() + pointer + getPosition(ordinal));
+  }
+
+  // walk the previous columns to compute the byte offset of the given ordinal
+  private int getPosition(int ordinal) {
+    int position = 0;
+    for (int i = 0; i < ordinal; i++) {
+      position += getSizeInBytes(i);
+    }
+    return position;
+  }
+}
diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/schema/DataMapSchema.java b/core/src/main/java/org/apache/carbondata/core/indexstore/schema/DataMapSchema.java
new file mode 100644
index 00000000000..80c68ac18bf
--- /dev/null
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/schema/DataMapSchema.java
@@ -0,0 +1,124 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.carbondata.core.indexstore.schema;
+
+import org.apache.carbondata.core.metadata.datatype.DataType;
+
+/**
+ * Schema of a data map column. It has three types right now: fixed, variable and struct.
+ */
+public abstract class DataMapSchema {
+
+  protected DataType dataType;
+
+  public DataMapSchema(DataType dataType) {
+    this.dataType = dataType;
+  }
+
+  /**
+   * Data type of this column.
+   *
+   * @return
+   */
+  public DataType getDataType() {
+    return dataType;
+  }
+
+  /**
+   * Gives the length in case of a fixed schema; otherwise returns the default
+   * size of the data type.
+   *
+   * @return
+   */
+  public abstract int getLength();
+
+  /**
+   * schema type
+   * @return
+   */
+  public abstract DataMapSchemaType getSchemaType();
+
+  /**
+   * It always has a fixed length; the length cannot be updated later.
+   * Usage examples : all primitive types like short, int etc
+   */
+  public static class FixedDataMapSchema extends DataMapSchema {
+
+    private int length;
+
+    public FixedDataMapSchema(DataType dataType) {
+      super(dataType);
+    }
+
+    public FixedDataMapSchema(DataType dataType, int length) {
+      super(dataType);
+      this.length = length;
+    }
+
+    @Override public int getLength() {
+      if (length == 0) {
+        return dataType.getSizeInBytes();
+      } else {
+        return length;
+      }
+    }
+
+    @Override public DataMapSchemaType getSchemaType() {
+      return DataMapSchemaType.FIXED;
+    }
+  }
+
+  public static class VariableDataMapSchema extends DataMapSchema {
+
+    public VariableDataMapSchema(DataType dataType) {
+      super(dataType);
+    }
+
+    @Override public int getLength() {
+      return dataType.getSizeInBytes();
+    }
+
+    @Override public DataMapSchemaType getSchemaType() {
+      return DataMapSchemaType.VARIABLE;
+    }
+  }
+
+  public static class StructDataMapSchema extends DataMapSchema {
+
+    private DataMapSchema[] childSchemas;
+
+    public StructDataMapSchema(DataType dataType, DataMapSchema[] childSchemas) {
+      super(dataType);
+      this.childSchemas = childSchemas;
+    }
+
+    @Override public int getLength() {
+      return dataType.getSizeInBytes();
+    }
+
+    public DataMapSchema[] getChildSchemas() {
+      return childSchemas;
+    }
+
+    @Override public DataMapSchemaType getSchemaType() {
+      return DataMapSchemaType.STRUCT;
+    }
+  }
+
+  public enum DataMapSchemaType {
+    FIXED, VARIABLE, STRUCT
+  }
+}
diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/blocklet/BlockletInfo.java b/core/src/main/java/org/apache/carbondata/core/metadata/blocklet/BlockletInfo.java
index bfa9d7ea490..c1f97eb81d3 100644
--- a/core/src/main/java/org/apache/carbondata/core/metadata/blocklet/BlockletInfo.java
+++ b/core/src/main/java/org/apache/carbondata/core/metadata/blocklet/BlockletInfo.java
@@ -17,7
+17,13 @@ package org.apache.carbondata.core.metadata.blocklet; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; import java.io.Serializable; +import java.util.ArrayList; import java.util.List; import org.apache.carbondata.core.metadata.blocklet.datachunk.DataChunk; @@ -189,4 +195,59 @@ public void setNumberOfPages(int numberOfPages) { this.numberOfPages = numberOfPages; } + public byte[] getSerializedData() throws IOException { + ByteArrayOutputStream stream = new ByteArrayOutputStream(); + DataOutputStream output = new DataOutputStream(stream); + output.writeLong(dimensionOffset); + output.writeLong(measureOffsets); + int dsize = dimensionChunkOffsets != null ? dimensionChunkOffsets.size() : 0; + output.writeShort(dsize); + for (int i = 0; i < dsize; i++) { + output.writeLong(dimensionChunkOffsets.get(i)); + } + for (int i = 0; i < dsize; i++) { + output.writeInt(dimensionChunksLength.get(i)); + } + int mSize = measureChunkOffsets != null ? measureChunkOffsets.size() : 0; + output.writeShort(mSize); + for (int i = 0; i < mSize; i++) { + output.writeLong(measureChunkOffsets.get(i)); + } + for (int i = 0; i < mSize; i++) { + output.writeInt(measureChunksLength.get(i)); + } + + output.close(); + return stream.toByteArray(); + } + + public void writeSerializedData(byte[] data) throws IOException { + ByteArrayInputStream stream = new ByteArrayInputStream(data); + DataInputStream input = new DataInputStream(stream); + + dimensionOffset = input.readLong(); + measureOffsets = input.readLong(); + short dimensionChunkOffsetsSize = input.readShort(); + dimensionChunkOffsets = new ArrayList<>(dimensionChunkOffsetsSize); + for (int i = 0; i < dimensionChunkOffsetsSize; i++) { + dimensionChunkOffsets.add(input.readLong()); + } + dimensionChunksLength = new ArrayList<>(dimensionChunkOffsetsSize); + for (int i = 0; i < dimensionChunkOffsetsSize; i++) { + dimensionChunksLength.add(input.readInt()); + } + + short measureChunkOffsetsSize = input.readShort(); + measureChunkOffsets = new ArrayList<>(measureChunkOffsetsSize); + for (int i = 0; i < measureChunkOffsetsSize; i++) { + measureChunkOffsets.add(input.readLong()); + } + measureChunksLength = new ArrayList<>(measureChunkOffsetsSize); + for (int i = 0; i < measureChunkOffsetsSize; i++) { + measureChunksLength.add(input.readInt()); + } + + input.close(); + } + } diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/index/BlockIndexInfo.java b/core/src/main/java/org/apache/carbondata/core/metadata/index/BlockIndexInfo.java index cd86a075272..ae99ed829ee 100644 --- a/core/src/main/java/org/apache/carbondata/core/metadata/index/BlockIndexInfo.java +++ b/core/src/main/java/org/apache/carbondata/core/metadata/index/BlockIndexInfo.java @@ -16,6 +16,7 @@ */ package org.apache.carbondata.core.metadata.index; +import org.apache.carbondata.core.metadata.blocklet.BlockletInfo; import org.apache.carbondata.core.metadata.blocklet.index.BlockletIndex; /** @@ -44,6 +45,11 @@ public class BlockIndexInfo { */ private BlockletIndex blockletIndex; + /** + * to store blocklet info like offsets and lengths of each column. 
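+   * It is written into the index file (the new optional blocklet_info field in
+   * carbondata_index.thrift) so that blocklet level pruning can be done without
+   * reading the carbondata file footer.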
+   */
+  private BlockletInfo blockletInfo;
+
   /**
    * Constructor
    *
@@ -60,6 +66,20 @@ public BlockIndexInfo(long numberOfRows, String fileName, long offset,
     this.blockletIndex = blockletIndex;
   }
 
+  /**
+   * @param numberOfRows
+   * @param fileName
+   * @param offset
+   * @param blockletIndex
+   * @param blockletInfo
+   */
+  public BlockIndexInfo(long numberOfRows, String fileName, long offset,
+      BlockletIndex blockletIndex, BlockletInfo blockletInfo) {
+    this(numberOfRows, fileName, offset, blockletIndex);
+    this.blockletInfo = blockletInfo;
+  }
+
   /**
    * @return the numberOfRows
    */
@@ -87,4 +107,11 @@ public long getOffset() {
   public BlockletIndex getBlockletIndex() {
     return blockletIndex;
   }
+
+  /**
+   * @return BlockletInfo
+   */
+  public BlockletInfo getBlockletInfo() {
+    return blockletInfo;
+  }
 }
diff --git a/core/src/main/java/org/apache/carbondata/core/util/AbstractDataFileFooterConverter.java b/core/src/main/java/org/apache/carbondata/core/util/AbstractDataFileFooterConverter.java
index 97b1a1f36c0..2064bad14b6 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/AbstractDataFileFooterConverter.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/AbstractDataFileFooterConverter.java
@@ -121,6 +121,57 @@ public List<DataFileFooter> getIndexInfo(String filePath, List<TableBlockInfo> t
     return dataFileFooters;
   }
 
+  /**
+   * Below method will be used to get the index info from index file
+   *
+   * @param filePath file path of the index file
+   * @return list of index info
+   * @throws IOException problem while reading the index file
+   */
+  public List<DataFileFooter> getIndexInfo(String filePath) throws IOException {
+    CarbonIndexFileReader indexReader = new CarbonIndexFileReader();
+    List<DataFileFooter> dataFileFooters = new ArrayList<DataFileFooter>();
+    String parentPath = filePath.substring(0, filePath.lastIndexOf("/"));
+    try {
+      // open the reader
+      indexReader.openThriftReader(filePath);
+      // get the index header
+      org.apache.carbondata.format.IndexHeader readIndexHeader = indexReader.readIndexHeader();
+      List<ColumnSchema> columnSchemaList = new ArrayList<ColumnSchema>();
+      List<org.apache.carbondata.format.ColumnSchema> table_columns =
+          readIndexHeader.getTable_columns();
+      for (int i = 0; i < table_columns.size(); i++) {
+        columnSchemaList.add(thriftColumnSchmeaToWrapperColumnSchema(table_columns.get(i)));
+      }
+      // get the segment info
+      SegmentInfo segmentInfo = getSegmentInfo(readIndexHeader.getSegment_info());
+      BlockletIndex blockletIndex = null;
+      DataFileFooter dataFileFooter = null;
+      // read the block info from file
+      while (indexReader.hasNext()) {
+        BlockIndex readBlockIndexInfo = indexReader.readBlockIndexInfo();
+        blockletIndex = getBlockletIndex(readBlockIndexInfo.getBlock_index());
+        dataFileFooter = new DataFileFooter();
+        TableBlockInfo tableBlockInfo = new TableBlockInfo();
+        tableBlockInfo.setBlockOffset(readBlockIndexInfo.getOffset());
+        tableBlockInfo.setVersion(
+            ColumnarFormatVersion.valueOf((short) readIndexHeader.getVersion()));
+        int blockletSize = getBlockletSize(readBlockIndexInfo);
+        tableBlockInfo.getBlockletInfos().setNoOfBlockLets(blockletSize);
+        tableBlockInfo.setFilePath(parentPath + "/" + readBlockIndexInfo.file_name);
+        dataFileFooter.setBlockletIndex(blockletIndex);
+        dataFileFooter.setColumnInTable(columnSchemaList);
+        dataFileFooter.setNumberOfRows(readBlockIndexInfo.getNum_rows());
+        dataFileFooter.setBlockInfo(new BlockInfo(tableBlockInfo));
+        dataFileFooter.setSegmentInfo(segmentInfo);
+        dataFileFooters.add(dataFileFooter);
+      }
+    } finally {
+      indexReader.closeThriftReader();
+    }
+    return dataFileFooters;
+  }
+
   /**
    * the methods returns the number of blocklets in a block
    *
diff --git a/format/src/main/thrift/carbondata_index.thrift b/format/src/main/thrift/carbondata_index.thrift index c055031d041..4df085ad26b 100644 --- a/format/src/main/thrift/carbondata_index.thrift +++ b/format/src/main/thrift/carbondata_index.thrift @@ -41,4 +41,5 @@ struct BlockIndex{ 2: required string file_name; // Block file name 3: required i64 offset; // Offset of the footer 4: required carbondata.BlockletIndex block_index; // Blocklet index + 5: optional carbondata.BlockletInfo3 blocklet_info; } \ No newline at end of file From 0216583315232ddb80ada23451793f7e025e33a0 Mon Sep 17 00:00:00 2001 From: ravipesala Date: Thu, 22 Jun 2017 10:19:52 +0530 Subject: [PATCH 2/4] Blocklet implementation for datamap --- .../core/datastore/block/TableBlockInfo.java | 11 + .../core/indexstore/AbstractTableDataMap.java | 12 +- .../carbondata/core/indexstore/Blocklet.java | 75 ++- .../core/indexstore/BlockletDetailInfo.java | 105 ++++ .../core/indexstore/DataMapStoreManager.java | 19 +- .../core/indexstore/DataMapType.java | 2 +- .../blockletindex/BlockletDataMap.java | 104 +++- .../BlockletDataRefNodeWrapper.java | 137 +++++ .../blockletindex/BlockletTableMap.java | 24 +- .../blockletindex/IndexWrapper.java | 58 ++ .../core/indexstore/row/DataMapRow.java | 2 + .../core/indexstore/row/DataMapRowImpl.java | 5 + .../core/indexstore/row/UnsafeDataMapRow.java | 7 +- .../core/metadata/blocklet/BlockletInfo.java | 20 +- .../executor/impl/AbstractQueryExecutor.java | 36 +- .../processor/AbstractDataBlockIterator.java | 3 + .../AbstractDetailQueryResultIterator.java | 33 +- .../util/AbstractDataFileFooterConverter.java | 2 + .../carbondata/core/util/CarbonUtil.java | 13 +- .../core/util/DataFileFooterConverter.java | 4 + .../core/util/DataFileFooterConverter2.java | 3 + .../core/util/DataFileFooterConverterV3.java | 11 + .../carbondata/hadoop/CarbonInputFormat.java | 14 +- .../hadoop/CarbonInputFormatNew.java | 538 ++++++++++++++++++ .../carbondata/hadoop/CarbonInputSplit.java | 30 +- .../spark/rdd/CarbonMergerRDD.scala | 3 +- .../carbondata/spark/rdd/CarbonScanRDD.scala | 18 +- 27 files changed, 1139 insertions(+), 150 deletions(-) create mode 100644 core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDetailInfo.java create mode 100644 core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataRefNodeWrapper.java create mode 100644 core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/IndexWrapper.java create mode 100644 hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormatNew.java diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java index 6aba13c8c34..f0038820035 100644 --- a/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java +++ b/core/src/main/java/org/apache/carbondata/core/datastore/block/TableBlockInfo.java @@ -22,6 +22,7 @@ import org.apache.carbondata.core.constants.CarbonCommonConstants; import org.apache.carbondata.core.datastore.impl.FileFactory; +import org.apache.carbondata.core.indexstore.BlockletDetailInfo; import org.apache.carbondata.core.metadata.ColumnarFormatVersion; import org.apache.carbondata.core.util.ByteUtil; import org.apache.carbondata.core.util.path.CarbonTablePath; @@ -77,6 +78,8 @@ public class TableBlockInfo implements Distributable, Serializable { */ private String[] deletedDeltaFilePath; + private BlockletDetailInfo detailInfo; + public 
TableBlockInfo(String filePath, long blockOffset, String segmentId, String[] locations, long blockLength, ColumnarFormatVersion version, String[] deletedDeltaFilePath) { this.filePath = FileFactory.getUpdatedFilePath(filePath); @@ -327,4 +330,12 @@ public String[] getDeletedDeltaFilePath() { public void setFilePath(String filePath) { this.filePath = filePath; } + + public BlockletDetailInfo getDetailInfo() { + return detailInfo; + } + + public void setDetailInfo(BlockletDetailInfo detailInfo) { + this.detailInfo = detailInfo; + } } diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/AbstractTableDataMap.java b/core/src/main/java/org/apache/carbondata/core/indexstore/AbstractTableDataMap.java index 1f97977f900..87e06574e75 100644 --- a/core/src/main/java/org/apache/carbondata/core/indexstore/AbstractTableDataMap.java +++ b/core/src/main/java/org/apache/carbondata/core/indexstore/AbstractTableDataMap.java @@ -62,12 +62,20 @@ public List prune(List segmentIds, FilterResolverIntf filterEx for (String segmentId: segmentIds) { List<DataMap> dataMaps = getDataMaps(segmentId); for (DataMap dataMap: dataMaps) { - blocklets.addAll(dataMap.prune(filterExp)); + List<Blocklet> pruneBlocklets = dataMap.prune(filterExp); + blocklets.addAll(addSegmentId(pruneBlocklets, segmentId)); } } return blocklets; } + private List<Blocklet> addSegmentId(List<Blocklet> pruneBlocklets, String segmentId) { + for (Blocklet blocklet : pruneBlocklets) { + blocklet.setSegmentId(segmentId); + } + return pruneBlocklets; + } + /** * Get the datamap for segmentid * @param segmentId @@ -115,6 +123,6 @@ public List prune(DataMapDistributable distributable, FilterResolverIn /** * Clears table level datamap */ - public abstract void clear(); + public abstract void clear(List<String> segmentIds); } diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/Blocklet.java b/core/src/main/java/org/apache/carbondata/core/indexstore/Blocklet.java index 0de21a3abb2..4f27b12f7d8 100644 --- a/core/src/main/java/org/apache/carbondata/core/indexstore/Blocklet.java +++ b/core/src/main/java/org/apache/carbondata/core/indexstore/Blocklet.java @@ -16,27 +16,40 @@ */ package org.apache.carbondata.core.indexstore; +import java.io.IOException; import java.io.Serializable; +import org.apache.carbondata.core.datastore.impl.FileFactory; import org.apache.carbondata.core.metadata.blocklet.BlockletInfo; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RemoteIterator; + /** * Blocklet */ public class Blocklet implements Serializable { - private String path; + private Path path; + + private String segmentId; private String blockletId; private BlockletDetailInfo detailInfo; + private long length; + + private String[] location; + public Blocklet(String path, String blockletId) { - this.path = path; + this.path = new Path(path); this.blockletId = blockletId; } - public String getPath() { + public Path getPath() { return path; } @@ -52,46 +65,28 @@ public void setDetailInfo(BlockletDetailInfo detailInfo) { this.detailInfo = detailInfo; } - public static class BlockletDetailInfo implements Serializable { - - private int rowCount; - - private int pagesCount; - - private int versionNumber; - - private BlockletInfo blockletInfo; - - public int getRowCount() { - return rowCount; - } - - public void setRowCount(int rowCount) { - this.rowCount = rowCount; - } - - public int getPagesCount() { - return pagesCount; - } - - public void setPagesCount(int pagesCount) { - this.pagesCount 
= pagesCount; - } + public void updateLocations() throws IOException { + FileSystem fs = path.getFileSystem(FileFactory.getConfiguration()); + RemoteIterator iter = fs.listLocatedStatus(path); + LocatedFileStatus fileStatus = iter.next(); + location = fileStatus.getBlockLocations()[0].getHosts(); + length = fileStatus.getLen(); + } - public int getVersionNumber() { - return versionNumber; - } + public String[] getLocations() throws IOException { + return location; + } - public void setVersionNumber(int versionNumber) { - this.versionNumber = versionNumber; - } + public long getLength() throws IOException { + return length; + } - public BlockletInfo getBlockletInfo() { - return blockletInfo; - } + public String getSegmentId() { + return segmentId; + } - public void setBlockletInfo(BlockletInfo blockletInfo) { - this.blockletInfo = blockletInfo; - } + public void setSegmentId(String segmentId) { + this.segmentId = segmentId; } + } diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDetailInfo.java b/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDetailInfo.java new file mode 100644 index 00000000000..f1c7f68952e --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDetailInfo.java @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.carbondata.core.indexstore; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.io.Serializable; + +import org.apache.carbondata.core.metadata.blocklet.BlockletInfo; + +import org.apache.hadoop.io.Writable; + +/** + * Blocklet detail information to be sent to each executor + */ +public class BlockletDetailInfo implements Serializable, Writable { + + private int rowCount; + + private short pagesCount; + + private short versionNumber; + + private int[] dimLens; + + private BlockletInfo blockletInfo; + + public int getRowCount() { + return rowCount; + } + + public void setRowCount(int rowCount) { + this.rowCount = rowCount; + } + + public int getPagesCount() { + return pagesCount; + } + + public void setPagesCount(short pagesCount) { + this.pagesCount = pagesCount; + } + + public short getVersionNumber() { + return versionNumber; + } + + public void setVersionNumber(short versionNumber) { + this.versionNumber = versionNumber; + } + + public BlockletInfo getBlockletInfo() { + return blockletInfo; + } + + public void setBlockletInfo(BlockletInfo blockletInfo) { + this.blockletInfo = blockletInfo; + } + + public int[] getDimLens() { + return dimLens; + } + + public void setDimLens(int[] dimLens) { + this.dimLens = dimLens; + } + + @Override public void write(DataOutput out) throws IOException { + out.writeInt(rowCount); + out.writeShort(pagesCount); + out.writeShort(versionNumber); + out.writeShort(dimLens.length); + for (int i = 0; i < dimLens.length; i++) { + out.writeInt(dimLens[i]); + } + blockletInfo.write(out); + } + + @Override public void readFields(DataInput in) throws IOException { + rowCount = in.readInt(); + pagesCount = in.readShort(); + versionNumber = in.readShort(); + dimLens = new int[in.readShort()]; + for (int i = 0; i < dimLens.length; i++) { + dimLens[i] = in.readInt(); + } + blockletInfo = new BlockletInfo(); + blockletInfo.readFields(in); + } +} diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/DataMapStoreManager.java b/core/src/main/java/org/apache/carbondata/core/indexstore/DataMapStoreManager.java index de31581a2c9..20058d3c95a 100644 --- a/core/src/main/java/org/apache/carbondata/core/indexstore/DataMapStoreManager.java +++ b/core/src/main/java/org/apache/carbondata/core/indexstore/DataMapStoreManager.java @@ -16,6 +16,7 @@ */ package org.apache.carbondata.core.indexstore; +import java.util.ArrayList; import java.util.HashMap; import java.util.Map; @@ -51,12 +52,12 @@ public AbstractTableDataMap getDataMap(AbsoluteTableIdentifier identifier, Strin Map map = dataMapMappping.get(mapType); AbstractTableDataMap dataMap = null; if (map == null) { + createTableDataMap(identifier, mapType, dataMapName); + map = dataMapMappping.get(mapType); + } + dataMap = map.get(dataMapName); + if (dataMap == null) { throw new RuntimeException("Datamap does not exist"); - } else { - dataMap = map.get(dataMapName); - if (dataMap == null) { - throw new RuntimeException("Datamap does not exist"); - } } // Initialize datamap dataMap.init(identifier, dataMapName); @@ -69,8 +70,8 @@ public AbstractTableDataMap getDataMap(AbsoluteTableIdentifier identifier, Strin * @param mapType * @return */ - public AbstractTableDataMap createTableDataMap(AbsoluteTableIdentifier identifier, DataMapType mapType, - String dataMapName) { + public AbstractTableDataMap createTableDataMap(AbsoluteTableIdentifier identifier, + DataMapType mapType, String dataMapName) { Map map = dataMapMappping.get(mapType); if (map == null) { 
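+ // nothing registered for this datamap type yet: create its container lazily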
map = new HashMap<>(); @@ -82,7 +83,7 @@ public AbstractTableDataMap createTableDataMap(AbsoluteTableIdentifier identifie } try { - //TODO create datamap using @mapType.getClassName()) + dataMap = (AbstractTableDataMap) (Class.forName(mapType.getClassName()).newInstance()); } catch (Exception e) { LOGGER.error(e); } @@ -94,7 +95,7 @@ public AbstractTableDataMap createTableDataMap(AbsoluteTableIdentifier identifie public void clearDataMap(String dataMapName, DataMapType mapType) { Map map = dataMapMappping.get(mapType); if (map != null && map.get(dataMapName) != null) { - map.remove(dataMapName).clear(); + map.remove(dataMapName).clear(new ArrayList()); } } diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/DataMapType.java b/core/src/main/java/org/apache/carbondata/core/indexstore/DataMapType.java index b6a0f5b0ff5..a9477f0770b 100644 --- a/core/src/main/java/org/apache/carbondata/core/indexstore/DataMapType.java +++ b/core/src/main/java/org/apache/carbondata/core/indexstore/DataMapType.java @@ -20,7 +20,7 @@ * Datamap type */ public enum DataMapType { - BLOCKLET("org.apache.carbondata.datamap.BlockletDataMap"); + BLOCKLET("org.apache.carbondata.core.indexstore.blockletindex.BlockletTableMap"); private String className; diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java index 5894bac2e60..46c60c63284 100644 --- a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java +++ b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java @@ -1,5 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ package org.apache.carbondata.core.indexstore.blockletindex; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutput; +import java.io.DataOutputStream; import java.io.IOException; import java.nio.ByteBuffer; import java.util.ArrayList; @@ -11,7 +32,9 @@ import org.apache.carbondata.common.logging.LogServiceFactory; import org.apache.carbondata.core.datastore.IndexKey; import org.apache.carbondata.core.datastore.block.SegmentProperties; +import org.apache.carbondata.core.datastore.block.TableBlockInfo; import org.apache.carbondata.core.indexstore.Blocklet; +import org.apache.carbondata.core.indexstore.BlockletDetailInfo; import org.apache.carbondata.core.indexstore.DataMap; import org.apache.carbondata.core.indexstore.DataMapWriter; import org.apache.carbondata.core.indexstore.UnsafeMemoryDMStore; @@ -58,6 +81,8 @@ public class BlockletDataMap implements DataMap { private SegmentProperties segmentProperties; + private int[] columnCardinality; + @Override public DataMapWriter getWriter() { return null; } @@ -69,20 +94,22 @@ public class BlockletDataMap implements DataMap { for (DataFileFooter fileFooter : indexInfo) { List columnInTable = fileFooter.getColumnInTable(); if (segmentProperties == null) { - segmentProperties = new SegmentProperties(columnInTable, - fileFooter.getSegmentInfo().getColumnCardinality()); + columnCardinality = fileFooter.getSegmentInfo().getColumnCardinality(); + segmentProperties = new SegmentProperties(columnInTable, columnCardinality); createSchema(segmentProperties); } - fileFooter = CarbonUtil.readMetadatFile(fileFooter.getBlockInfo().getTableBlockInfo()); - loadToUnsafe(fileFooter, segmentProperties); - } + TableBlockInfo blockInfo = fileFooter.getBlockInfo().getTableBlockInfo(); + fileFooter = CarbonUtil.readMetadatFile(blockInfo); + loadToUnsafe(fileFooter, segmentProperties, blockInfo.getFilePath()); + } } catch (IOException e) { throw new RuntimeException(e); } } - private void loadToUnsafe(DataFileFooter fileFooter, SegmentProperties segmentProperties) { + private void loadToUnsafe(DataFileFooter fileFooter, SegmentProperties segmentProperties, + String filePath) { int[] minMaxLen = segmentProperties.getEachDimColumnValueSize(); List blockletList = fileFooter.getBlockletList(); DataMapSchema[] schema = unsafeMemoryDMStore.getSchema(); @@ -95,14 +122,16 @@ private void loadToUnsafe(DataFileFooter fileFooter, SegmentProperties segmentPr row.setByteArray(blockletInfo.getBlockletIndex().getBtreeIndex().getStartKey(), ordinal++); BlockletMinMaxIndex minMaxIndex = blockletInfo.getBlockletIndex().getMinMaxIndex(); - addMinMax(minMaxLen, schema[ordinal++], minMaxIndex.getMinValues()); - addMinMax(minMaxLen, schema[ordinal++], minMaxIndex.getMaxValues()); + row.setRow(addMinMax(minMaxLen, schema[ordinal], minMaxIndex.getMinValues()), ordinal); + ordinal++; + row.setRow(addMinMax(minMaxLen, schema[ordinal], minMaxIndex.getMaxValues()), ordinal); + ordinal++; row.setInt(blockletInfo.getNumberOfRows(), ordinal++); // add file path - byte[] filePath = fileFooter.getBlockInfo().getTableBlockInfo().getFilePath().getBytes(); - row.setByteArray(filePath, ordinal++); + byte[] filePathBytes = filePath.getBytes(); + row.setByteArray(filePathBytes, ordinal++); // add pages row.setShort((short) blockletInfo.getNumberOfPages(), ordinal++); @@ -113,7 +142,10 @@ private void loadToUnsafe(DataFileFooter fileFooter, SegmentProperties segmentPr // add blocklet info byte[] serializedData; try { - 
serializedData = blockletInfo.getSerializedData(); + ByteArrayOutputStream stream = new ByteArrayOutputStream(); + DataOutput dataOutput = new DataOutputStream(stream); + blockletInfo.write(dataOutput); + serializedData = stream.toByteArray(); } catch (IOException e) { throw new RuntimeException(e); } @@ -123,7 +155,7 @@ private void loadToUnsafe(DataFileFooter fileFooter, SegmentProperties segmentPr unsafeMemoryDMStore.finishWriting(); } - private void addMinMax(int[] minMaxLen, DataMapSchema dataMapSchema, byte[][] minValues) { + private DataMapRow addMinMax(int[] minMaxLen, DataMapSchema dataMapSchema, byte[][] minValues) { DataMapSchema[] minSchemas = ((DataMapSchema.StructDataMapSchema) dataMapSchema).getChildSchemas(); DataMapRow minRow = new DataMapRowImpl(minSchemas); @@ -132,6 +164,7 @@ private void addMinMax(int[] minMaxLen, DataMapSchema dataMapSchema, byte[][] mi for (int i = 0; i < minMaxLen.length; i++) { minRow.setByteArray(minValues[i], minOrdinal++); } + return minRow; } private void createSchema(SegmentProperties segmentProperties) { @@ -145,9 +178,9 @@ private void createSchema(SegmentProperties segmentProperties) { DataMapSchema[] mapSchemas = new DataMapSchema[minMaxLen.length]; for (int i = 0; i < minMaxLen.length; i++) { if (minMaxLen[i] <= 0) { - indexSchemas.add(new DataMapSchema.VariableDataMapSchema(DataType.BYTE_ARRAY)); + mapSchemas[i] = new DataMapSchema.VariableDataMapSchema(DataType.BYTE_ARRAY); } else { - indexSchemas.add(new DataMapSchema.FixedDataMapSchema(DataType.BYTE_ARRAY, minMaxLen[i])); + mapSchemas[i] = new DataMapSchema.FixedDataMapSchema(DataType.BYTE_ARRAY, minMaxLen[i]); } } DataMapSchema mapSchema = new DataMapSchema.StructDataMapSchema(DataType.STRUCT, mapSchemas); @@ -210,28 +243,36 @@ private void createSchema(SegmentProperties segmentProperties) { .getNoDictionaryKeys() + "Dictionary End Key: " + searchEndKey.getDictionaryKeys() + "No Dictionary End Key " + searchEndKey.getNoDictionaryKeys()); } - int startIndex = findStartIndex(convertToRow(searchStartKey), comparator); - int endIndex = findEndIndex(convertToRow(searchEndKey), comparator); - FilterExecuter filterExecuter = - FilterUtil.getFilterExecuterTree(filterExp, segmentProperties, null); - while (startIndex <= endIndex) { - DataMapRow unsafeRow = unsafeMemoryDMStore.getUnsafeRow(startIndex); - BitSet bitSet = filterExecuter.isScanRequired(getMinMaxValue(unsafeRow, MAX_VALUES_INDEX), - getMinMaxValue(unsafeRow, MIN_VALUES_INDEX)); - if (!bitSet.isEmpty()) { - blocklets.add(createBlocklet(unsafeRow, startIndex)); + if (filterExp == null) { + int rowCount = unsafeMemoryDMStore.getRowCount(); + for (int i = 0; i < rowCount; i++) { + DataMapRow unsafeRow = unsafeMemoryDMStore.getUnsafeRow(i); + blocklets.add(createBlocklet(unsafeRow, i)); + } + } else { + int startIndex = findStartIndex(convertToRow(searchStartKey), comparator); + int endIndex = findEndIndex(convertToRow(searchEndKey), comparator); + FilterExecuter filterExecuter = + FilterUtil.getFilterExecuterTree(filterExp, segmentProperties, null); + while (startIndex <= endIndex) { + DataMapRow unsafeRow = unsafeMemoryDMStore.getUnsafeRow(startIndex); + BitSet bitSet = filterExecuter.isScanRequired(getMinMaxValue(unsafeRow, MAX_VALUES_INDEX), + getMinMaxValue(unsafeRow, MIN_VALUES_INDEX)); + if (!bitSet.isEmpty()) { + blocklets.add(createBlocklet(unsafeRow, startIndex)); + } + startIndex++; } - startIndex++; } - return null; + return blocklets; } private byte[][] getMinMaxValue(DataMapRow row, int index) { DataMapRow minMaxRow = 
row.getRow(index); byte[][] minMax = new byte[minMaxRow.getColumnCount()][]; for (int i = 0; i < minMax.length; i++) { - minMax[i] = row.getByteArray(i); + minMax[i] = minMaxRow.getByteArray(i); } return minMax; } @@ -239,13 +280,18 @@ private byte[][] getMinMaxValue(DataMapRow row, int index) { private Blocklet createBlocklet(DataMapRow row, int blockletId) { Blocklet blocklet = new Blocklet(new String(row.getByteArray(FILE_PATH_INDEX)), blockletId + ""); - Blocklet.BlockletDetailInfo detailInfo = new Blocklet.BlockletDetailInfo(); + BlockletDetailInfo detailInfo = new BlockletDetailInfo(); detailInfo.setRowCount(row.getInt(ROW_COUNT_INDEX)); detailInfo.setPagesCount(row.getShort(PAGE_COUNT_INDEX)); detailInfo.setVersionNumber(row.getShort(VERSION_INDEX)); + detailInfo.setDimLens(columnCardinality); BlockletInfo blockletInfo = new BlockletInfo(); try { - blockletInfo.writeSerializedData(row.getByteArray(BLOCK_INFO_INDEX)); + byte[] byteArray = row.getByteArray(BLOCK_INFO_INDEX); + ByteArrayInputStream stream = new ByteArrayInputStream(byteArray); + DataInputStream inputStream = new DataInputStream(stream); + blockletInfo.readFields(inputStream); + inputStream.close(); } catch (IOException e) { throw new RuntimeException(e); } diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataRefNodeWrapper.java b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataRefNodeWrapper.java new file mode 100644 index 00000000000..5509c75b1d3 --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataRefNodeWrapper.java @@ -0,0 +1,137 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.carbondata.core.indexstore.blockletindex; + +import java.io.IOException; +import java.util.List; + +import org.apache.carbondata.core.cache.update.BlockletLevelDeleteDeltaDataCache; +import org.apache.carbondata.core.datastore.DataRefNode; +import org.apache.carbondata.core.datastore.FileHolder; +import org.apache.carbondata.core.datastore.block.TableBlockInfo; +import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk; +import org.apache.carbondata.core.datastore.chunk.impl.MeasureRawColumnChunk; +import org.apache.carbondata.core.datastore.chunk.reader.CarbonDataReaderFactory; +import org.apache.carbondata.core.datastore.chunk.reader.DimensionColumnChunkReader; +import org.apache.carbondata.core.datastore.chunk.reader.MeasureColumnChunkReader; +import org.apache.carbondata.core.metadata.ColumnarFormatVersion; + +/** + * Wrapper over blocklet datamap output that implements DataRefNode. + */ +public class BlockletDataRefNodeWrapper implements DataRefNode { + + private List<TableBlockInfo> blockInfos; + + private int index; + + private int[] dimensionLens; + + private BlockletLevelDeleteDeltaDataCache deleteDeltaDataCache; + + public BlockletDataRefNodeWrapper(List<TableBlockInfo> blockInfos, int index, + int[] dimensionLens) { + this.blockInfos = blockInfos; + this.index = index; + this.dimensionLens = dimensionLens; + } + + @Override public DataRefNode getNextDataRefNode() { + if (index + 1 < blockInfos.size()) { + return new BlockletDataRefNodeWrapper(blockInfos, index + 1, dimensionLens); + } + return null; + } + + @Override public int nodeSize() { + return blockInfos.get(index).getDetailInfo().getRowCount(); + } + + @Override public long nodeNumber() { + return index; + } + + @Override public byte[][] getColumnsMaxValue() { + return null; + } + + @Override public byte[][] getColumnsMinValue() { + return null; + } + + @Override + public DimensionRawColumnChunk[] getDimensionChunks(FileHolder fileReader, int[][] blockIndexes) + throws IOException { + DimensionColumnChunkReader dimensionChunksReader = getDimensionColumnChunkReader(); + return dimensionChunksReader.readRawDimensionChunks(fileReader, blockIndexes); + } + + @Override + public DimensionRawColumnChunk getDimensionChunk(FileHolder fileReader, int blockIndexes) + throws IOException { + DimensionColumnChunkReader dimensionChunksReader = getDimensionColumnChunkReader(); + return dimensionChunksReader.readRawDimensionChunk(fileReader, blockIndexes); + } + + @Override + public MeasureRawColumnChunk[] getMeasureChunks(FileHolder fileReader, int[][] blockIndexes) + throws IOException { + MeasureColumnChunkReader measureColumnChunkReader = getMeasureColumnChunkReader(); + return measureColumnChunkReader.readRawMeasureChunks(fileReader, blockIndexes); + } + + @Override public MeasureRawColumnChunk getMeasureChunk(FileHolder fileReader, int blockIndex) + throws IOException { + MeasureColumnChunkReader measureColumnChunkReader = getMeasureColumnChunkReader(); + return measureColumnChunkReader.readRawMeasureChunk(fileReader, blockIndex); + } + + private DimensionColumnChunkReader getDimensionColumnChunkReader() throws IOException { + ColumnarFormatVersion version = + ColumnarFormatVersion.valueOf(blockInfos.get(index).getDetailInfo().getVersionNumber()); + DimensionColumnChunkReader dimensionColumnChunkReader = CarbonDataReaderFactory.getInstance() + .getDimensionColumnChunkReader(version, + blockInfos.get(index).getDetailInfo().getBlockletInfo(), dimensionLens, + blockInfos.get(index).getFilePath()); + return dimensionColumnChunkReader; + } + + private 
MeasureColumnChunkReader getMeasureColumnChunkReader() throws IOException { + ColumnarFormatVersion version = + ColumnarFormatVersion.valueOf(blockInfos.get(index).getDetailInfo().getVersionNumber()); + return CarbonDataReaderFactory.getInstance().getMeasureColumnChunkReader(version, + blockInfos.get(index).getDetailInfo().getBlockletInfo(), + blockInfos.get(index).getFilePath()); + } + + @Override + public void setDeleteDeltaDataCache(BlockletLevelDeleteDeltaDataCache deleteDeltaDataCache) { + this.deleteDeltaDataCache = deleteDeltaDataCache; + } + + @Override public BlockletLevelDeleteDeltaDataCache getDeleteDeltaDataCache() { + return deleteDeltaDataCache; + } + + @Override public int numberOfPages() { + return blockInfos.get(index).getDetailInfo().getPagesCount(); + } + + public int numberOfNodes() { + return blockInfos.size(); + } +} diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletTableMap.java b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletTableMap.java index 823973cfc4a..18ede531967 100644 --- a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletTableMap.java +++ b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletTableMap.java @@ -1,3 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package org.apache.carbondata.core.indexstore.blockletindex; import java.util.ArrayList; @@ -18,7 +34,7 @@ import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf; /** - * Created by root1 on 16/6/17. 
+ * Table map for blocklet */ public class BlockletTableMap extends AbstractTableDataMap { @@ -46,7 +62,7 @@ public DataMapWriter getDataMapWriter(AbsoluteTableIdentifier identifier, String List dataMaps = map.get(segmentId); if (dataMaps == null) { dataMaps = new ArrayList<>(); - String path = identifier.getTablePath() + "/Part0/Segment_" + segmentId; + String path = identifier.getTablePath() + "/Fact/Part0/Segment_" + segmentId; FileFactory.FileType fileType = FileFactory.getFileType(path); CarbonFile carbonFile = FileFactory.getCarbonFile(path, fileType); CarbonFile[] listFiles = carbonFile.listFiles(new CarbonFileFilter() { @@ -72,10 +88,10 @@ public DataMapWriter getDataMapWriter(AbsoluteTableIdentifier identifier, String } @Override public boolean isFiltersSupported(FilterResolverIntf filterExp) { - return false; + return true; } - @Override public void clear() { + @Override public void clear(List segmentIds) { } diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/IndexWrapper.java b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/IndexWrapper.java new file mode 100644 index 00000000000..c18d2a9424c --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/IndexWrapper.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.carbondata.core.indexstore.blockletindex; + +import java.io.IOException; +import java.util.List; + +import org.apache.carbondata.core.datastore.block.AbstractIndex; +import org.apache.carbondata.core.datastore.block.SegmentProperties; +import org.apache.carbondata.core.datastore.block.TableBlockInfo; +import org.apache.carbondata.core.indexstore.BlockletDetailInfo; +import org.apache.carbondata.core.metadata.ColumnarFormatVersion; +import org.apache.carbondata.core.metadata.blocklet.DataFileFooter; +import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema; +import org.apache.carbondata.core.util.AbstractDataFileFooterConverter; +import org.apache.carbondata.core.util.DataFileFooterConverterFactory; + +/** + * Wrapper of abstract index + * TODO it could be removed after refactor + */ +public class IndexWrapper extends AbstractIndex { + + public IndexWrapper(List blockInfos) { + BlockletDetailInfo detailInfo = blockInfos.get(0).getDetailInfo(); + int[] dimLens = detailInfo.getDimLens(); + ColumnarFormatVersion version = + ColumnarFormatVersion.valueOf(detailInfo.getVersionNumber()); + AbstractDataFileFooterConverter dataFileFooterConverter = + DataFileFooterConverterFactory.getInstance().getDataFileFooterConverter(version); + List schema; + try { + schema = dataFileFooterConverter.getSchema(blockInfos.get(0)); + } catch (IOException e) { + throw new RuntimeException(e); + } + segmentProperties = new SegmentProperties(schema, dimLens); + dataRefNode = new BlockletDataRefNodeWrapper(blockInfos, 0, + segmentProperties.getDimensionColumnsValueSize()); + } + + @Override public void buildIndex(List footerList) { + } +} diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/row/DataMapRow.java b/core/src/main/java/org/apache/carbondata/core/indexstore/row/DataMapRow.java index 853e4e8221c..3ad20a1c480 100644 --- a/core/src/main/java/org/apache/carbondata/core/indexstore/row/DataMapRow.java +++ b/core/src/main/java/org/apache/carbondata/core/indexstore/row/DataMapRow.java @@ -33,6 +33,8 @@ public DataMapRow(DataMapSchema[] schemas) { public abstract DataMapRow getRow(int ordinal); + public abstract void setRow(DataMapRow row, int ordinal); + public abstract void setByteArray(byte[] byteArray, int ordinal); public abstract int getInt(int ordinal); diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/row/DataMapRowImpl.java b/core/src/main/java/org/apache/carbondata/core/indexstore/row/DataMapRowImpl.java index 033ea34c89c..aa66723c5b2 100644 --- a/core/src/main/java/org/apache/carbondata/core/indexstore/row/DataMapRowImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/indexstore/row/DataMapRowImpl.java @@ -78,6 +78,11 @@ public DataMapRowImpl(DataMapSchema[] schemas) { data[ordinal] = value; } + @Override public void setRow(DataMapRow row, int ordinal) { + assert (schemas[ordinal].getDataType() == DataType.STRUCT); + data[ordinal] = row; + } + @Override public double getDouble(int ordinal) { return (Double) data[ordinal]; } diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/row/UnsafeDataMapRow.java b/core/src/main/java/org/apache/carbondata/core/indexstore/row/UnsafeDataMapRow.java index e0503727412..a01e9fcb6ce 100644 --- a/core/src/main/java/org/apache/carbondata/core/indexstore/row/UnsafeDataMapRow.java +++ b/core/src/main/java/org/apache/carbondata/core/indexstore/row/UnsafeDataMapRow.java @@ -28,6 +28,7 @@ public UnsafeDataMapRow(DataMapSchema[] schemas, MemoryBlock block, int pointer) case 
VARIABLE: length = unsafe.getShort(block.getBaseObject(), block.getBaseOffset() + pointer + position); position += 2; + break; default: length = schemas[ordinal].getLength(); } @@ -101,10 +102,14 @@ public UnsafeDataMapRow(DataMapSchema[] schemas, MemoryBlock block, int pointer) .getDouble(block.getBaseObject(), block.getBaseOffset() + pointer + getPosition(ordinal)); } + @Override public void setRow(DataMapRow row, int ordinal) { + throw new UnsupportedOperationException("Not supported to set on unsafe row"); + } + private int getPosition(int ordinal) { int position = 0; for (int i = 0; i < ordinal; i++) { - position += getSizeInBytes(ordinal); + position += getSizeInBytes(i); } return position; } diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/blocklet/BlockletInfo.java b/core/src/main/java/org/apache/carbondata/core/metadata/blocklet/BlockletInfo.java index c1f97eb81d3..44a1972792f 100644 --- a/core/src/main/java/org/apache/carbondata/core/metadata/blocklet/BlockletInfo.java +++ b/core/src/main/java/org/apache/carbondata/core/metadata/blocklet/BlockletInfo.java @@ -19,7 +19,9 @@ import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; +import java.io.DataInput; import java.io.DataInputStream; +import java.io.DataOutput; import java.io.DataOutputStream; import java.io.IOException; import java.io.Serializable; @@ -29,10 +31,12 @@ import org.apache.carbondata.core.metadata.blocklet.datachunk.DataChunk; import org.apache.carbondata.core.metadata.blocklet.index.BlockletIndex; +import org.apache.hadoop.io.Writable; + /** * class to store the information about the blocklet */ -public class BlockletInfo implements Serializable { +public class BlockletInfo implements Serializable, Writable { /** * serialization id @@ -195,9 +199,7 @@ public void setNumberOfPages(int numberOfPages) { this.numberOfPages = numberOfPages; } - public byte[] getSerializedData() throws IOException { - ByteArrayOutputStream stream = new ByteArrayOutputStream(); - DataOutputStream output = new DataOutputStream(stream); + @Override public void write(DataOutput output) throws IOException { output.writeLong(dimensionOffset); output.writeLong(measureOffsets); int dsize = dimensionChunkOffsets != null ? 
dimensionChunkOffsets.size() : 0; @@ -216,15 +218,9 @@ public byte[] getSerializedData() throws IOException { for (int i = 0; i < mSize; i++) { output.writeInt(measureChunksLength.get(i)); } - - output.close(); - return stream.toByteArray(); } - public void writeSerializedData(byte[] data) throws IOException { - ByteArrayInputStream stream = new ByteArrayInputStream(data); - DataInputStream input = new DataInputStream(stream); - + @Override public void readFields(DataInput input) throws IOException { dimensionOffset = input.readLong(); measureOffsets = input.readLong(); short dimensionChunkOffsetsSize = input.readShort(); @@ -247,7 +243,5 @@ public void writeSerializedData(byte[] data) throws IOException { measureChunksLength.add(input.readInt()); } - input.close(); } - } diff --git a/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java b/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java index ff54673b711..03c04efdf49 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java @@ -41,6 +41,7 @@ import org.apache.carbondata.core.datastore.block.SegmentProperties; import org.apache.carbondata.core.datastore.block.TableBlockInfo; import org.apache.carbondata.core.datastore.block.TableBlockUniqueIdentifier; +import org.apache.carbondata.core.indexstore.blockletindex.IndexWrapper; import org.apache.carbondata.core.keygenerator.KeyGenException; import org.apache.carbondata.core.keygenerator.KeyGenerator; import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier; @@ -116,23 +117,26 @@ protected void initQuery(QueryModel queryModel) throws IOException { // so block will be loaded in sorted order this will be required for // query execution Collections.sort(queryModel.getTableBlockInfos()); - // get the table blocks - CacheProvider cacheProvider = CacheProvider.getInstance(); - BlockIndexStore cache = - (BlockIndexStore) cacheProvider - .createCache(CacheType.EXECUTOR_BTREE, queryModel.getTable().getStorePath()); - // remove the invalid table blocks, block which is deleted or compacted - cache.removeTableBlocks(queryModel.getInvalidSegmentIds(), - queryModel.getAbsoluteTableIdentifier()); - List tableBlockUniqueIdentifiers = - prepareTableBlockUniqueIdentifier(queryModel.getTableBlockInfos(), - queryModel.getAbsoluteTableIdentifier()); - cache.removeTableBlocksIfHorizontalCompactionDone(queryModel); - queryProperties.dataBlocks = cache.getAll(tableBlockUniqueIdentifiers); - queryStatistic - .addStatistics(QueryStatisticsConstants.LOAD_BLOCKS_EXECUTOR, System.currentTimeMillis()); - queryProperties.queryStatisticsRecorder.recordStatistics(queryStatistic); + if (queryModel.getTableBlockInfos().get(0).getDetailInfo() != null) { + List indexList = new ArrayList<>(); + // TODO seperate index for each block + indexList.add(new IndexWrapper(queryModel.getTableBlockInfos())); + queryProperties.dataBlocks = indexList; + } else { + // get the table blocks + CacheProvider cacheProvider = CacheProvider.getInstance(); + BlockIndexStore cache = (BlockIndexStore) cacheProvider + .createCache(CacheType.EXECUTOR_BTREE, queryModel.getTable().getStorePath()); + // remove the invalid table blocks, block which is deleted or compacted + cache.removeTableBlocks(queryModel.getInvalidSegmentIds(), queryModel.getAbsoluteTableIdentifier()); + List tableBlockUniqueIdentifiers = + 
prepareTableBlockUniqueIdentifier(queryModel.getTableBlockInfos(), queryModel.getAbsoluteTableIdentifier()); + cache.removeTableBlocksIfHorizontalCompactionDone(queryModel); + queryProperties.dataBlocks = cache.getAll(tableBlockUniqueIdentifiers); + queryStatistic.addStatistics(QueryStatisticsConstants.LOAD_BLOCKS_EXECUTOR, System.currentTimeMillis()); + queryProperties.queryStatisticsRecorder.recordStatistics(queryStatistic); + } // calculating the total number of aggeragted columns int aggTypeCount = queryModel.getQueryMeasures().size(); diff --git a/core/src/main/java/org/apache/carbondata/core/scan/processor/AbstractDataBlockIterator.java b/core/src/main/java/org/apache/carbondata/core/scan/processor/AbstractDataBlockIterator.java index fdb54833d04..ff4f5dd6099 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/processor/AbstractDataBlockIterator.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/processor/AbstractDataBlockIterator.java @@ -165,6 +165,9 @@ private BlocksChunkHolder getBlocksChunkHolderInternal() throws IOException { new BlocksChunkHolder(blockExecutionInfo.getTotalNumberDimensionBlock(), blockExecutionInfo.getTotalNumberOfMeasureBlock(), fileReader); blocksChunkHolder.setDataBlock(dataBlockIterator.next()); + if (blocksChunkHolder.getDataBlock().getColumnsMaxValue() == null) { + return blocksChunkHolder; + } if (blockletScanner.isScanRequired(blocksChunkHolder)) { return blocksChunkHolder; } diff --git a/core/src/main/java/org/apache/carbondata/core/scan/result/iterator/AbstractDetailQueryResultIterator.java b/core/src/main/java/org/apache/carbondata/core/scan/result/iterator/AbstractDetailQueryResultIterator.java index 92e95940e47..8a471c17e24 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/result/iterator/AbstractDetailQueryResultIterator.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/result/iterator/AbstractDetailQueryResultIterator.java @@ -32,6 +32,7 @@ import org.apache.carbondata.core.datastore.block.AbstractIndex; import org.apache.carbondata.core.datastore.impl.FileFactory; import org.apache.carbondata.core.datastore.impl.btree.BTreeDataRefNodeFinder; +import org.apache.carbondata.core.indexstore.blockletindex.BlockletDataRefNodeWrapper; import org.apache.carbondata.core.mutate.DeleteDeltaVo; import org.apache.carbondata.core.reader.CarbonDeleteFilesDataReader; import org.apache.carbondata.core.scan.executor.infos.BlockExecutionInfo; @@ -127,20 +128,26 @@ private void intialiseInfos() { // set the deleted row to block execution info blockInfo.setDeletedRecordsMap(deletedRowsMap); } - DataRefNode startDataBlock = finder - .findFirstDataBlock(blockInfo.getDataBlock().getDataRefNode(), blockInfo.getStartKey()); - while (startDataBlock.nodeNumber() < blockInfo.getStartBlockletIndex()) { - startDataBlock = startDataBlock.getNextDataRefNode(); - } - long numberOfBlockToScan = blockInfo.getNumberOfBlockletToScan(); - //if number of block is less than 0 then take end block. 
- if (numberOfBlockToScan <= 0) { - DataRefNode endDataBlock = finder - .findLastDataBlock(blockInfo.getDataBlock().getDataRefNode(), blockInfo.getEndKey()); - numberOfBlockToScan = endDataBlock.nodeNumber() - startDataBlock.nodeNumber() + 1; + DataRefNode dataRefNode = blockInfo.getDataBlock().getDataRefNode(); + if (dataRefNode instanceof BlockletDataRefNodeWrapper) { + BlockletDataRefNodeWrapper wrapper = (BlockletDataRefNodeWrapper)dataRefNode; + blockInfo.setFirstDataBlock(wrapper); + blockInfo.setNumberOfBlockToScan(wrapper.numberOfNodes()); + + } else { + DataRefNode startDataBlock = finder.findFirstDataBlock(dataRefNode, blockInfo.getStartKey()); + while (startDataBlock.nodeNumber() < blockInfo.getStartBlockletIndex()) { + startDataBlock = startDataBlock.getNextDataRefNode(); + } + long numberOfBlockToScan = blockInfo.getNumberOfBlockletToScan(); + //if number of block is less than 0 then take end block. + if (numberOfBlockToScan <= 0) { + DataRefNode endDataBlock = finder.findLastDataBlock(dataRefNode, blockInfo.getEndKey()); + numberOfBlockToScan = endDataBlock.nodeNumber() - startDataBlock.nodeNumber() + 1; + } + blockInfo.setFirstDataBlock(startDataBlock); + blockInfo.setNumberOfBlockToScan(numberOfBlockToScan); } - blockInfo.setFirstDataBlock(startDataBlock); - blockInfo.setNumberOfBlockToScan(numberOfBlockToScan); } } diff --git a/core/src/main/java/org/apache/carbondata/core/util/AbstractDataFileFooterConverter.java b/core/src/main/java/org/apache/carbondata/core/util/AbstractDataFileFooterConverter.java index 2064bad14b6..34c77099828 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/AbstractDataFileFooterConverter.java +++ b/core/src/main/java/org/apache/carbondata/core/util/AbstractDataFileFooterConverter.java @@ -199,6 +199,8 @@ protected int getBlockletSize(BlockIndex readBlockIndexInfo) { public abstract DataFileFooter readDataFileFooter(TableBlockInfo tableBlockInfo) throws IOException; + public abstract List getSchema(TableBlockInfo tableBlockInfo) throws IOException; + /** * Below method will be used to get blocklet index for data file meta * diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java index 200d5ca6158..d57f2a24146 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java +++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java @@ -1553,24 +1553,23 @@ public static boolean isFileExistsForGivenColumn(String carbonStorePath, } /** - * @param tableInfo * @param invalidBlockVOForSegmentId * @param updateStatusMngr * @return */ - public static boolean isInvalidTableBlock(TableBlockInfo tableInfo, + public static boolean isInvalidTableBlock(String segmentId, String filePath, UpdateVO invalidBlockVOForSegmentId, SegmentUpdateStatusManager updateStatusMngr) { - if (!updateStatusMngr.isBlockValid(tableInfo.getSegmentId(), - CarbonTablePath.getCarbonDataFileName(tableInfo.getFilePath()) + CarbonTablePath + if (!updateStatusMngr.isBlockValid(segmentId, + CarbonTablePath.getCarbonDataFileName(filePath) + CarbonTablePath .getCarbonDataExtension())) { return true; } if (null != invalidBlockVOForSegmentId) { - Long blockTimeStamp = Long.parseLong(tableInfo.getFilePath() - .substring(tableInfo.getFilePath().lastIndexOf('-') + 1, - tableInfo.getFilePath().lastIndexOf('.'))); + Long blockTimeStamp = Long.parseLong(filePath + .substring(filePath.lastIndexOf('-') + 1, + filePath.lastIndexOf('.'))); if ((blockTimeStamp > 
invalidBlockVOForSegmentId.getFactTimestamp() && ( invalidBlockVOForSegmentId.getUpdateDeltaStartTimestamp() != null && blockTimeStamp < invalidBlockVOForSegmentId.getUpdateDeltaStartTimestamp()))) { diff --git a/core/src/main/java/org/apache/carbondata/core/util/DataFileFooterConverter.java b/core/src/main/java/org/apache/carbondata/core/util/DataFileFooterConverter.java index 0f82b952d59..3ac6987fe36 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/DataFileFooterConverter.java +++ b/core/src/main/java/org/apache/carbondata/core/util/DataFileFooterConverter.java @@ -121,4 +121,8 @@ private BlockletInfo getBlockletInfo( blockletInfo.setNumberOfRows(blockletInfoThrift.getNum_rows()); return blockletInfo; } + + @Override public List getSchema(TableBlockInfo tableBlockInfo) throws IOException { + return null; + } } diff --git a/core/src/main/java/org/apache/carbondata/core/util/DataFileFooterConverter2.java b/core/src/main/java/org/apache/carbondata/core/util/DataFileFooterConverter2.java index 4882b0f85fe..8cd437f43f2 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/DataFileFooterConverter2.java +++ b/core/src/main/java/org/apache/carbondata/core/util/DataFileFooterConverter2.java @@ -140,4 +140,7 @@ private int getNumberOfDimensionColumns(List columnSchemaList) { return numberOfDimensionColumns; } + @Override public List getSchema(TableBlockInfo tableBlockInfo) throws IOException { + return null; + } } diff --git a/core/src/main/java/org/apache/carbondata/core/util/DataFileFooterConverterV3.java b/core/src/main/java/org/apache/carbondata/core/util/DataFileFooterConverterV3.java index 143c1b1a3ed..ccb8b29a3a8 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/DataFileFooterConverterV3.java +++ b/core/src/main/java/org/apache/carbondata/core/util/DataFileFooterConverterV3.java @@ -85,6 +85,17 @@ public class DataFileFooterConverterV3 extends AbstractDataFileFooterConverter { return dataFileFooter; } + @Override public List getSchema(TableBlockInfo tableBlockInfo) throws IOException { + CarbonHeaderReader carbonHeaderReader = new CarbonHeaderReader(tableBlockInfo.getFilePath()); + FileHeader fileHeader = carbonHeaderReader.readHeader(); + List columnSchemaList = new ArrayList(); + List table_columns = fileHeader.getColumn_schema(); + for (int i = 0; i < table_columns.size(); i++) { + columnSchemaList.add(thriftColumnSchmeaToWrapperColumnSchema(table_columns.get(i))); + } + return columnSchemaList; + } + /** * Below method is to convert the blocklet info of the thrift to wrapper * blocklet info diff --git a/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormat.java b/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormat.java index 1e696482230..d03ae3ae86a 100644 --- a/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormat.java +++ b/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormat.java @@ -19,7 +19,14 @@ import java.io.File; import java.io.IOException; import java.lang.reflect.Constructor; -import java.util.*; +import java.util.ArrayList; +import java.util.BitSet; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Set; import org.apache.carbondata.core.constants.CarbonCommonConstants; import org.apache.carbondata.core.datastore.DataRefNode; @@ -367,8 +374,9 @@ private List getSplits(JobContext job, FilterResolverIntf filterReso if (isIUDTable) { // In case IUD is not performed in this table avoid 
searching for // invalidated blocks. - if (CarbonUtil.isInvalidTableBlock(tableBlockInfo, invalidBlockVOForSegmentId, - updateStatusManager)) { + if (CarbonUtil + .isInvalidTableBlock(tableBlockInfo.getSegmentId(), tableBlockInfo.getFilePath(), + invalidBlockVOForSegmentId, updateStatusManager)) { continue; } } diff --git a/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormatNew.java b/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormatNew.java new file mode 100644 index 00000000000..778f68723d7 --- /dev/null +++ b/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormatNew.java @@ -0,0 +1,538 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.carbondata.hadoop; + +import java.io.IOException; +import java.lang.reflect.Constructor; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.BitSet; +import java.util.LinkedList; +import java.util.List; + +import org.apache.carbondata.core.datastore.TableSegmentUniqueIdentifier; +import org.apache.carbondata.core.indexstore.AbstractTableDataMap; +import org.apache.carbondata.core.indexstore.Blocklet; +import org.apache.carbondata.core.indexstore.DataMapStoreManager; +import org.apache.carbondata.core.indexstore.DataMapType; +import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier; +import org.apache.carbondata.core.metadata.ColumnarFormatVersion; +import org.apache.carbondata.core.metadata.schema.PartitionInfo; +import org.apache.carbondata.core.metadata.schema.table.CarbonTable; +import org.apache.carbondata.core.mutate.UpdateVO; +import org.apache.carbondata.core.scan.expression.Expression; +import org.apache.carbondata.core.scan.filter.FilterExpressionProcessor; +import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf; +import org.apache.carbondata.core.scan.model.CarbonQueryPlan; +import org.apache.carbondata.core.scan.model.QueryModel; +import org.apache.carbondata.core.scan.partition.PartitionUtil; +import org.apache.carbondata.core.scan.partition.Partitioner; +import org.apache.carbondata.core.stats.QueryStatistic; +import org.apache.carbondata.core.stats.QueryStatisticsConstants; +import org.apache.carbondata.core.stats.QueryStatisticsRecorder; +import org.apache.carbondata.core.statusmanager.SegmentStatusManager; +import org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager; +import org.apache.carbondata.core.util.CarbonTimeStatisticsFactory; +import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.core.util.path.CarbonStorePath; +import org.apache.carbondata.core.util.path.CarbonTablePath; +import org.apache.carbondata.hadoop.readsupport.CarbonReadSupport; +import org.apache.carbondata.hadoop.readsupport.impl.DictionaryDecodeReadSupport; 
+import org.apache.carbondata.hadoop.util.CarbonInputFormatUtil; +import org.apache.carbondata.hadoop.util.ObjectSerializationUtil; +import org.apache.carbondata.hadoop.util.SchemaReader; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.InvalidPathException; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.JobContext; +import org.apache.hadoop.mapreduce.RecordReader; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; +import org.apache.hadoop.mapreduce.lib.input.FileSplit; +import org.apache.hadoop.mapreduce.security.TokenCache; +import org.apache.hadoop.util.StringUtils; + +/** + * Carbon Input format class representing one carbon table + */ +public class CarbonInputFormatNew extends FileInputFormat { + + // comma separated list of input segment numbers + public static final String INPUT_SEGMENT_NUMBERS = + "mapreduce.input.carboninputformat.segmentnumbers"; + // comma separated list of input files + public static final String INPUT_FILES = "mapreduce.input.carboninputformat.files"; + private static final Log LOG = LogFactory.getLog(CarbonInputFormatNew.class); + private static final String FILTER_PREDICATE = + "mapreduce.input.carboninputformat.filter.predicate"; + private static final String COLUMN_PROJECTION = "mapreduce.input.carboninputformat.projection"; + private static final String CARBON_TABLE = "mapreduce.input.carboninputformat.table"; + private static final String CARBON_READ_SUPPORT = "mapreduce.input.carboninputformat.readsupport"; + + /** + * It is optional, if user does not set then it reads from store + * + * @param configuration + * @param carbonTable + * @throws IOException + */ + public static void setCarbonTable(Configuration configuration, CarbonTable carbonTable) + throws IOException { + if (null != carbonTable) { + configuration.set(CARBON_TABLE, ObjectSerializationUtil.convertObjectToString(carbonTable)); + } + } + + public static CarbonTable getCarbonTable(Configuration configuration) throws IOException { + String carbonTableStr = configuration.get(CARBON_TABLE); + if (carbonTableStr == null) { + populateCarbonTable(configuration); + // read it from schema file in the store + carbonTableStr = configuration.get(CARBON_TABLE); + return (CarbonTable) ObjectSerializationUtil.convertStringToObject(carbonTableStr); + } + return (CarbonTable) ObjectSerializationUtil.convertStringToObject(carbonTableStr); + } + + /** + * this method will read the schema from the physical file and populate into CARBON_TABLE + * + * @param configuration + * @throws IOException + */ + private static void populateCarbonTable(Configuration configuration) throws IOException { + String dirs = configuration.get(INPUT_DIR, ""); + String[] inputPaths = StringUtils.split(dirs); + if (inputPaths.length == 0) { + throw new InvalidPathException("No input paths specified in job"); + } + AbsoluteTableIdentifier absoluteTableIdentifier = + AbsoluteTableIdentifier.fromTablePath(inputPaths[0]); + // read the schema file to get the absoluteTableIdentifier having the correct table id + // persisted in the schema + CarbonTable carbonTable = SchemaReader.readCarbonTableFromStore(absoluteTableIdentifier); + setCarbonTable(configuration, carbonTable); + } + + public static void 
setTablePath(Configuration configuration, String tablePath) + throws IOException { + configuration.set(FileInputFormat.INPUT_DIR, tablePath); + } + + /** + * It sets unresolved filter expression. + * + * @param configuration + * @param filterExpression + */ + public static void setFilterPredicates(Configuration configuration, Expression filterExpression) { + if (filterExpression == null) { + return; + } + try { + String filterString = ObjectSerializationUtil.convertObjectToString(filterExpression); + configuration.set(FILTER_PREDICATE, filterString); + } catch (Exception e) { + throw new RuntimeException("Error while setting filter expression to Job", e); + } + } + + public static void setColumnProjection(Configuration configuration, CarbonProjection projection) { + if (projection == null || projection.isEmpty()) { + return; + } + String[] allColumns = projection.getAllColumns(); + StringBuilder builder = new StringBuilder(); + for (String column : allColumns) { + builder.append(column).append(","); + } + String columnString = builder.toString(); + columnString = columnString.substring(0, columnString.length() - 1); + configuration.set(COLUMN_PROJECTION, columnString); + } + + public static String getColumnProjection(Configuration configuration) { + return configuration.get(COLUMN_PROJECTION); + } + + public static void setCarbonReadSupport(Configuration configuration, + Class readSupportClass) { + if (readSupportClass != null) { + configuration.set(CARBON_READ_SUPPORT, readSupportClass.getName()); + } + } + + private static CarbonTablePath getTablePath(AbsoluteTableIdentifier absIdentifier) { + return CarbonStorePath.getCarbonTablePath(absIdentifier); + } + + /** + * Set list of segments to access + */ + public static void setSegmentsToAccess(Configuration configuration, List validSegments) { + configuration.set(CarbonInputFormatNew.INPUT_SEGMENT_NUMBERS, + CarbonUtil.getSegmentString(validSegments)); + } + + /** + * Set list of files to access + */ + public static void setFilesToAccess(Configuration configuration, List validFiles) { + configuration.set(CarbonInputFormatNew.INPUT_FILES, CarbonUtil.getSegmentString(validFiles)); + } + + private static AbsoluteTableIdentifier getAbsoluteTableIdentifier(Configuration configuration) + throws IOException { + return getCarbonTable(configuration).getAbsoluteTableIdentifier(); + } + + /** + * {@inheritDoc} + * Configurations FileInputFormat.INPUT_DIR + * are used to get table path to read. 
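+   * Valid segments are resolved through the SegmentStatusManager when none are set
+   * explicitly, and the blocklet datamap is used to prune blocklets before the
+   * splits are created.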
+ * + * @param job + * @return List list of CarbonInputSplit + * @throws IOException + */ + @Override public List getSplits(JobContext job) throws IOException { + AbsoluteTableIdentifier identifier = getAbsoluteTableIdentifier(job.getConfiguration()); + CacheClient cacheClient = new CacheClient(identifier.getStorePath()); + AbstractTableDataMap blockletMap = + DataMapStoreManager.getInstance().getDataMap(identifier, "blocklet", DataMapType.BLOCKLET); + try { + List invalidSegments = new ArrayList<>(); + List invalidTimestampsList = new ArrayList<>(); + + // get all valid segments and set them into the configuration + if (getSegmentsToAccess(job).length == 0) { + SegmentStatusManager segmentStatusManager = new SegmentStatusManager(identifier); + SegmentStatusManager.ValidAndInvalidSegmentsInfo segments = + segmentStatusManager.getValidAndInvalidSegments(); + SegmentUpdateStatusManager updateStatusManager = new SegmentUpdateStatusManager(identifier); + setSegmentsToAccess(job.getConfiguration(), segments.getValidSegments()); + if (segments.getValidSegments().size() == 0) { + return new ArrayList<>(0); + } + + // remove entry in the segment index if there are invalid segments + invalidSegments.addAll(segments.getInvalidSegments()); + for (String invalidSegmentId : invalidSegments) { + invalidTimestampsList.add(updateStatusManager.getInvalidTimestampRange(invalidSegmentId)); + } + if (invalidSegments.size() > 0) { + List invalidSegmentsIds = + new ArrayList<>(invalidSegments.size()); + for (String segId : invalidSegments) { + invalidSegmentsIds.add(new TableSegmentUniqueIdentifier(identifier, segId)); + } + cacheClient.getSegmentAccessClient().invalidateAll(invalidSegmentsIds); + blockletMap.clear(invalidSegments); + } + } + + // process and resolve the expression + Expression filter = getFilterPredicates(job.getConfiguration()); + CarbonTable carbonTable = getCarbonTable(job.getConfiguration()); + // this will be null in case of corrupt schema file. 
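+      // (the schema could not be read or deserialized from the store)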
+      if (null == carbonTable) {
+        throw new IOException("Missing/Corrupt schema file for table.");
+      }
+
+      CarbonInputFormatUtil.processFilterExpression(filter, carbonTable);
+
+      // prune partitions for filter query on partition table
+      BitSet matchedPartitions = null;
+      if (null != filter) {
+        PartitionInfo partitionInfo = carbonTable.getPartitionInfo(carbonTable.getFactTableName());
+        if (null != partitionInfo) {
+          Partitioner partitioner = PartitionUtil.getPartitioner(partitionInfo);
+          matchedPartitions = new FilterExpressionProcessor()
+              .getFilteredPartitions(filter, partitionInfo, partitioner);
+          if (matchedPartitions.cardinality() == 0) {
+            // no partition is required
+            return new ArrayList();
+          }
+          if (matchedPartitions.cardinality() == partitioner.numPartitions()) {
+            // all partitions are required, no need to prune partitions
+            matchedPartitions = null;
+          }
+        }
+      }
+
+      FilterResolverIntf filterInterface = CarbonInputFormatUtil.resolveFilter(filter, identifier);
+
+      // do block filtering and get split
+      List splits = getSplits(job, filterInterface, matchedPartitions, cacheClient);
+      // pass the invalid segments to the task side in order to remove the index entries there
+      if (invalidSegments.size() > 0) {
+        for (InputSplit split : splits) {
+          ((CarbonInputSplit) split).setInvalidSegments(invalidSegments);
+          ((CarbonInputSplit) split).setInvalidTimestampRange(invalidTimestampsList);
+        }
+      }
+      return splits;
+    } finally {
+      // close the cache client to clear LRU cache memory
+      cacheClient.close();
+    }
+  }
+
+  /**
+   * {@inheritDoc}
+   * Configurations FileInputFormat.INPUT_DIR, CarbonInputFormat.INPUT_SEGMENT_NUMBERS
+   * are used to get table path to read.
+   *
+   * @return
+   * @throws IOException
+   */
+  private List getSplits(JobContext job, FilterResolverIntf filterResolver,
+      BitSet matchedPartitions, CacheClient cacheClient) throws IOException {
+
+    List result = new LinkedList();
+    FilterExpressionProcessor filterExpressionProcessor = new FilterExpressionProcessor();
+    UpdateVO invalidBlockVOForSegmentId = null;
+    Boolean isIUDTable = false;
+
+    AbsoluteTableIdentifier absoluteTableIdentifier =
+        getCarbonTable(job.getConfiguration()).getAbsoluteTableIdentifier();
+    SegmentUpdateStatusManager updateStatusManager =
+        new SegmentUpdateStatusManager(absoluteTableIdentifier);
+
+    isIUDTable = (updateStatusManager.getUpdateStatusDetails().length != 0);
+
+    // for each segment, fetch the blocks matching the filter from the driver-side index
+    List dataBlocksOfSegment =
+        getDataBlocksOfSegment(job, absoluteTableIdentifier, filterResolver, matchedPartitions,
+            Arrays.asList(getSegmentsToAccess(job)));
+    for (CarbonInputSplit inputSplit : dataBlocksOfSegment) {
+
+      // Get the UpdateVO for those tables on which IUD operations are being performed.
+      if (isIUDTable) {
+        invalidBlockVOForSegmentId =
+            updateStatusManager.getInvalidTimestampRange(inputSplit.getSegmentId());
+      }
+      if (isIUDTable) {
+        // IUD has been performed on this table, so skip the blocks that were
+        // invalidated by update/delete operations.
+        if (CarbonUtil
+            .isInvalidTableBlock(inputSplit.getSegmentId(), inputSplit.getPath().toString(),
+                invalidBlockVOForSegmentId, updateStatusManager)) {
+          continue;
+        }
+      }
+      String[] deleteDeltaFilePath = null;
+      try {
+        deleteDeltaFilePath =
+            updateStatusManager.getDeleteDeltaFilePath(inputSplit.getPath().toString());
+      } catch (Exception e) {
+        throw new IOException(e);
+      }
+      inputSplit.setDeleteDeltaFiles(deleteDeltaFilePath);
+      result.add(inputSplit);
+    }
+    return result;
+  }
+
+  protected Expression getFilterPredicates(Configuration configuration) {
+    try {
+      String filterExprString = configuration.get(FILTER_PREDICATE);
+      if (filterExprString == null) {
+        return null;
+      }
+      Object filter = ObjectSerializationUtil.convertStringToObject(filterExprString);
+      return (Expression) filter;
+    } catch (IOException e) {
+      throw new RuntimeException("Error while reading filter expression", e);
+    }
+  }
+
+  /**
+   * get data blocks of given segment
+   */
+  private List getDataBlocksOfSegment(JobContext job,
+      AbsoluteTableIdentifier absoluteTableIdentifier, FilterResolverIntf resolver,
+      BitSet matchedPartitions, List segmentIds) throws IOException {
+
+    QueryStatisticsRecorder recorder = CarbonTimeStatisticsFactory.createDriverRecorder();
+    QueryStatistic statistic = new QueryStatistic();
+
+    // get tokens for all the required FileSystem for table path
+    TokenCache.obtainTokensForNamenodes(job.getCredentials(),
+        new Path[] { new Path(absoluteTableIdentifier.getTablePath()) }, job.getConfiguration());
+
+    AbstractTableDataMap blockletMap = DataMapStoreManager.getInstance()
+        .getDataMap(absoluteTableIdentifier, "blocklet", DataMapType.BLOCKLET);
+    List prunedBlocklets = blockletMap.prune(segmentIds, resolver);
+
+    List resultFilterredBlocks = new ArrayList<>();
+    for (Blocklet blocklet : prunedBlocklets) {
+      int taskId = CarbonTablePath.DataFileUtil
+          .getTaskIdFromTaskNo(CarbonTablePath.DataFileUtil.getTaskNo(blocklet.getPath().toString()));
+
+      // matchedPartitions variable will be null in two cases as follows
+      // 1. the table is not a partition table
+      // 2. the table is a partition table, and all partitions are matched by query
+      // for a partition table, the task id of the carbondata file name is the partition id.
+      // if this partition is not required, it is skipped here.
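+      // (matchedPartitions is a BitSet indexed by partition id, so the task id is the lookup key)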
+      if (matchedPartitions == null || matchedPartitions.get(taskId)) {
+        resultFilterredBlocks.add(convertToCarbonInputSplit(blocklet));
+      }
+    }
+    statistic
+        .addStatistics(QueryStatisticsConstants.LOAD_BLOCKS_DRIVER, System.currentTimeMillis());
+    recorder.recordStatisticsForDriver(statistic, job.getConfiguration().get("query.id"));
+    return resultFilterredBlocks;
+  }
+
+  private CarbonInputSplit convertToCarbonInputSplit(Blocklet blocklet) throws IOException {
+    blocklet.updateLocations();
+    CarbonInputSplit split = CarbonInputSplit.from(blocklet.getSegmentId(),
+        new FileSplit(blocklet.getPath(), 0, blocklet.getLength(), blocklet.getLocations()),
+        ColumnarFormatVersion.valueOf((short) blocklet.getDetailInfo().getVersionNumber()));
+    split.setDetailInfo(blocklet.getDetailInfo());
+    return split;
+  }
+
+  @Override public RecordReader createRecordReader(InputSplit inputSplit,
+      TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
+    Configuration configuration = taskAttemptContext.getConfiguration();
+    QueryModel queryModel = getQueryModel(inputSplit, taskAttemptContext);
+    CarbonReadSupport readSupport = getReadSupportClass(configuration);
+    return new CarbonRecordReader(queryModel, readSupport);
+  }
+
+  public QueryModel getQueryModel(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
+      throws IOException {
+    Configuration configuration = taskAttemptContext.getConfiguration();
+    CarbonTable carbonTable = getCarbonTable(configuration);
+    // getting the table absoluteTableIdentifier from the carbonTable
+    // to avoid unnecessary deserialization
+    AbsoluteTableIdentifier identifier = carbonTable.getAbsoluteTableIdentifier();
+
+    // the query plan includes the projection columns
+    String projection = getColumnProjection(configuration);
+    CarbonQueryPlan queryPlan = CarbonInputFormatUtil.createQueryPlan(carbonTable, projection);
+    QueryModel queryModel = QueryModel.createModel(identifier, queryPlan, carbonTable);
+
+    // set the filter to the query model in order to filter blocklets before the scan
+    Expression filter = getFilterPredicates(configuration);
+    CarbonInputFormatUtil.processFilterExpression(filter, carbonTable);
+    FilterResolverIntf filterIntf = CarbonInputFormatUtil.resolveFilter(filter, identifier);
+    queryModel.setFilterExpressionResolverTree(filterIntf);
+
+    // update the file level index store if there are invalid segments
+    if (inputSplit instanceof CarbonMultiBlockSplit) {
+      CarbonMultiBlockSplit split = (CarbonMultiBlockSplit) inputSplit;
+      List invalidSegments = split.getAllSplits().get(0).getInvalidSegments();
+      if (invalidSegments.size() > 0) {
+        queryModel.setInvalidSegmentIds(invalidSegments);
+      }
+      List invalidTimestampRangeList =
+          split.getAllSplits().get(0).getInvalidTimestampRange();
+      if ((null != invalidTimestampRangeList) && (invalidTimestampRangeList.size() > 0)) {
+        queryModel.setInvalidBlockForSegmentId(invalidTimestampRangeList);
+      }
+    }
+    return queryModel;
+  }
+
+  public CarbonReadSupport getReadSupportClass(Configuration configuration) {
+    String readSupportClass = configuration.get(CARBON_READ_SUPPORT);
+    // By default it uses the dictionary decode read support class
+    CarbonReadSupport readSupport = null;
+    if (readSupportClass != null) {
+      try {
+        Class myClass = Class.forName(readSupportClass);
+        Constructor constructor = myClass.getConstructors()[0];
+        Object object = constructor.newInstance();
+        if (object instanceof CarbonReadSupport) {
+          readSupport = (CarbonReadSupport) object;
+        }
+      } catch (ClassNotFoundException ex) {
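+        // the configured read support class is not on the classpath;
+        // the error is logged and null is returned to the caller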
LOG.error("Class " + readSupportClass + "not found", ex); + } catch (Exception ex) { + LOG.error("Error while creating " + readSupportClass, ex); + } + } else { + readSupport = new DictionaryDecodeReadSupport<>(); + } + return readSupport; + } + + @Override protected boolean isSplitable(JobContext context, Path filename) { + try { + // Don't split the file if it is local file system + FileSystem fileSystem = filename.getFileSystem(context.getConfiguration()); + if (fileSystem instanceof LocalFileSystem) { + return false; + } + } catch (Exception e) { + return true; + } + return true; + } + + /** + * required to be moved to core + * + * @return updateExtension + */ + private String getUpdateExtension() { + // TODO: required to modify when supporting update, mostly will be update timestamp + return "update"; + } + + /** + * return valid segment to access + */ + private String[] getSegmentsToAccess(JobContext job) { + String segmentString = job.getConfiguration().get(INPUT_SEGMENT_NUMBERS, ""); + if (segmentString.trim().isEmpty()) { + return new String[0]; + } + return segmentString.split(","); + } + + /** + * return valid file to access + */ + private String[] getFilesToAccess(JobContext job) { + String fileString = job.getConfiguration().get(INPUT_FILES, ""); + if (fileString.trim().isEmpty()) { + return new String[0]; + } + return fileString.split(","); + } + + /** + * required to be moved to core + * + * @return updateExtension + */ + private String[] getValidPartitions(JobContext job) { + //TODO: has to Identify partitions by partition pruning + return new String[] { "0" }; + } + +} diff --git a/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputSplit.java b/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputSplit.java index 631bc2cd9d2..567894c7294 100644 --- a/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputSplit.java +++ b/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputSplit.java @@ -29,6 +29,7 @@ import org.apache.carbondata.core.datastore.block.BlockletInfos; import org.apache.carbondata.core.datastore.block.Distributable; import org.apache.carbondata.core.datastore.block.TableBlockInfo; +import org.apache.carbondata.core.indexstore.BlockletDetailInfo; import org.apache.carbondata.core.metadata.ColumnarFormatVersion; import org.apache.carbondata.core.mutate.UpdateVO; import org.apache.carbondata.core.util.ByteUtil; @@ -77,6 +78,8 @@ public class CarbonInputSplit extends FileSplit */ private String[] deleteDeltaFiles; + private BlockletDetailInfo detailInfo; + public CarbonInputSplit() { segmentId = null; taskId = "0"; @@ -138,10 +141,12 @@ public static List createBlocks(List splitList BlockletInfos blockletInfos = new BlockletInfos(split.getNumberOfBlocklets(), 0, split.getNumberOfBlocklets()); try { - tableBlockInfoList.add( + TableBlockInfo blockInfo = new TableBlockInfo(split.getPath().toString(), split.getStart(), split.getSegmentId(), split.getLocations(), split.getLength(), blockletInfos, split.getVersion(), - split.getDeleteDeltaFiles())); + split.getDeleteDeltaFiles()); + blockInfo.setDetailInfo(split.getDetailInfo()); + tableBlockInfoList.add(blockInfo); } catch (IOException e) { throw new RuntimeException("fail to get location of split: " + split, e); } @@ -180,6 +185,11 @@ public String getSegmentId() { for (int i = 0; i < numberOfDeleteDeltaFiles; i++) { deleteDeltaFiles[i] = in.readUTF(); } + boolean detailInfoExists = in.readBoolean(); + if (detailInfoExists) { + detailInfo = new BlockletDetailInfo(); + 
detailInfo.readFields(in); + } } @Override public void write(DataOutput out) throws IOException { @@ -197,6 +207,10 @@ public String getSegmentId() { out.writeUTF(deleteDeltaFiles[i]); } } + out.writeBoolean(detailInfo != null); + if (detailInfo != null) { + detailInfo.write(out); + } } public List getInvalidSegments() { @@ -310,4 +324,16 @@ public Map getBlockStorageIdMap() { public String[] getDeleteDeltaFiles() { return deleteDeltaFiles; } + + public void setDeleteDeltaFiles(String[] deleteDeltaFiles) { + this.deleteDeltaFiles = deleteDeltaFiles; + } + + public BlockletDetailInfo getDetailInfo() { + return detailInfo; + } + + public void setDetailInfo(BlockletDetailInfo detailInfo) { + this.detailInfo = detailInfo; + } } diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonMergerRDD.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonMergerRDD.scala index caa389aa940..27158acbfd2 100644 --- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonMergerRDD.scala +++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonMergerRDD.scala @@ -308,7 +308,8 @@ class CarbonMergerRDD[K, V]( updateStatusManager.getDeleteDeltaFilePath(entry.getPath.toString) ) ((!updated) || ((updated) && (!CarbonUtil - .isInvalidTableBlock(blockInfo, updateDetails, updateStatusManager)))) + .isInvalidTableBlock(blockInfo.getSegmentId, blockInfo.getFilePath, + updateDetails, updateStatusManager)))) }) } diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonScanRDD.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonScanRDD.scala index 4807b90b6ec..4718f611956 100644 --- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonScanRDD.scala +++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonScanRDD.scala @@ -245,21 +245,21 @@ class CarbonScanRDD( iterator.asInstanceOf[Iterator[InternalRow]] } - private def prepareInputFormatForDriver(conf: Configuration): CarbonInputFormat[Object] = { - CarbonInputFormat.setCarbonTable(conf, carbonTable) + private def prepareInputFormatForDriver(conf: Configuration): CarbonInputFormatNew[Object] = { + CarbonInputFormatNew.setCarbonTable(conf, carbonTable) createInputFormat(conf) } - private def prepareInputFormatForExecutor(conf: Configuration): CarbonInputFormat[Object] = { - CarbonInputFormat.setCarbonReadSupport(conf, readSupport) + private def prepareInputFormatForExecutor(conf: Configuration): CarbonInputFormatNew[Object] = { + CarbonInputFormatNew.setCarbonReadSupport(conf, readSupport) createInputFormat(conf) } - private def createInputFormat(conf: Configuration): CarbonInputFormat[Object] = { - val format = new CarbonInputFormat[Object] - CarbonInputFormat.setTablePath(conf, identifier.appendWithLocalPrefix(identifier.getTablePath)) - CarbonInputFormat.setFilterPredicates(conf, filterExpression) - CarbonInputFormat.setColumnProjection(conf, columnProjection) + private def createInputFormat(conf: Configuration): CarbonInputFormatNew[Object] = { + val format = new CarbonInputFormatNew[Object] + CarbonInputFormatNew.setTablePath(conf, identifier.appendWithLocalPrefix(identifier.getTablePath)) + CarbonInputFormatNew.setFilterPredicates(conf, filterExpression) + CarbonInputFormatNew.setColumnProjection(conf, columnProjection) format } From 579a9ecea16ab9f837605543e1991f34e20d4420 Mon Sep 17 00:00:00 2001 From: ravipesala Date: Sun, 25 
Jun 2017 17:15:49 +0530
Subject: [PATCH 3/4] Added LRU cache to blocklet data map.

---
 .../carbondata/core/cache/CacheProvider.java | 3 +
 .../carbondata/core/cache/CacheType.java | 6 +
 .../core/indexstore/AbstractTableDataMap.java | 11 +-
 .../carbondata/core/indexstore/Blocklet.java | 3 +-
 .../indexstore/BlockletDataMapIndexStore.java | 180 ++++++++++++++++++
 .../TableBlockIndexUniqueIdentifier.java | 103 ++++++++++
 .../blockletindex/BlockletDataMap.java | 19 +-
 .../blockletindex/BlockletTableMap.java | 44 +++--
 .../core/indexstore/row/DataMapRowImpl.java | 18 +-
 .../core/indexstore/row/UnsafeDataMapRow.java | 19 +-
 .../core/metadata/blocklet/BlockletInfo.java | 4 -
 .../executor/impl/AbstractQueryExecutor.java | 30 ++-
 .../executer/IncludeFilterExecuterImpl.java | 2 +-
 .../RangeValueFilterExecuterImpl.java | 2 +-
 ...RowLevelRangeGrtThanFiterExecuterImpl.java | 2 +-
 ...RangeGrtrThanEquaToFilterExecuterImpl.java | 2 +-
 ...lRangeLessThanEqualFilterExecuterImpl.java | 2 +-
 ...owLevelRangeLessThanFiterExecuterImpl.java | 2 +-
 .../AbstractDetailQueryResultIterator.java | 5 +-
 .../hadoop/CarbonInputFormatNew.java | 25 +--
 .../presto/impl/CarbonTableReader.java | 56 ++++--
 .../spark/rdd/CarbonIUDMergerRDD.scala | 4 +-
 .../spark/rdd/CarbonMergerRDD.scala | 4 +-
 .../carbondata/spark/rdd/CarbonScanRDD.scala | 3 +-
 .../sql/CarbonDatasourceHadoopRelation.scala | 12 +-
 25 files changed, 471 insertions(+), 90 deletions(-)
 create mode 100644 core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDataMapIndexStore.java
 create mode 100644 core/src/main/java/org/apache/carbondata/core/indexstore/TableBlockIndexUniqueIdentifier.java

diff --git a/core/src/main/java/org/apache/carbondata/core/cache/CacheProvider.java b/core/src/main/java/org/apache/carbondata/core/cache/CacheProvider.java
index 25a897622ff..5c4b2652e89 100644
--- a/core/src/main/java/org/apache/carbondata/core/cache/CacheProvider.java
+++ b/core/src/main/java/org/apache/carbondata/core/cache/CacheProvider.java
@@ -31,6 +31,7 @@
 import org.apache.carbondata.core.datastore.SegmentTaskIndexStore;
 import org.apache.carbondata.core.datastore.block.AbstractIndex;
 import org.apache.carbondata.core.datastore.block.TableBlockUniqueIdentifier;
+import org.apache.carbondata.core.indexstore.BlockletDataMapIndexStore;
 import org.apache.carbondata.core.util.CarbonProperties;

 /**
@@ -126,6 +127,8 @@ private void createDictionaryCacheForGivenType(CacheType cacheType, String carbo
     } else if (cacheType.equals(cacheType.DRIVER_BTREE)) {
       cacheObject =
           new SegmentTaskIndexStore(carbonStorePath, carbonLRUCache);
+    } else if (cacheType.equals(cacheType.DRIVER_BLOCKLET_DATAMAP)) {
+      cacheObject = new BlockletDataMapIndexStore(carbonStorePath, carbonLRUCache);
     }
     cacheTypeToCacheMap.put(cacheType, cacheObject);
   }
diff --git a/core/src/main/java/org/apache/carbondata/core/cache/CacheType.java b/core/src/main/java/org/apache/carbondata/core/cache/CacheType.java
index 2d6570da7f9..ab51ff20e59 100644
--- a/core/src/main/java/org/apache/carbondata/core/cache/CacheType.java
+++ b/core/src/main/java/org/apache/carbondata/core/cache/CacheType.java
@@ -55,6 +55,12 @@ public class CacheType {
   public static final CacheType DRIVER_BTREE = new CacheType("driver_btree");

+  /**
+   * Driver blocklet datamap cache which keeps the loaded blocklet indexes of segments
+   */
+  public static final CacheType
+      DRIVER_BLOCKLET_DATAMAP = new CacheType("driver_blocklet_datamap");
+
   /**
    * cacheName which is unique name for a cache
    */
diff --git
a/core/src/main/java/org/apache/carbondata/core/indexstore/AbstractTableDataMap.java b/core/src/main/java/org/apache/carbondata/core/indexstore/AbstractTableDataMap.java index 87e06574e75..84391e010d4 100644 --- a/core/src/main/java/org/apache/carbondata/core/indexstore/AbstractTableDataMap.java +++ b/core/src/main/java/org/apache/carbondata/core/indexstore/AbstractTableDataMap.java @@ -48,7 +48,8 @@ public abstract class AbstractTableDataMap implements EventListener { * @param segmentId * @return */ - public abstract DataMapWriter getDataMapWriter(AbsoluteTableIdentifier identifier, String segmentId); + public abstract DataMapWriter getDataMapWriter(AbsoluteTableIdentifier identifier, + String segmentId); /** * Pass the valid segments and prune the datamap using filter expression @@ -58,10 +59,10 @@ public abstract class AbstractTableDataMap implements EventListener { * @return */ public List prune(List segmentIds, FilterResolverIntf filterExp) { - List blocklets = new ArrayList<>(); - for (String segmentId: segmentIds) { + List blocklets = new ArrayList<>(); + for (String segmentId : segmentIds) { List dataMaps = getDataMaps(segmentId); - for (DataMap dataMap: dataMaps) { + for (DataMap dataMap : dataMaps) { List pruneBlocklets = dataMap.prune(filterExp); blocklets.addAll(addSegmentId(pruneBlocklets, segmentId)); } @@ -78,6 +79,7 @@ private List addSegmentId(List pruneBlocklets, String segmen /** * Get the datamap for segmentid + * * @param segmentId * @return */ @@ -106,6 +108,7 @@ public List prune(DataMapDistributable distributable, FilterResolverIn /** * Get datamap for distributable object. + * * @param distributable * @return */ diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/Blocklet.java b/core/src/main/java/org/apache/carbondata/core/indexstore/Blocklet.java index 4f27b12f7d8..66da4d07b5d 100644 --- a/core/src/main/java/org/apache/carbondata/core/indexstore/Blocklet.java +++ b/core/src/main/java/org/apache/carbondata/core/indexstore/Blocklet.java @@ -20,7 +20,6 @@ import java.io.Serializable; import org.apache.carbondata.core.datastore.impl.FileFactory; -import org.apache.carbondata.core.metadata.blocklet.BlockletInfo; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocatedFileStatus; @@ -42,7 +41,7 @@ public class Blocklet implements Serializable { private long length; - private String[] location;; + private String[] location; public Blocklet(String path, String blockletId) { this.path = new Path(path); diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDataMapIndexStore.java b/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDataMapIndexStore.java new file mode 100644 index 00000000000..fc8c2733ece --- /dev/null +++ b/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDataMapIndexStore.java @@ -0,0 +1,180 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.carbondata.core.indexstore;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.carbondata.common.logging.LogService;
+import org.apache.carbondata.common.logging.LogServiceFactory;
+import org.apache.carbondata.core.cache.Cache;
+import org.apache.carbondata.core.cache.CarbonLRUCache;
+import org.apache.carbondata.core.datastore.exception.IndexBuilderException;
+import org.apache.carbondata.core.indexstore.blockletindex.BlockletDataMap;
+
+/**
+ * Class to handle loading, unloading, clearing and storing of the table's
+ * blocklet datamaps
+ */
+public class BlockletDataMapIndexStore
+    implements Cache {
+  private static final LogService LOGGER =
+      LogServiceFactory.getLogService(BlockletDataMapIndexStore.class.getName());
+  /**
+   * carbon store path
+   */
+  protected String carbonStorePath;
+  /**
+   * CarbonLRU cache
+   */
+  protected CarbonLRUCache lruCache;
+
+  /**
+   * map of block info to lock object. While a datamap is being loaded the entry for
+   * its block info is added here and removed once loading finishes, so only a
+   * block level lock is taken and other blocks can be loaded concurrently
+   */
+  private Map segmentLockMap;
+
+  /**
+   * constructor to initialize the BlockletDataMapIndexStore
+   *
+   * @param carbonStorePath
+   * @param lruCache
+   */
+  public BlockletDataMapIndexStore(String carbonStorePath, CarbonLRUCache lruCache) {
+    this.carbonStorePath = carbonStorePath;
+    this.lruCache = lruCache;
+    segmentLockMap = new ConcurrentHashMap();
+  }
+
+  @Override public BlockletDataMap get(TableBlockIndexUniqueIdentifier tableSegmentUniqueIdentifier)
+      throws IOException {
+    String lruCacheKey = tableSegmentUniqueIdentifier.getUniqueTableSegmentIdentifier();
+    BlockletDataMap dataMap = (BlockletDataMap) lruCache.get(lruCacheKey);
+    if (dataMap == null) {
+      try {
+        dataMap = loadAndGetDataMap(tableSegmentUniqueIdentifier);
+      } catch (IndexBuilderException e) {
+        throw new IOException(e.getMessage(), e);
+      } catch (Throwable e) {
+        throw new IOException("Problem in loading segment block.", e);
+      }
+    }
+    return dataMap;
+  }
+
+  @Override public List getAll(
+      List tableSegmentUniqueIdentifiers) throws IOException {
+    List blockletDataMaps = new ArrayList<>(tableSegmentUniqueIdentifiers.size());
+    try {
+      for (TableBlockIndexUniqueIdentifier identifier : tableSegmentUniqueIdentifiers) {
+        blockletDataMaps.add(get(identifier));
+      }
+    } catch (Throwable e) {
+      for (BlockletDataMap dataMap : blockletDataMaps) {
+        dataMap.clear();
+      }
+      throw new IOException("Problem in loading segment blocks.", e);
+    }
+    return blockletDataMaps;
+  }
+
+  /**
+   * returns the BlockletDataMap if it is already present in the cache
+   *
+   * @param tableSegmentUniqueIdentifier
+   * @return
+   */
+  @Override public BlockletDataMap getIfPresent(
+      TableBlockIndexUniqueIdentifier tableSegmentUniqueIdentifier) {
+    BlockletDataMap dataMap = (BlockletDataMap) lruCache
+        .get(tableSegmentUniqueIdentifier.getUniqueTableSegmentIdentifier());
+    return dataMap;
+  }
+
+  /**
+   * invalidates the cache entry for the given segment
+   *
+   * @param tableSegmentUniqueIdentifier
+   */
+  @Override public void invalidate(TableBlockIndexUniqueIdentifier tableSegmentUniqueIdentifier) {
+    lruCache.remove(tableSegmentUniqueIdentifier.getUniqueTableSegmentIdentifier());
+  }
+
+  /**
+   * Below method will be used to load the blocklet datamap of one carbonindex file
+   * and put it into the LRU cache. Loading is synchronized on a per-file lock so
+   * that concurrent callers do not load the same index file twice
+   *
+   * @return loaded blocklet datamap
+   * @throws IOException
+   */
+  private BlockletDataMap loadAndGetDataMap(
+      TableBlockIndexUniqueIdentifier tableSegmentUniqueIdentifier) throws IOException {
+    String uniqueTableSegmentIdentifier =
+        tableSegmentUniqueIdentifier.getUniqueTableSegmentIdentifier();
+    Object lock = segmentLockMap.get(uniqueTableSegmentIdentifier);
+    if (lock == null) {
+      lock = addAndGetSegmentLock(uniqueTableSegmentIdentifier);
+    }
+    BlockletDataMap dataMap = null;
+    synchronized (lock) {
+      dataMap = new BlockletDataMap();
+      dataMap.init(tableSegmentUniqueIdentifier.getFilePath());
+      lruCache.put(tableSegmentUniqueIdentifier.getUniqueTableSegmentIdentifier(), dataMap,
+          dataMap.getMemorySize());
+    }
+    return dataMap;
+  }
+
+  /**
+   * Below method will be used to get the segment level lock object
+   *
+   * @param uniqueIdentifier
+   * @return lock object
+   */
+  private synchronized Object addAndGetSegmentLock(String uniqueIdentifier) {
+    // get the segment lock object if it is present then return
+    // otherwise add the new lock and return
+    Object segmentLoaderLockObject = segmentLockMap.get(uniqueIdentifier);
+    if (null == segmentLoaderLockObject) {
+      segmentLoaderLockObject = new Object();
+      segmentLockMap.put(uniqueIdentifier, segmentLoaderLockObject);
+    }
+    return segmentLoaderLockObject;
+  }
+
+  /**
+   * The method clears the access count of table segments
+   *
+   * @param tableSegmentUniqueIdentifiers
+   */
+  @Override public void clearAccessCount(
+      List tableSegmentUniqueIdentifiers) {
+    for (TableBlockIndexUniqueIdentifier segmentUniqueIdentifier : tableSegmentUniqueIdentifiers) {
+      BlockletDataMap cacheable =
+          (BlockletDataMap) lruCache.get(segmentUniqueIdentifier.getUniqueTableSegmentIdentifier());
+      cacheable.clear();
+    }
+  }
+}
diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/TableBlockIndexUniqueIdentifier.java b/core/src/main/java/org/apache/carbondata/core/indexstore/TableBlockIndexUniqueIdentifier.java
new file mode 100644
index 00000000000..7e2bc0e4f6e
--- /dev/null
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/TableBlockIndexUniqueIdentifier.java
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.core.indexstore;
+
+import org.apache.carbondata.core.constants.CarbonCommonConstants;
+import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
+import org.apache.carbondata.core.metadata.CarbonTableIdentifier;
+
+/**
+ * Class holds the absoluteTableIdentifier, segmentId and carbon index file name
+ * to uniquely identify one block index file within a segment
+ */
+public class TableBlockIndexUniqueIdentifier {
+  /**
+   * table fully qualified identifier
+   */
+  private AbsoluteTableIdentifier absoluteTableIdentifier;
+
+  private String segmentId;
+
+  private String carbonIndexFileName;
+
+  /**
+   * Constructor to initialize the class instance
+   *
+   * @param absoluteTableIdentifier
+   * @param segmentId
+   */
+  public TableBlockIndexUniqueIdentifier(AbsoluteTableIdentifier absoluteTableIdentifier,
+      String segmentId, String carbonIndexFileName) {
+    this.absoluteTableIdentifier = absoluteTableIdentifier;
+    this.segmentId = segmentId;
+    this.carbonIndexFileName = carbonIndexFileName;
+  }
+
+  /**
+   * returns AbsoluteTableIdentifier
+   *
+   * @return
+   */
+  public AbsoluteTableIdentifier getAbsoluteTableIdentifier() {
+    return absoluteTableIdentifier;
+  }
+
+  public String getSegmentId() {
+    return segmentId;
+  }
+
+  /**
+   * method returns the key that uniquely identifies this block index in the cache
+   *
+   * @return
+   */
+  public String getUniqueTableSegmentIdentifier() {
+    CarbonTableIdentifier carbonTableIdentifier =
+        absoluteTableIdentifier.getCarbonTableIdentifier();
+    return carbonTableIdentifier.getDatabaseName() + CarbonCommonConstants.FILE_SEPARATOR
+        + carbonTableIdentifier.getTableName() + CarbonCommonConstants.UNDERSCORE
+        + carbonTableIdentifier.getTableId() + CarbonCommonConstants.FILE_SEPARATOR + segmentId
+        + CarbonCommonConstants.FILE_SEPARATOR + carbonIndexFileName;
+  }
+
+  public String getFilePath() {
+    return absoluteTableIdentifier.getTablePath() + "/Fact/Part0/Segment_" + segmentId + "/"
+        + carbonIndexFileName;
+  }
+
+  @Override public boolean equals(Object o) {
+    if (this == o) return true;
+    if (o == null || getClass() != o.getClass()) return false;
+
+    TableBlockIndexUniqueIdentifier that = (TableBlockIndexUniqueIdentifier) o;
+
+    if (!absoluteTableIdentifier.equals(that.absoluteTableIdentifier)) {
+      return false;
+    }
+    if (!segmentId.equals(that.segmentId)) {
+      return false;
+    }
+    return carbonIndexFileName.equals(that.carbonIndexFileName);
+  }
+
+  @Override public int hashCode() {
+    int result = absoluteTableIdentifier.hashCode();
+    result = 31 * result + segmentId.hashCode();
+    result = 31 * result + carbonIndexFileName.hashCode();
+    return result;
+  }
+}
diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java
index 46c60c63284..85aa4aeb4f5 100644
--- a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java
@@ -30,6 +30,7 @@
 import org.apache.carbondata.common.logging.LogService;
 import org.apache.carbondata.common.logging.LogServiceFactory;
+import org.apache.carbondata.core.cache.Cacheable;
 import org.apache.carbondata.core.datastore.IndexKey;
 import org.apache.carbondata.core.datastore.block.SegmentProperties;
 import org.apache.carbondata.core.datastore.block.TableBlockInfo;
@@ -56,7 +57,7 @@
 /**
  * Datamap implementation for blocklet.
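+ * It loads the blocklet index rows of one carbonindex file, including the
+ * per-column min/max values, into unsafe memory and prunes blocklets with
+ * the filter resolver.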
*/ -public class BlockletDataMap implements DataMap { +public class BlockletDataMap implements DataMap, Cacheable { private static final LogService LOGGER = LogServiceFactory.getLogService(BlockletDataMap.class.getName()); @@ -103,6 +104,9 @@ public class BlockletDataMap implements DataMap { loadToUnsafe(fileFooter, segmentProperties, blockInfo.getFilePath()); } + if (unsafeMemoryDMStore != null) { + unsafeMemoryDMStore.finishWriting(); + } } catch (IOException e) { throw new RuntimeException(e); } @@ -152,7 +156,6 @@ private void loadToUnsafe(DataFileFooter fileFooter, SegmentProperties segmentPr row.setByteArray(serializedData, ordinal); unsafeMemoryDMStore.addIndexRowToUnsafe(row); } - unsafeMemoryDMStore.finishWriting(); } private DataMapRow addMinMax(int[] minMaxLen, DataMapSchema dataMapSchema, byte[][] minValues) { @@ -412,4 +415,16 @@ private DataMapRow convertToRow(IndexKey key) { unsafeMemoryDMStore = null; segmentProperties = null; } + + @Override public long getFileTimeStamp() { + return 0; + } + + @Override public int getAccessCount() { + return 0; + } + + @Override public long getMemorySize() { + return unsafeMemoryDMStore.getMemoryUsed(); + } } diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletTableMap.java b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletTableMap.java index 18ede531967..44640c11944 100644 --- a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletTableMap.java +++ b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletTableMap.java @@ -16,20 +16,24 @@ */ package org.apache.carbondata.core.indexstore.blockletindex; +import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; +import org.apache.carbondata.core.cache.Cache; +import org.apache.carbondata.core.cache.CacheProvider; +import org.apache.carbondata.core.cache.CacheType; import org.apache.carbondata.core.datastore.filesystem.CarbonFile; import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter; import org.apache.carbondata.core.datastore.impl.FileFactory; import org.apache.carbondata.core.events.ChangeEvent; -import org.apache.carbondata.core.indexstore.Blocklet; +import org.apache.carbondata.core.indexstore.AbstractTableDataMap; import org.apache.carbondata.core.indexstore.DataMap; import org.apache.carbondata.core.indexstore.DataMapDistributable; import org.apache.carbondata.core.indexstore.DataMapWriter; -import org.apache.carbondata.core.indexstore.AbstractTableDataMap; +import org.apache.carbondata.core.indexstore.TableBlockIndexUniqueIdentifier; import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier; import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf; @@ -42,11 +46,15 @@ public class BlockletTableMap extends AbstractTableDataMap { private AbsoluteTableIdentifier identifier; - private Map> map = new HashMap<>(); + private Map> segmentMap = new HashMap<>(); + + private Cache cache; @Override public void init(AbsoluteTableIdentifier identifier, String dataMapName) { this.identifier = identifier; this.dataMapName = dataMapName; + cache = CacheProvider.getInstance() + .createCache(CacheType.DRIVER_BLOCKLET_DATAMAP, identifier.getStorePath()); } @Override public DataMapWriter getMetaDataWriter() { @@ -59,9 +67,10 @@ public DataMapWriter getDataMapWriter(AbsoluteTableIdentifier identifier, String } @Override protected List getDataMaps(String segmentId) { - List 
dataMaps = map.get(segmentId); - if (dataMaps == null) { - dataMaps = new ArrayList<>(); + List tableBlockIndexUniqueIdentifiers = + segmentMap.get(segmentId); + if (tableBlockIndexUniqueIdentifiers == null) { + tableBlockIndexUniqueIdentifiers = new ArrayList<>(); String path = identifier.getTablePath() + "/Fact/Part0/Segment_" + segmentId; FileFactory.FileType fileType = FileFactory.getFileType(path); CarbonFile carbonFile = FileFactory.getCarbonFile(path, fileType); @@ -71,12 +80,16 @@ public DataMapWriter getDataMapWriter(AbsoluteTableIdentifier identifier, String } }); for (int i = 0; i < listFiles.length; i++) { - BlockletDataMap dataMap = new BlockletDataMap(); - dataMap.init(listFiles[i].getAbsolutePath()); - dataMaps.add(dataMap); + tableBlockIndexUniqueIdentifiers.add( + new TableBlockIndexUniqueIdentifier(identifier, segmentId, listFiles[i].getName())); } } - return dataMaps; + + try { + return cache.getAll(tableBlockIndexUniqueIdentifiers); + } catch (IOException e) { + throw new RuntimeException(e); + } } @Override public List toDistributable(List segmentIds) { @@ -92,7 +105,16 @@ public DataMapWriter getDataMapWriter(AbsoluteTableIdentifier identifier, String } @Override public void clear(List segmentIds) { - + for (String segmentId : segmentIds) { + List blockIndexes = segmentMap.remove(segmentId); + if (blockIndexes != null) { + for (TableBlockIndexUniqueIdentifier blockIndex : blockIndexes) { + DataMap dataMap = cache.getIfPresent(blockIndex); + dataMap.clear(); + cache.invalidate(blockIndex); + } + } + } } @Override public void fireEvent(ChangeEvent event) { diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/row/DataMapRowImpl.java b/core/src/main/java/org/apache/carbondata/core/indexstore/row/DataMapRowImpl.java index aa66723c5b2..adec346b374 100644 --- a/core/src/main/java/org/apache/carbondata/core/indexstore/row/DataMapRowImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/indexstore/row/DataMapRowImpl.java @@ -1,10 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package org.apache.carbondata.core.indexstore.row; import org.apache.carbondata.core.indexstore.schema.DataMapSchema; import org.apache.carbondata.core.metadata.datatype.DataType; /** - * Created by root1 on 17/6/17. + * Data map row. 
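+ * It holds the row on the heap while the index row is being built, before it
+ * is copied into the unsafe store.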
*/ public class DataMapRowImpl extends DataMapRow { diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/row/UnsafeDataMapRow.java b/core/src/main/java/org/apache/carbondata/core/indexstore/row/UnsafeDataMapRow.java index a01e9fcb6ce..ef78514a0e5 100644 --- a/core/src/main/java/org/apache/carbondata/core/indexstore/row/UnsafeDataMapRow.java +++ b/core/src/main/java/org/apache/carbondata/core/indexstore/row/UnsafeDataMapRow.java @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.carbondata.core.indexstore.row; import org.apache.carbondata.core.indexstore.schema.DataMapSchema; @@ -7,7 +24,7 @@ import static org.apache.carbondata.core.memory.CarbonUnsafe.unsafe; /** - * Created by root1 on 19/6/17. + * Unsafe implementation of data map row. */ public class UnsafeDataMapRow extends DataMapRow { diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/blocklet/BlockletInfo.java b/core/src/main/java/org/apache/carbondata/core/metadata/blocklet/BlockletInfo.java index 44a1972792f..f81f8056c92 100644 --- a/core/src/main/java/org/apache/carbondata/core/metadata/blocklet/BlockletInfo.java +++ b/core/src/main/java/org/apache/carbondata/core/metadata/blocklet/BlockletInfo.java @@ -17,12 +17,8 @@ package org.apache.carbondata.core.metadata.blocklet; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; import java.io.DataInput; -import java.io.DataInputStream; import java.io.DataOutput; -import java.io.DataOutputStream; import java.io.IOException; import java.io.Serializable; import java.util.ArrayList; diff --git a/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java b/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java index 03c04efdf49..e0ee5bb2b5b 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java @@ -21,8 +21,10 @@ import java.util.Arrays; import java.util.Collections; import java.util.HashSet; +import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; @@ -120,21 +122,35 @@ protected void initQuery(QueryModel queryModel) throws IOException { if (queryModel.getTableBlockInfos().get(0).getDetailInfo() != null) { List indexList = new ArrayList<>(); - // TODO seperate index for each block - indexList.add(new IndexWrapper(queryModel.getTableBlockInfos())); + Map> listMap = new LinkedHashMap<>(); + for (TableBlockInfo blockInfo: queryModel.getTableBlockInfos()) { + List tableBlockInfos = 
listMap.get(blockInfo.getFilePath()); + if (tableBlockInfos == null) { + tableBlockInfos = new ArrayList<>(); + listMap.put(blockInfo.getFilePath(), tableBlockInfos); + } + tableBlockInfos.add(blockInfo); + } + for (List tableBlockInfos: listMap.values()) { + indexList.add(new IndexWrapper(tableBlockInfos)); + } queryProperties.dataBlocks = indexList; } else { // get the table blocks CacheProvider cacheProvider = CacheProvider.getInstance(); - BlockIndexStore cache = (BlockIndexStore) cacheProvider - .createCache(CacheType.EXECUTOR_BTREE, queryModel.getTable().getStorePath()); + BlockIndexStore cache = + (BlockIndexStore) cacheProvider + .createCache(CacheType.EXECUTOR_BTREE, queryModel.getTable().getStorePath()); // remove the invalid table blocks, block which is deleted or compacted - cache.removeTableBlocks(queryModel.getInvalidSegmentIds(), queryModel.getAbsoluteTableIdentifier()); + cache.removeTableBlocks(queryModel.getInvalidSegmentIds(), + queryModel.getAbsoluteTableIdentifier()); List tableBlockUniqueIdentifiers = - prepareTableBlockUniqueIdentifier(queryModel.getTableBlockInfos(), queryModel.getAbsoluteTableIdentifier()); + prepareTableBlockUniqueIdentifier(queryModel.getTableBlockInfos(), + queryModel.getAbsoluteTableIdentifier()); cache.removeTableBlocksIfHorizontalCompactionDone(queryModel); queryProperties.dataBlocks = cache.getAll(tableBlockUniqueIdentifiers); - queryStatistic.addStatistics(QueryStatisticsConstants.LOAD_BLOCKS_EXECUTOR, System.currentTimeMillis()); + queryStatistic + .addStatistics(QueryStatisticsConstants.LOAD_BLOCKS_EXECUTOR, System.currentTimeMillis()); queryProperties.queryStatisticsRecorder.recordStatistics(queryStatistic); } // calculating the total number of aggeragted columns diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImpl.java index 8704496d18c..a874835383e 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/IncludeFilterExecuterImpl.java @@ -156,7 +156,7 @@ public BitSet isScanRequired(byte[][] blkMaxVal, byte[][] blkMinVal) { int columnIndex = dimColumnEvaluatorInfo.getColumnIndex(); int blockIndex = segmentProperties.getDimensionOrdinalToBlockMapping().get(columnIndex); - boolean isScanRequired = + boolean isScanRequired = blockIndex >= blkMaxVal.length || isScanRequired(blkMaxVal[blockIndex], blkMinVal[blockIndex], filterValues); if (isScanRequired) { bitSet.set(0); diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java index 6823531ec24..c2e077e5406 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RangeValueFilterExecuterImpl.java @@ -287,7 +287,7 @@ public boolean isScanRequired(byte[] blockMinValue, byte[] blockMaxValue, byte[] BitSet bitSet = new BitSet(1); byte[][] filterValues = this.filterRangesValues; int columnIndex = this.dimColEvaluatorInfo.getColumnIndex(); - boolean isScanRequired = + boolean isScanRequired = columnIndex >= blockMinValue.length || isScanRequired(blockMinValue[columnIndex], blockMaxValue[columnIndex], filterValues); if (isScanRequired) { 
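// bit 0 of the result marks that this block's min/max range overlaps the filter values, i.e. it cannot be skipped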
bitSet.set(0); diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java index be82be752a2..73352cbdde5 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtThanFiterExecuterImpl.java @@ -79,7 +79,7 @@ private void ifDefaultValueMatchesFilter() { @Override public BitSet isScanRequired(byte[][] blockMaxValue, byte[][] blockMinValue) { BitSet bitSet = new BitSet(1); - boolean isScanRequired = + boolean isScanRequired = dimensionBlocksIndex[0] >= blockMaxValue.length || isScanRequired(blockMaxValue[dimensionBlocksIndex[0]], filterRangeValues); if (isScanRequired) { bitSet.set(0); diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java index 53da6c536ca..6e8e188ac7a 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeGrtrThanEquaToFilterExecuterImpl.java @@ -81,7 +81,7 @@ private void ifDefaultValueMatchesFilter() { @Override public BitSet isScanRequired(byte[][] blockMaxValue, byte[][] blockMinValue) { BitSet bitSet = new BitSet(1); - boolean isScanRequired = + boolean isScanRequired = dimensionBlocksIndex[0] >= blockMaxValue.length || isScanRequired(blockMaxValue[dimensionBlocksIndex[0]], filterRangeValues); if (isScanRequired) { bitSet.set(0); diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java index d69496039e9..d6f7c869571 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanEqualFilterExecuterImpl.java @@ -81,7 +81,7 @@ private void ifDefaultValueMatchesFilter() { @Override public BitSet isScanRequired(byte[][] blockMaxValue, byte[][] blockMinValue) { BitSet bitSet = new BitSet(1); - boolean isScanRequired = + boolean isScanRequired = dimensionBlocksIndex[0] >= blockMaxValue.length || isScanRequired(blockMinValue[dimensionBlocksIndex[0]], filterRangeValues); if (isScanRequired) { bitSet.set(0); diff --git a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFiterExecuterImpl.java b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFiterExecuterImpl.java index b3dd921f99a..597ba525c36 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFiterExecuterImpl.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/filter/executer/RowLevelRangeLessThanFiterExecuterImpl.java @@ -82,7 +82,7 @@ private void ifDefaultValueMatchesFilter() { @Override public BitSet isScanRequired(byte[][] blockMaxValue, byte[][] blockMinValue) { BitSet bitSet = new BitSet(1); - boolean isScanRequired = + boolean isScanRequired = 
dimensionBlocksIndex[0] >= blockMaxValue.length || isScanRequired(blockMinValue[dimensionBlocksIndex[0]], filterRangeValues); if (isScanRequired) { bitSet.set(0); diff --git a/core/src/main/java/org/apache/carbondata/core/scan/result/iterator/AbstractDetailQueryResultIterator.java b/core/src/main/java/org/apache/carbondata/core/scan/result/iterator/AbstractDetailQueryResultIterator.java index 8a471c17e24..95030d3c050 100644 --- a/core/src/main/java/org/apache/carbondata/core/scan/result/iterator/AbstractDetailQueryResultIterator.java +++ b/core/src/main/java/org/apache/carbondata/core/scan/result/iterator/AbstractDetailQueryResultIterator.java @@ -130,12 +130,13 @@ private void intialiseInfos() { } DataRefNode dataRefNode = blockInfo.getDataBlock().getDataRefNode(); if (dataRefNode instanceof BlockletDataRefNodeWrapper) { - BlockletDataRefNodeWrapper wrapper = (BlockletDataRefNodeWrapper)dataRefNode; + BlockletDataRefNodeWrapper wrapper = (BlockletDataRefNodeWrapper) dataRefNode; blockInfo.setFirstDataBlock(wrapper); blockInfo.setNumberOfBlockToScan(wrapper.numberOfNodes()); } else { - DataRefNode startDataBlock = finder.findFirstDataBlock(dataRefNode, blockInfo.getStartKey()); + DataRefNode startDataBlock = + finder.findFirstDataBlock(dataRefNode, blockInfo.getStartKey()); while (startDataBlock.nodeNumber() < blockInfo.getStartBlockletIndex()) { startDataBlock = startDataBlock.getNextDataRefNode(); } diff --git a/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormatNew.java b/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormatNew.java index 778f68723d7..bb91fc8665d 100644 --- a/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormatNew.java +++ b/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormatNew.java @@ -387,8 +387,8 @@ private List getDataBlocksOfSegment(JobContext job, List resultFilterredBlocks = new ArrayList<>(); for (Blocklet blocklet : prunedBlocklets) { - int taskId = CarbonTablePath.DataFileUtil - .getTaskIdFromTaskNo(CarbonTablePath.DataFileUtil.getTaskNo(blocklet.getPath().toString())); + int taskId = CarbonTablePath.DataFileUtil.getTaskIdFromTaskNo( + CarbonTablePath.DataFileUtil.getTaskNo(blocklet.getPath().toString())); // matchedPartitions variable will be null in two cases as follows // 1. 
diff --git a/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormatNew.java b/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormatNew.java
index 778f68723d7..bb91fc8665d 100644
--- a/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormatNew.java
+++ b/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormatNew.java
@@ -387,8 +387,8 @@ private List<Blocklet> getDataBlocksOfSegment(JobContext job,
     List<Blocklet> resultFilterredBlocks = new ArrayList<>();
     for (Blocklet blocklet : prunedBlocklets) {
-      int taskId = CarbonTablePath.DataFileUtil
-          .getTaskIdFromTaskNo(CarbonTablePath.DataFileUtil.getTaskNo(blocklet.getPath().toString()));
+      int taskId = CarbonTablePath.DataFileUtil.getTaskIdFromTaskNo(
+          CarbonTablePath.DataFileUtil.getTaskNo(blocklet.getPath().toString()));
       // matchedPartitions variable will be null in two cases as follows
       // 1. the table is not a partition table
@@ -514,25 +514,4 @@ private String[] getSegmentsToAccess(JobContext job) {
     return segmentString.split(",");
   }
-
-  /**
-   * return valid file to access
-   */
-  private String[] getFilesToAccess(JobContext job) {
-    String fileString = job.getConfiguration().get(INPUT_FILES, "");
-    if (fileString.trim().isEmpty()) {
-      return new String[0];
-    }
-    return fileString.split(",");
-  }
-
-  /**
-   * required to be moved to core
-   *
-   * @return updateExtension
-   */
-  private String[] getValidPartitions(JobContext job) {
-    //TODO: has to Identify partitions by partition pruning
-    return new String[] { "0" };
-  }
-
 }
diff --git a/integration/presto/src/main/java/org/apache/carbondata/presto/impl/CarbonTableReader.java b/integration/presto/src/main/java/org/apache/carbondata/presto/impl/CarbonTableReader.java
index c328a6485c4..01b4e61f3e4 100755
--- a/integration/presto/src/main/java/org/apache/carbondata/presto/impl/CarbonTableReader.java
+++ b/integration/presto/src/main/java/org/apache/carbondata/presto/impl/CarbonTableReader.java
@@ -17,14 +17,31 @@
 package org.apache.carbondata.presto.impl;
 
-import com.facebook.presto.spi.SchemaTableName;
-import com.facebook.presto.spi.classloader.ThreadContextClassLoader;
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.ImmutableSet;
-import com.google.inject.Inject;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.UUID;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
 import org.apache.carbondata.core.constants.CarbonCommonConstants;
-import org.apache.carbondata.core.datastore.*;
-import org.apache.carbondata.core.datastore.block.*;
+import org.apache.carbondata.core.datastore.DataRefNode;
+import org.apache.carbondata.core.datastore.DataRefNodeFinder;
+import org.apache.carbondata.core.datastore.IndexKey;
+import org.apache.carbondata.core.datastore.SegmentTaskIndexStore;
+import org.apache.carbondata.core.datastore.TableSegmentUniqueIdentifier;
+import org.apache.carbondata.core.datastore.block.AbstractIndex;
+import org.apache.carbondata.core.datastore.block.BlockletInfos;
+import org.apache.carbondata.core.datastore.block.SegmentProperties;
+import org.apache.carbondata.core.datastore.block.SegmentTaskIndexWrapper;
+import org.apache.carbondata.core.datastore.block.TableBlockInfo;
 import org.apache.carbondata.core.datastore.exception.IndexBuilderException;
 import org.apache.carbondata.core.datastore.filesystem.CarbonFile;
 import org.apache.carbondata.core.datastore.impl.FileFactory;
@@ -52,18 +69,24 @@
 import org.apache.carbondata.core.util.path.CarbonTablePath;
 import org.apache.carbondata.hadoop.CacheClient;
 import org.apache.carbondata.hadoop.util.CarbonInputFormatUtil;
+
+import com.facebook.presto.spi.SchemaTableName;
+import com.facebook.presto.spi.classloader.ThreadContextClassLoader;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableSet;
+import com.google.inject.Inject;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.*;
+import org.apache.hadoop.fs.BlockLocation;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
+import org.apache.hadoop.fs.RemoteIterator;
 import org.apache.hadoop.mapreduce.InputSplit;
 import org.apache.hadoop.mapreduce.lib.input.FileSplit;
 import org.apache.thrift.TBase;
 
-import java.io.IOException;
-import java.util.*;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.stream.Collectors;
-import java.util.stream.Stream;
-
 import static java.util.Objects.requireNonNull;
 
 /** CarbonTableReader will be a facade of these utils
@@ -312,8 +335,9 @@ public List getInputSplits2(CarbonTableCacheModel tableCa
         TableBlockInfo tableBlockInfo = leafNode.getTableBlockInfo();
 
         if (IUDTable) {
-          if (CarbonUtil.isInvalidTableBlock(tableBlockInfo, invalidBlockVOForSegmentId,
-              updateStatusManager)) {
+          if (CarbonUtil
+              .isInvalidTableBlock(tableBlockInfo.getSegmentId(), tableBlockInfo.getFilePath(),
+                  invalidBlockVOForSegmentId, updateStatusManager)) {
             continue;
           }
         }
diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonIUDMergerRDD.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonIUDMergerRDD.scala
index 277005bc9fc..5ecd3f6ffd4 100644
--- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonIUDMergerRDD.scala
+++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonIUDMergerRDD.scala
@@ -29,7 +29,7 @@ import org.apache.spark.sql.execution.command.CarbonMergerMapping
 
 import org.apache.carbondata.core.datastore.block.{Distributable, TableBlockInfo}
 import org.apache.carbondata.core.metadata.{AbsoluteTableIdentifier, CarbonTableIdentifier}
-import org.apache.carbondata.hadoop.{CarbonInputFormat, CarbonInputSplit, CarbonMultiBlockSplit}
+import org.apache.carbondata.hadoop.{CarbonInputFormat, CarbonInputFormatNew, CarbonInputSplit, CarbonMultiBlockSplit}
 import org.apache.carbondata.hadoop.util.CarbonInputFormatUtil
 import org.apache.carbondata.processing.merger.CarbonDataMergerUtil
 import org.apache.carbondata.processing.model.CarbonLoadModel
@@ -71,7 +71,7 @@ class CarbonIUDMergerRDD[K, V](
 
     var blocksOfLastSegment: List[TableBlockInfo] = null
 
-    CarbonInputFormat.setSegmentsToAccess(
+    CarbonInputFormatNew.setSegmentsToAccess(
       job.getConfiguration, carbonMergerMapping.validSegments.toList.asJava)
 
     // get splits
diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonMergerRDD.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonMergerRDD.scala
index 27158acbfd2..0cd3db5ded6 100644
--- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonMergerRDD.scala
+++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonMergerRDD.scala
@@ -44,7 +44,7 @@ import org.apache.carbondata.core.scan.result.iterator.RawResultIterator
 import org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager
 import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil}
 import org.apache.carbondata.core.util.path.CarbonTablePath
-import org.apache.carbondata.hadoop.{CarbonInputFormat, CarbonInputSplit, CarbonMultiBlockSplit}
+import org.apache.carbondata.hadoop.{CarbonInputFormat, CarbonInputFormatNew, CarbonInputSplit, CarbonMultiBlockSplit}
 import org.apache.carbondata.hadoop.util.{CarbonInputFormatUtil, CarbonInputSplitTaskInfo}
 import org.apache.carbondata.processing.merger._
 import org.apache.carbondata.processing.model.CarbonLoadModel
@@ -286,7 +286,7 @@ class CarbonMergerRDD[K, V](
     for (eachSeg <- carbonMergerMapping.validSegments) {
 
       // map for keeping the relation of a task and its blocks.
-      job.getConfiguration.set(CarbonInputFormat.INPUT_SEGMENT_NUMBERS, eachSeg)
+      job.getConfiguration.set(CarbonInputFormatNew.INPUT_SEGMENT_NUMBERS, eachSeg)
 
       if (updateStatusManager.getUpdateStatusDetails.length != 0) {
         updateDetails = updateStatusManager.getInvalidTimestampRange(eachSeg)
diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonScanRDD.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonScanRDD.scala
index 4718f611956..94240c746be 100644
--- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonScanRDD.scala
+++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonScanRDD.scala
@@ -257,7 +257,8 @@ class CarbonScanRDD(
   private def createInputFormat(conf: Configuration): CarbonInputFormatNew[Object] = {
     val format = new CarbonInputFormatNew[Object]
-    CarbonInputFormatNew.setTablePath(conf, identifier.appendWithLocalPrefix(identifier.getTablePath))
+    CarbonInputFormatNew.setTablePath(conf,
+      identifier.appendWithLocalPrefix(identifier.getTablePath))
     CarbonInputFormatNew.setFilterPredicates(conf, filterExpression)
     CarbonInputFormatNew.setColumnProjection(conf, columnProjection)
     format
diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonDatasourceHadoopRelation.scala b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonDatasourceHadoopRelation.scala
index 0e6153fc278..ed5d829176a 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonDatasourceHadoopRelation.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonDatasourceHadoopRelation.scala
@@ -38,7 +38,7 @@ import org.apache.spark.util.SerializableConfiguration
 import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier
 import org.apache.carbondata.core.scan.expression.logical.AndExpression
 import org.apache.carbondata.core.util.path.CarbonTablePath
-import org.apache.carbondata.hadoop.{CarbonInputFormat, CarbonInputSplit, CarbonProjection}
+import org.apache.carbondata.hadoop.{CarbonInputFormat, CarbonInputFormatNew, CarbonInputSplit, CarbonProjection}
 import org.apache.carbondata.hadoop.util.{CarbonInputFormatUtil, SchemaReader}
 import org.apache.carbondata.processing.merger.TableMeta
 import org.apache.carbondata.spark.{CarbonFilters, CarbonOption}
@@ -88,17 +88,17 @@ private[sql] case class CarbonDatasourceHadoopRelation(
     filters.flatMap { filter =>
       CarbonFilters.createCarbonFilter(dataSchema, filter)
     }.reduceOption(new AndExpression(_, _))
-      .foreach(CarbonInputFormat.setFilterPredicates(conf, _))
+      .foreach(CarbonInputFormatNew.setFilterPredicates(conf, _))
 
     val projection = new CarbonProjection
     requiredColumns.foreach(projection.addColumn)
-    CarbonInputFormat.setColumnProjection(conf, projection)
-    CarbonInputFormat.setCarbonReadSupport(conf, classOf[SparkRowReadSupportImpl])
+    CarbonInputFormatNew.setColumnProjection(conf, projection)
+    CarbonInputFormatNew.setCarbonReadSupport(conf, classOf[SparkRowReadSupportImpl])
 
     new CarbonHadoopFSRDD[Row](sqlContext.sparkContext,
       new SerializableConfiguration(conf),
       absIdentifier,
-      classOf[CarbonInputFormat[Row]],
+      classOf[CarbonInputFormatNew[Row]],
       classOf[Row]
     )
   }
@@ -118,7 +118,7 @@ class CarbonHadoopFSRDD[V: ClassTag](
   @transient sc: SparkContext,
   conf: SerializableConfiguration,
   identifier: AbsoluteTableIdentifier,
-  inputFormatClass: Class[_ <: CarbonInputFormat[V]],
+  inputFormatClass: Class[_ <: CarbonInputFormatNew[V]],
   valueClass: Class[V])
   extends RDD[V](sc, Nil)
     with SparkHadoopMapReduceUtil {

From b9983cc6c01b4950d9ab06561ca16ac558fca886 Mon Sep 17 00:00:00 2001
From: ravipesala
Date: Mon, 26 Jun 2017 20:01:35 +0530
Subject: [PATCH 4/4] Fixed update test fail

---
 .../core/datastore/block/TaskBlockInfo.java   |   4 +
 .../core/indexstore/BlockletDetailInfo.java   |  12 ++
 .../core/indexstore/UnsafeMemoryDMStore.java  |   2 +-
 .../blockletindex/BlockletDataMap.java        |  11 +-
 .../blockletindex/IndexWrapper.java           |  19 +-
 .../carbondata/core/util/CarbonUtil.java      |  27 ++-
 .../hadoop/CarbonInputFormatNew.java          | 185 +++++++++++-------
 .../carbondata/hadoop/CarbonInputSplit.java   |   9 +-
 .../hadoop/util/CarbonInputFormatUtil.java    |   7 +-
 .../carbondata/spark/util/QueryPlanUtil.scala |  10 +-
 .../carbondata/spark/util/QueryPlanUtil.scala |  10 +-
 .../merger/CarbonCompactionUtil.java          |  32 +++
 12 files changed, 224 insertions(+), 104 deletions(-)

diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/block/TaskBlockInfo.java b/core/src/main/java/org/apache/carbondata/core/datastore/block/TaskBlockInfo.java
index eb707c22864..4fcec871a18 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/block/TaskBlockInfo.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/block/TaskBlockInfo.java
@@ -17,6 +17,7 @@
 package org.apache.carbondata.core.datastore.block;
 
+import java.util.Collection;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -45,6 +46,9 @@ public Set<String> getTaskSet() {
     return taskBlockInfoMapping.keySet();
   }
 
+  public Collection<List<TableBlockInfo>> getAllTableBlockInfoList() {
+    return taskBlockInfoMapping.values();
+  }
 
   /**
    * returns TableBlockInfoList of given task
diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDetailInfo.java b/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDetailInfo.java
index f1c7f68952e..68dedd86a41 100644
--- a/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDetailInfo.java
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/BlockletDetailInfo.java
@@ -38,6 +38,8 @@ public class BlockletDetailInfo implements Serializable, Writable {
 
   private int[] dimLens;
 
+  private long schemaUpdatedTimeStamp;
+
   private BlockletInfo blockletInfo;
 
   public int getRowCount() {
@@ -80,6 +82,14 @@ public void setDimLens(int[] dimLens) {
     this.dimLens = dimLens;
   }
 
+  public long getSchemaUpdatedTimeStamp() {
+    return schemaUpdatedTimeStamp;
+  }
+
+  public void setSchemaUpdatedTimeStamp(long schemaUpdatedTimeStamp) {
+    this.schemaUpdatedTimeStamp = schemaUpdatedTimeStamp;
+  }
+
   @Override public void write(DataOutput out) throws IOException {
     out.writeInt(rowCount);
     out.writeShort(pagesCount);
@@ -88,6 +98,7 @@ public void setDimLens(int[] dimLens) {
     for (int i = 0; i < dimLens.length; i++) {
       out.writeInt(dimLens[i]);
     }
+    out.writeLong(schemaUpdatedTimeStamp);
     blockletInfo.write(out);
   }
 
@@ -99,6 +110,7 @@ public void setDimLens(int[] dimLens) {
     for (int i = 0; i < dimLens.length; i++) {
       dimLens[i] = in.readInt();
     }
+    schemaUpdatedTimeStamp = in.readLong();
     blockletInfo = new BlockletInfo();
     blockletInfo.readFields(in);
   }
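
The BlockletDetailInfo change follows the standard Hadoop Writable contract: readFields must consume fields in exactly the order write produced them, so the new schemaUpdatedTimeStamp long is written and read at the same position (after dimLens, before the nested blockletInfo). A runnable round-trip sketch with a stand-in type, not the carbondata class:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.IOException;

public class DetailInfoSketch {
  private int rowCount;
  private long schemaUpdatedTimeStamp;

  void write(DataOutput out) throws IOException {
    out.writeInt(rowCount);
    out.writeLong(schemaUpdatedTimeStamp); // new field: same slot on both sides
  }

  void readFields(DataInput in) throws IOException {
    rowCount = in.readInt();
    schemaUpdatedTimeStamp = in.readLong(); // mirrors write() order exactly
  }

  public static void main(String[] args) throws IOException {
    DetailInfoSketch outInfo = new DetailInfoSketch();
    outInfo.rowCount = 42;
    outInfo.schemaUpdatedTimeStamp = 1498486295000L;
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    outInfo.write(new DataOutputStream(bytes));
    DetailInfoSketch inInfo = new DetailInfoSketch();
    inInfo.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
    System.out.println(inInfo.rowCount + " " + inInfo.schemaUpdatedTimeStamp); // 42 1498486295000
  }
}
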
diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/UnsafeMemoryDMStore.java b/core/src/main/java/org/apache/carbondata/core/indexstore/UnsafeMemoryDMStore.java
index 49e9d34151a..8246f99103f 100644
--- a/core/src/main/java/org/apache/carbondata/core/indexstore/UnsafeMemoryDMStore.java
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/UnsafeMemoryDMStore.java
@@ -27,7 +27,7 @@
 import static org.apache.carbondata.core.memory.CarbonUnsafe.unsafe;
 
 /**
- * Store the data to unsafe
+ * Store the {@link DataMapRow} data to unsafe memory.
  */
 public class UnsafeMemoryDMStore {
 
diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java
index 85aa4aeb4f5..2fc534327dc 100644
--- a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockletDataMap.java
@@ -76,7 +76,9 @@ public class BlockletDataMap implements DataMap, Cacheable {
 
   private static int VERSION_INDEX = 6;
 
-  private static int BLOCK_INFO_INDEX = 7;
+  private static int SCHEMA_UPDATED_TIME_INDEX = 7;
+
+  private static int BLOCK_INFO_INDEX = 8;
 
   private UnsafeMemoryDMStore unsafeMemoryDMStore;
 
@@ -143,6 +145,9 @@ private void loadToUnsafe(DataFileFooter fileFooter, SegmentProperties segmentPr
     // add version number
     row.setShort(fileFooter.getVersionId().number(), ordinal++);
 
+    // add schema updated time
+    row.setLong(fileFooter.getSchemaUpdatedTimeStamp(), ordinal++);
+
     // add blocklet info
     byte[] serializedData;
     try {
@@ -202,6 +207,9 @@ private void createSchema(SegmentProperties segmentProperties) {
     // for version number.
     indexSchemas.add(new DataMapSchema.FixedDataMapSchema(DataType.SHORT));
 
+    // for schema updated time.
+    indexSchemas.add(new DataMapSchema.FixedDataMapSchema(DataType.LONG));
+
     //for blocklet info
     indexSchemas.add(new DataMapSchema.VariableDataMapSchema(DataType.BYTE_ARRAY));
 
@@ -288,6 +296,7 @@ private Blocklet createBlocklet(DataMapRow row, int blockletId) {
     detailInfo.setPagesCount(row.getShort(PAGE_COUNT_INDEX));
     detailInfo.setVersionNumber(row.getShort(VERSION_INDEX));
     detailInfo.setDimLens(columnCardinality);
+    detailInfo.setSchemaUpdatedTimeStamp(row.getLong(SCHEMA_UPDATED_TIME_INDEX));
     BlockletInfo blockletInfo = new BlockletInfo();
    try {
       byte[] byteArray = row.getByteArray(BLOCK_INFO_INDEX);
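
The BlockletDataMap hunks keep three places in lockstep: the schema declaration gains a LONG column, loadToUnsafe writes it at the next ordinal, and createBlocklet reads it back by index, with BLOCK_INFO_INDEX shifting from 7 to 8. A compact illustration of that ordinal bookkeeping, using a plain Object[] as a hypothetical stand-in for the DataMapRow:

public class RowLayoutSketch {
  static final int VERSION_INDEX = 6;
  static final int SCHEMA_UPDATED_TIME_INDEX = 7; // new slot
  static final int BLOCK_INFO_INDEX = 8;          // shifted from 7

  public static void main(String[] args) {
    Object[] row = new Object[9];
    int ordinal = VERSION_INDEX;
    row[ordinal++] = (short) 3;        // version number
    row[ordinal++] = 1498486295000L;   // schema updated time (the added column)
    row[ordinal++] = new byte[] { 1 }; // serialized blocklet info
    System.out.println(row[SCHEMA_UPDATED_TIME_INDEX]);          // 1498486295000
    System.out.println(((byte[]) row[BLOCK_INFO_INDEX]).length); // 1
  }
}

If the write ordinals and the read indexes ever drift apart, the row is silently decoded against the wrong columns, which is why the constant rename and renumbering travel in the same commit.
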
diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/IndexWrapper.java b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/IndexWrapper.java
index c18d2a9424c..b8cffc6f2e0 100644
--- a/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/IndexWrapper.java
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/IndexWrapper.java
@@ -22,12 +22,8 @@
 import org.apache.carbondata.core.datastore.block.AbstractIndex;
 import org.apache.carbondata.core.datastore.block.SegmentProperties;
 import org.apache.carbondata.core.datastore.block.TableBlockInfo;
-import org.apache.carbondata.core.indexstore.BlockletDetailInfo;
-import org.apache.carbondata.core.metadata.ColumnarFormatVersion;
 import org.apache.carbondata.core.metadata.blocklet.DataFileFooter;
-import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema;
-import org.apache.carbondata.core.util.AbstractDataFileFooterConverter;
-import org.apache.carbondata.core.util.DataFileFooterConverterFactory;
+import org.apache.carbondata.core.util.CarbonUtil;
 
 /**
  * Wrapper of abstract index
@@ -34,20 +30,15 @@
  */
 public class IndexWrapper extends AbstractIndex {
 
   public IndexWrapper(List<TableBlockInfo> blockInfos) {
-    BlockletDetailInfo detailInfo = blockInfos.get(0).getDetailInfo();
-    int[] dimLens = detailInfo.getDimLens();
-    ColumnarFormatVersion version =
-        ColumnarFormatVersion.valueOf(detailInfo.getVersionNumber());
-    AbstractDataFileFooterConverter dataFileFooterConverter =
-        DataFileFooterConverterFactory.getInstance().getDataFileFooterConverter(version);
-    List<ColumnSchema> schema;
+    DataFileFooter fileFooter = null;
     try {
-      schema = dataFileFooterConverter.getSchema(blockInfos.get(0));
+      fileFooter = CarbonUtil.readMetadatFile(blockInfos.get(0));
     } catch (IOException e) {
       throw new RuntimeException(e);
     }
-    segmentProperties = new SegmentProperties(schema, dimLens);
+    segmentProperties = new SegmentProperties(fileFooter.getColumnInTable(),
+        fileFooter.getSegmentInfo().getColumnCardinality());
     dataRefNode = new BlockletDataRefNodeWrapper(blockInfos, 0,
         segmentProperties.getDimensionColumnsValueSize());
   }
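
After this change IndexWrapper derives both inputs of SegmentProperties (the column schemas and the dimension cardinality) from a single footer read instead of assembling them from separate sources. A small sketch of that single-source-of-truth shape, with simplified stand-in types rather than the carbondata classes:

import java.util.Arrays;
import java.util.List;

public class FooterSketch {
  static class Footer {
    List<String> columns;    // stands in for List<ColumnSchema>
    int[] columnCardinality; // stands in for the SegmentInfo cardinality
  }

  // Both pieces come from the same footer object, so they cannot disagree.
  static String describeSegment(Footer footer) {
    return footer.columns + " / " + Arrays.toString(footer.columnCardinality);
  }

  public static void main(String[] args) {
    Footer footer = new Footer();
    footer.columns = Arrays.asList("name", "age");
    footer.columnCardinality = new int[] { 1000, 100 };
    System.out.println(describeSegment(footer)); // [name, age] / [1000, 100]
  }
}
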
diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
index d57f2a24146..51296d885d5 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
@@ -51,10 +51,13 @@
 import org.apache.carbondata.core.datastore.filesystem.CarbonFile;
 import org.apache.carbondata.core.datastore.impl.FileFactory;
 import org.apache.carbondata.core.datastore.page.statistics.MeasurePageStatsVO;
+import org.apache.carbondata.core.indexstore.BlockletDetailInfo;
 import org.apache.carbondata.core.keygenerator.mdkey.NumberCompressor;
 import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
+import org.apache.carbondata.core.metadata.ColumnarFormatVersion;
 import org.apache.carbondata.core.metadata.ValueEncoderMeta;
 import org.apache.carbondata.core.metadata.blocklet.DataFileFooter;
+import org.apache.carbondata.core.metadata.blocklet.SegmentInfo;
 import org.apache.carbondata.core.metadata.datatype.DataType;
 import org.apache.carbondata.core.metadata.encoder.Encoding;
 import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension;
@@ -912,10 +915,26 @@ public static boolean[] getComplexDataTypeArray(QueryDimension[] queryDimensions
    * Below method will be used to read the data file matadata
    */
  public static DataFileFooter readMetadatFile(TableBlockInfo tableBlockInfo) throws IOException {
-    AbstractDataFileFooterConverter fileFooterConverter =
-        DataFileFooterConverterFactory.getInstance()
-            .getDataFileFooterConverter(tableBlockInfo.getVersion());
-    return fileFooterConverter.readDataFileFooter(tableBlockInfo);
+    BlockletDetailInfo detailInfo = tableBlockInfo.getDetailInfo();
+    if (detailInfo == null) {
+      AbstractDataFileFooterConverter fileFooterConverter =
+          DataFileFooterConverterFactory.getInstance()
+              .getDataFileFooterConverter(tableBlockInfo.getVersion());
+      return fileFooterConverter.readDataFileFooter(tableBlockInfo);
+    } else {
+      DataFileFooter fileFooter = new DataFileFooter();
+      fileFooter.setSchemaUpdatedTimeStamp(detailInfo.getSchemaUpdatedTimeStamp());
+      ColumnarFormatVersion version =
+          ColumnarFormatVersion.valueOf(detailInfo.getVersionNumber());
+      AbstractDataFileFooterConverter dataFileFooterConverter =
+          DataFileFooterConverterFactory.getInstance().getDataFileFooterConverter(version);
+      fileFooter.setColumnInTable(dataFileFooterConverter.getSchema(tableBlockInfo));
+      SegmentInfo segmentInfo = new SegmentInfo();
+      segmentInfo.setColumnCardinality(detailInfo.getDimLens());
+      segmentInfo.setNumberOfColumns(detailInfo.getRowCount());
+      fileFooter.setSegmentInfo(segmentInfo);
+      return fileFooter;
+    }
   }
 
   /**
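
readMetadatFile now has two paths: the original one parses the physical footer, while the new fast path synthesizes a footer from the BlockletDetailInfo that arrived with the split. A branch sketch with hypothetical stand-in types (the real method additionally restores the column schema and cardinality):

public class FooterPathSketch {
  static class DetailInfo { long schemaUpdatedTime; int[] dimLens; }
  static class Footer { long schemaUpdatedTime; int[] cardinality; boolean readFromFile; }

  static Footer readFooter(DetailInfo detailInfo) {
    Footer footer = new Footer();
    if (detailInfo == null) {
      // slow path: would parse the footer bytes out of the carbondata file
      footer.readFromFile = true;
    } else {
      // fast path: rebuild the footer from the info shipped with the split
      footer.schemaUpdatedTime = detailInfo.schemaUpdatedTime;
      footer.cardinality = detailInfo.dimLens;
    }
    return footer;
  }

  public static void main(String[] args) {
    System.out.println(readFooter(null).readFromFile); // true
    DetailInfo info = new DetailInfo();
    info.schemaUpdatedTime = 123L;
    info.dimLens = new int[] { 10 };
    System.out.println(readFooter(info).schemaUpdatedTime); // 123
  }
}
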
diff --git a/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormatNew.java b/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormatNew.java
index bb91fc8665d..c7c4e5dcb5d 100644
--- a/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormatNew.java
+++ b/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputFormatNew.java
@@ -21,19 +21,26 @@
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.BitSet;
+import java.util.HashMap;
 import java.util.LinkedList;
 import java.util.List;
+import java.util.Map;
 
+import org.apache.carbondata.core.constants.CarbonCommonConstants;
 import org.apache.carbondata.core.datastore.TableSegmentUniqueIdentifier;
 import org.apache.carbondata.core.indexstore.AbstractTableDataMap;
 import org.apache.carbondata.core.indexstore.Blocklet;
 import org.apache.carbondata.core.indexstore.DataMapStoreManager;
 import org.apache.carbondata.core.indexstore.DataMapType;
+import org.apache.carbondata.core.keygenerator.KeyGenException;
 import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
 import org.apache.carbondata.core.metadata.ColumnarFormatVersion;
 import org.apache.carbondata.core.metadata.schema.PartitionInfo;
 import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
+import org.apache.carbondata.core.mutate.CarbonUpdateUtil;
+import org.apache.carbondata.core.mutate.SegmentUpdateDetails;
 import org.apache.carbondata.core.mutate.UpdateVO;
+import org.apache.carbondata.core.mutate.data.BlockMappingVO;
 import org.apache.carbondata.core.scan.expression.Expression;
 import org.apache.carbondata.core.scan.filter.FilterExpressionProcessor;
 import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf;
@@ -217,85 +224,78 @@ private static AbsoluteTableIdentifier getAbsoluteTableIdentifier(Configuration
    */
   @Override public List<InputSplit> getSplits(JobContext job) throws IOException {
     AbsoluteTableIdentifier identifier = getAbsoluteTableIdentifier(job.getConfiguration());
-    CacheClient cacheClient = new CacheClient(identifier.getStorePath());
     AbstractTableDataMap blockletMap =
         DataMapStoreManager.getInstance().getDataMap(identifier, "blocklet", DataMapType.BLOCKLET);
-    try {
-      List<String> invalidSegments = new ArrayList<>();
-      List<UpdateVO> invalidTimestampsList = new ArrayList<>();
-
-      // get all valid segments and set them into the configuration
-      if (getSegmentsToAccess(job).length == 0) {
-        SegmentStatusManager segmentStatusManager = new SegmentStatusManager(identifier);
-        SegmentStatusManager.ValidAndInvalidSegmentsInfo segments =
-            segmentStatusManager.getValidAndInvalidSegments();
-        SegmentUpdateStatusManager updateStatusManager = new SegmentUpdateStatusManager(identifier);
-        setSegmentsToAccess(job.getConfiguration(), segments.getValidSegments());
-        if (segments.getValidSegments().size() == 0) {
-          return new ArrayList<>(0);
-        }
+    List<String> invalidSegments = new ArrayList<>();
+    List<UpdateVO> invalidTimestampsList = new ArrayList<>();
+
+    // get all valid segments and set them into the configuration
+    if (getSegmentsToAccess(job).length == 0) {
+      SegmentStatusManager segmentStatusManager = new SegmentStatusManager(identifier);
+      SegmentStatusManager.ValidAndInvalidSegmentsInfo segments =
+          segmentStatusManager.getValidAndInvalidSegments();
+      SegmentUpdateStatusManager updateStatusManager = new SegmentUpdateStatusManager(identifier);
+      setSegmentsToAccess(job.getConfiguration(), segments.getValidSegments());
+      if (segments.getValidSegments().size() == 0) {
+        return new ArrayList<>(0);
+      }
 
-        // remove entry in the segment index if there are invalid segments
-        invalidSegments.addAll(segments.getInvalidSegments());
-        for (String invalidSegmentId : invalidSegments) {
-          invalidTimestampsList.add(updateStatusManager.getInvalidTimestampRange(invalidSegmentId));
-        }
-        if (invalidSegments.size() > 0) {
-          List<TableSegmentUniqueIdentifier> invalidSegmentsIds =
-              new ArrayList<>(invalidSegments.size());
-          for (String segId : invalidSegments) {
-            invalidSegmentsIds.add(new TableSegmentUniqueIdentifier(identifier, segId));
-          }
-          cacheClient.getSegmentAccessClient().invalidateAll(invalidSegmentsIds);
-          blockletMap.clear(invalidSegments);
-        }
-      }
+      // remove entry in the segment index if there are invalid segments
+      invalidSegments.addAll(segments.getInvalidSegments());
+      for (String invalidSegmentId : invalidSegments) {
+        invalidTimestampsList.add(updateStatusManager.getInvalidTimestampRange(invalidSegmentId));
+      }
+      if (invalidSegments.size() > 0) {
+        List<TableSegmentUniqueIdentifier> invalidSegmentsIds =
+            new ArrayList<>(invalidSegments.size());
+        for (String segId : invalidSegments) {
+          invalidSegmentsIds.add(new TableSegmentUniqueIdentifier(identifier, segId));
+        }
+        blockletMap.clear(invalidSegments);
+      }
+    }
 
-      // process and resolve the expression
-      Expression filter = getFilterPredicates(job.getConfiguration());
-      CarbonTable carbonTable = getCarbonTable(job.getConfiguration());
-      // this will be null in case of corrupt schema file.
-      if (null == carbonTable) {
-        throw new IOException("Missing/Corrupt schema file for table.");
-      }
+    // process and resolve the expression
+    Expression filter = getFilterPredicates(job.getConfiguration());
+    CarbonTable carbonTable = getCarbonTable(job.getConfiguration());
+    // this will be null in case of corrupt schema file.
+    if (null == carbonTable) {
+      throw new IOException("Missing/Corrupt schema file for table.");
+    }
 
-      CarbonInputFormatUtil.processFilterExpression(filter, carbonTable);
-
-      // prune partitions for filter query on partition table
-      BitSet matchedPartitions = null;
-      if (null != filter) {
-        PartitionInfo partitionInfo = carbonTable.getPartitionInfo(carbonTable.getFactTableName());
-        if (null != partitionInfo) {
-          Partitioner partitioner = PartitionUtil.getPartitioner(partitionInfo);
-          matchedPartitions = new FilterExpressionProcessor()
-              .getFilteredPartitions(filter, partitionInfo, partitioner);
-          if (matchedPartitions.cardinality() == 0) {
-            // no partition is required
-            return new ArrayList<InputSplit>();
-          }
-          if (matchedPartitions.cardinality() == partitioner.numPartitions()) {
-            // all partitions are required, no need to prune partitions
-            matchedPartitions = null;
-          }
-        }
-      }
+    CarbonInputFormatUtil.processFilterExpression(filter, carbonTable);
+
+    // prune partitions for filter query on partition table
+    BitSet matchedPartitions = null;
+    if (null != filter) {
+      PartitionInfo partitionInfo = carbonTable.getPartitionInfo(carbonTable.getFactTableName());
+      if (null != partitionInfo) {
+        Partitioner partitioner = PartitionUtil.getPartitioner(partitionInfo);
+        matchedPartitions = new FilterExpressionProcessor()
+            .getFilteredPartitions(filter, partitionInfo, partitioner);
+        if (matchedPartitions.cardinality() == 0) {
+          // no partition is required
+          return new ArrayList<InputSplit>();
+        }
+        if (matchedPartitions.cardinality() == partitioner.numPartitions()) {
+          // all partitions are required, no need to prune partitions
+          matchedPartitions = null;
+        }
+      }
+    }
 
-      FilterResolverIntf filterInterface = CarbonInputFormatUtil.resolveFilter(filter, identifier);
+    FilterResolverIntf filterInterface = CarbonInputFormatUtil.resolveFilter(filter, identifier);
 
-      // do block filtering and get split
-      List<InputSplit> splits = getSplits(job, filterInterface, matchedPartitions, cacheClient);
-      // pass the invalid segment to task side in order to remove index entry in task side
-      if (invalidSegments.size() > 0) {
-        for (InputSplit split : splits) {
-          ((CarbonInputSplit) split).setInvalidSegments(invalidSegments);
-          ((CarbonInputSplit) split).setInvalidTimestampRange(invalidTimestampsList);
-        }
-      }
-      return splits;
-    } finally {
-      // close the cache cache client to clear LRU cache memory
-      cacheClient.close();
-    }
+    // do block filtering and get split
+    List<InputSplit> splits = getSplits(job, filterInterface, matchedPartitions);
+    // pass the invalid segment to task side in order to remove index entry in task side
+    if (invalidSegments.size() > 0) {
+      for (InputSplit split : splits) {
+        ((CarbonInputSplit) split).setInvalidSegments(invalidSegments);
+        ((CarbonInputSplit) split).setInvalidTimestampRange(invalidTimestampsList);
+      }
+    }
+    return splits;
   }
 
   /**
@@ -307,10 +307,9 @@
    * @throws IOException
    */
   private List<InputSplit> getSplits(JobContext job, FilterResolverIntf filterResolver,
-      BitSet matchedPartitions, CacheClient cacheClient) throws IOException {
+      BitSet matchedPartitions) throws IOException {
 
     List<InputSplit> result = new LinkedList<InputSplit>();
-    FilterExpressionProcessor filterExpressionProcessor = new FilterExpressionProcessor();
 
     UpdateVO invalidBlockVOForSegmentId = null;
     Boolean isIUDTable = false;
@@ -514,4 +513,54 @@ private String[] getSegmentsToAccess(JobContext job) {
     return segmentString.split(",");
   }
 
+  /**
+   * Get the row count of the Block and mapping of segment and Block count.
+   *
+   * @param job
+   * @param identifier
+   * @return
+   * @throws IOException
+   * @throws KeyGenException
+   */
+  public BlockMappingVO getBlockRowCount(JobContext job, AbsoluteTableIdentifier identifier)
+      throws IOException, KeyGenException {
+    AbstractTableDataMap blockletMap =
+        DataMapStoreManager.getInstance().getDataMap(identifier, "blocklet", DataMapType.BLOCKLET);
+    SegmentUpdateStatusManager updateStatusManager = new SegmentUpdateStatusManager(identifier);
+    SegmentStatusManager.ValidAndInvalidSegmentsInfo validAndInvalidSegments =
+        new SegmentStatusManager(identifier).getValidAndInvalidSegments();
+    Map<String, Long> blockRowCountMapping =
+        new HashMap<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
+    Map<String, Long> segmentAndBlockCountMapping =
+        new HashMap<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
+    List<Blocklet> blocklets = blockletMap.prune(validAndInvalidSegments.getValidSegments(), null);
+    for (Blocklet blocklet : blocklets) {
+      String blockName = blocklet.getPath().toString();
+      blockName = CarbonTablePath.getCarbonDataFileName(blockName);
+      blockName = blockName + CarbonTablePath.getCarbonDataExtension();
+
+      long rowCount = blocklet.getDetailInfo().getRowCount();
+
+      String key = CarbonUpdateUtil.getSegmentBlockNameKey(blocklet.getSegmentId(), blockName);
+
+      // if block is invalid then don't add the count
+      SegmentUpdateDetails details = updateStatusManager.getDetailsForABlock(key);
+
+      if (null == details || !CarbonUpdateUtil.isBlockInvalid(details.getStatus())) {
+        Long blockCount = blockRowCountMapping.get(key);
+        if (blockCount == null) {
+          blockCount = 0L;
+          Long count = segmentAndBlockCountMapping.get(blocklet.getSegmentId());
+          if (count == null) {
+            count = 0L;
+          }
+          segmentAndBlockCountMapping.put(blocklet.getSegmentId(), count + 1);
+        }
+        blockCount += rowCount;
+        blockRowCountMapping.put(key, blockCount);
+      }
+    }
+    return new BlockMappingVO(blockRowCountMapping, segmentAndBlockCountMapping);
+  }
+
 }
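
getBlockRowCount builds two maps in one pass over the pruned blocklets: row counts summed per segment/block key, and the number of distinct blocks per segment (a block is counted the first time its key appears). A self-contained sketch of the same aggregation; the inputs are simplified (segmentId, blockName, rowCount) tuples, and the invalid-block filtering done by the update status manager in the real method is omitted:

import java.util.HashMap;
import java.util.Map;

public class RowCountSketch {
  public static void main(String[] args) {
    String[][] blocklets = {
        { "0", "part-0-0.carbondata", "100" },
        { "0", "part-0-0.carbondata", "50" }, // second blocklet of the same block
        { "0", "part-0-1.carbondata", "25" },
        { "1", "part-0-0.carbondata", "10" },
    };
    Map<String, Long> blockRowCount = new HashMap<>();
    Map<String, Long> segmentBlockCount = new HashMap<>();
    for (String[] b : blocklets) {
      String key = b[0] + "/" + b[1]; // stands in for getSegmentBlockNameKey
      if (!blockRowCount.containsKey(key)) {
        // first blocklet of this block: bump the segment's block counter
        segmentBlockCount.merge(b[0], 1L, Long::sum);
      }
      blockRowCount.merge(key, Long.parseLong(b[2]), Long::sum);
    }
    System.out.println(blockRowCount);    // per segment/block row totals
    System.out.println(segmentBlockCount); // {0=2, 1=1}
  }
}
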
diff --git a/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputSplit.java b/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputSplit.java
index 567894c7294..56bade7f8a4 100644
--- a/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputSplit.java
+++ b/hadoop/src/main/java/org/apache/carbondata/hadoop/CarbonInputSplit.java
@@ -158,9 +158,12 @@ public static TableBlockInfo getTableBlockInfo(CarbonInputSplit inputSplit) {
     BlockletInfos blockletInfos =
         new BlockletInfos(inputSplit.getNumberOfBlocklets(), 0, inputSplit.getNumberOfBlocklets());
     try {
-      return new TableBlockInfo(inputSplit.getPath().toString(), inputSplit.getStart(),
-          inputSplit.getSegmentId(), inputSplit.getLocations(), inputSplit.getLength(),
-          blockletInfos, inputSplit.getVersion(), inputSplit.getDeleteDeltaFiles());
+      TableBlockInfo blockInfo =
+          new TableBlockInfo(inputSplit.getPath().toString(), inputSplit.getStart(),
+              inputSplit.getSegmentId(), inputSplit.getLocations(), inputSplit.getLength(),
+              blockletInfos, inputSplit.getVersion(), inputSplit.getDeleteDeltaFiles());
+      blockInfo.setDetailInfo(inputSplit.getDetailInfo());
+      return blockInfo;
     } catch (IOException e) {
       throw new RuntimeException("fail to get location of split: " + inputSplit, e);
     }
diff --git a/hadoop/src/main/java/org/apache/carbondata/hadoop/util/CarbonInputFormatUtil.java b/hadoop/src/main/java/org/apache/carbondata/hadoop/util/CarbonInputFormatUtil.java
index 827030416f1..d7324fc30f8 100644
--- a/hadoop/src/main/java/org/apache/carbondata/hadoop/util/CarbonInputFormatUtil.java
+++ b/hadoop/src/main/java/org/apache/carbondata/hadoop/util/CarbonInputFormatUtil.java
@@ -34,7 +34,7 @@
 import org.apache.carbondata.core.scan.model.QueryDimension;
 import org.apache.carbondata.core.scan.model.QueryMeasure;
 import org.apache.carbondata.core.scan.model.QueryModel;
-import org.apache.carbondata.hadoop.CarbonInputFormat;
+import org.apache.carbondata.hadoop.CarbonInputFormatNew;
 
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapreduce.Job;
@@ -77,9 +77,10 @@ public static CarbonQueryPlan createQueryPlan(CarbonTable carbonTable, String co
     return plan;
   }
 
-  public static CarbonInputFormat createCarbonInputFormat(AbsoluteTableIdentifier identifier,
+  public static CarbonInputFormatNew createCarbonInputFormat(
+      AbsoluteTableIdentifier identifier,
       Job job) throws IOException {
-    CarbonInputFormat carbonInputFormat = new CarbonInputFormat<>();
+    CarbonInputFormatNew carbonInputFormat = new CarbonInputFormatNew<>();
     FileInputFormat.addInputPath(job, new Path(identifier.getTablePath()));
     return carbonInputFormat;
   }
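
The CarbonInputSplit change forwards the detail info captured at pruning time onto the TableBlockInfo built on the task side, which is what enables the footer fast path in CarbonUtil above. A minimal carrier sketch with hypothetical types:

public class SplitCarrySketch {
  static class DetailInfo { int rowCount; }
  static class Split { String path; DetailInfo detailInfo; }
  static class BlockInfo { String path; DetailInfo detailInfo; }

  static BlockInfo toBlockInfo(Split split) {
    BlockInfo blockInfo = new BlockInfo();
    blockInfo.path = split.path;
    blockInfo.detailInfo = split.detailInfo; // forward instead of re-reading the footer
    return blockInfo;
  }

  public static void main(String[] args) {
    Split split = new Split();
    split.path = "/store/t1/Fact/Part0/Segment_0/part-0-0.carbondata";
    split.detailInfo = new DetailInfo();
    split.detailInfo.rowCount = 100;
    System.out.println(toBlockInfo(split).detailInfo.rowCount); // 100
  }
}
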
diff --git a/integration/spark/src/main/scala/org/apache/carbondata/spark/util/QueryPlanUtil.scala b/integration/spark/src/main/scala/org/apache/carbondata/spark/util/QueryPlanUtil.scala
index 2ca3b8c6c2f..98a4339088d 100644
--- a/integration/spark/src/main/scala/org/apache/carbondata/spark/util/QueryPlanUtil.scala
+++ b/integration/spark/src/main/scala/org/apache/carbondata/spark/util/QueryPlanUtil.scala
@@ -26,7 +26,7 @@ import org.apache.hadoop.mapreduce.Job
 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat
 
 import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier
-import org.apache.carbondata.hadoop.CarbonInputFormat
+import org.apache.carbondata.hadoop.CarbonInputFormatNew
 
 
 /**
@@ -38,8 +38,8 @@ object QueryPlanUtil {
    * createCarbonInputFormat from query model
    */
   def createCarbonInputFormat(absoluteTableIdentifier: AbsoluteTableIdentifier) :
-    (CarbonInputFormat[Array[Object]], Job) = {
-    val carbonInputFormat = new CarbonInputFormat[Array[Object]]()
+    (CarbonInputFormatNew[Array[Object]], Job) = {
+    val carbonInputFormat = new CarbonInputFormatNew[Array[Object]]()
     val jobConf: JobConf = new JobConf(new Configuration)
     val job: Job = new Job(jobConf)
     FileInputFormat.addInputPath(job, new Path(absoluteTableIdentifier.getTablePath))
@@ -47,8 +47,8 @@ object QueryPlanUtil {
   }
 
   def createCarbonInputFormat[V: ClassTag](absoluteTableIdentifier: AbsoluteTableIdentifier,
-    conf: Configuration) : CarbonInputFormat[V] = {
-    val carbonInputFormat = new CarbonInputFormat[V]()
+    conf: Configuration) : CarbonInputFormatNew[V] = {
+    val carbonInputFormat = new CarbonInputFormatNew[V]()
     val job: Job = new Job(conf)
     FileInputFormat.addInputPath(job, new Path(absoluteTableIdentifier.getTablePath))
     carbonInputFormat
diff --git a/integration/spark2/src/main/scala/org/apache/carbondata/spark/util/QueryPlanUtil.scala b/integration/spark2/src/main/scala/org/apache/carbondata/spark/util/QueryPlanUtil.scala
index 70c7caf963a..4b277234217 100644
--- a/integration/spark2/src/main/scala/org/apache/carbondata/spark/util/QueryPlanUtil.scala
+++ b/integration/spark2/src/main/scala/org/apache/carbondata/spark/util/QueryPlanUtil.scala
@@ -26,7 +26,7 @@ import org.apache.hadoop.mapreduce.Job
 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat
 
 import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier
-import org.apache.carbondata.hadoop.CarbonInputFormat
+import org.apache.carbondata.hadoop.CarbonInputFormatNew
 
 /**
  * All the utility functions for carbon plan creation
@@ -37,8 +37,8 @@ object QueryPlanUtil {
   /**
    * createCarbonInputFormat from query model
    */
   def createCarbonInputFormat(absoluteTableIdentifier: AbsoluteTableIdentifier) :
-    (CarbonInputFormat[Array[Object]], Job) = {
-    val carbonInputFormat = new CarbonInputFormat[Array[Object]]()
+    (CarbonInputFormatNew[Array[Object]], Job) = {
+    val carbonInputFormat = new CarbonInputFormatNew[Array[Object]]()
     val jobConf: JobConf = new JobConf(new Configuration)
     val job: Job = new Job(jobConf)
     FileInputFormat.addInputPath(job, new Path(absoluteTableIdentifier.getTablePath))
@@ -46,8 +46,8 @@ object QueryPlanUtil {
   }
 
   def createCarbonInputFormat[V: ClassTag](absoluteTableIdentifier: AbsoluteTableIdentifier,
-    conf: Configuration) : CarbonInputFormat[V] = {
-    val carbonInputFormat = new CarbonInputFormat[V]()
+    conf: Configuration) : CarbonInputFormatNew[V] = {
+    val carbonInputFormat = new CarbonInputFormatNew[V]()
     val job: Job = new Job(conf)
     FileInputFormat.addInputPath(job, new Path(absoluteTableIdentifier.getTablePath))
     carbonInputFormat
diff --git a/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonCompactionUtil.java b/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonCompactionUtil.java
index 8cdcd263c38..99bfd44a41b 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonCompactionUtil.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonCompactionUtil.java
@@ -18,6 +18,7 @@
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -379,4 +380,35 @@
+
+  public static boolean checkIfAnyRestructuredBlockExists(Map<String, TaskBlockInfo> segmentMapping,
+      long tableLastUpdatedTime) {
+    boolean restructuredBlockExists = false;
+    for (Map.Entry<String, TaskBlockInfo> taskMap : segmentMapping.entrySet()) {
+      String segmentId = taskMap.getKey();
+      TaskBlockInfo taskBlockInfo = taskMap.getValue();
+      Collection<List<TableBlockInfo>> infoList = taskBlockInfo.getAllTableBlockInfoList();
+      for (List<TableBlockInfo> listMetadata : infoList) {
+        for (TableBlockInfo blockInfo : listMetadata) {
+          // if schema modified timestamp is greater than footer stored schema timestamp,
+          // it indicates it is a restructured block
+          if (tableLastUpdatedTime > blockInfo.getDetailInfo().getSchemaUpdatedTimeStamp()) {
+            restructuredBlockExists = true;
+            break;
+          }
+        }
+      }
+      if (restructuredBlockExists) {
+        break;
+      }
+    }
+    return restructuredBlockExists;
+  }
 }
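
A usage-style sketch of the timestamp comparison the new compaction check performs: a block whose footer schema timestamp is older than the table's last schema update is treated as restructured, and the early exit mirrors the break-out-of-both-loops pattern above. Simplified inputs; the real method walks the TaskBlockInfo maps.

public class RestructureCheckSketch {
  static boolean anyRestructured(long[] blockSchemaTimes, long tableLastUpdatedTime) {
    for (long blockTime : blockSchemaTimes) {
      if (tableLastUpdatedTime > blockTime) {
        return true; // written before the latest ALTER: needs restructure-aware merge
      }
    }
    return false;
  }

  public static void main(String[] args) {
    long tableUpdated = 2000L;
    System.out.println(anyRestructured(new long[] { 2000L, 2000L }, tableUpdated)); // false
    System.out.println(anyRestructured(new long[] { 1000L, 2000L }, tableUpdated)); // true
  }
}
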