From d5556989b2dbb3a65ba27b7ac561562c9e07917e Mon Sep 17 00:00:00 2001
From: Jacky Li
Date: Fri, 14 Sep 2018 18:28:25 +0800
Subject: [PATCH 1/2] write is_sort in footer

---
 format/src/main/thrift/carbondata.thrift       |  1 +
 .../store/CarbonFactDataHandlerColumnar.java   |  2 +-
 .../store/writer/AbstractFactDataWriter.java   |  4 ++--
 .../store/writer/CarbonFactDataWriter.java     |  2 +-
 .../writer/v3/CarbonFactDataWriterImplV3.java  | 17 +++++++++++++----
 5 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/format/src/main/thrift/carbondata.thrift b/format/src/main/thrift/carbondata.thrift
index 2423ffa6b56..93dff633b70 100644
--- a/format/src/main/thrift/carbondata.thrift
+++ b/format/src/main/thrift/carbondata.thrift
@@ -204,6 +204,7 @@ struct FileFooter3{
   3: required list<BlockletIndex> blocklet_index_list; // Blocklet index of all blocklets in this file
   4: optional list<BlockletInfo3> blocklet_info_list3; // Information about blocklets of all columns in this file for V3 format
   5: optional dictionary.ColumnDictionaryChunk dictionary; // Blocklet local dictionary
+  6: optional bool is_sort; // True if the data in this file is sorted; used by compaction to decide whether to use merge sort
 }
 
 /**
diff --git a/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java b/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java
index 7151b47b794..2039200d8f9 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java
@@ -365,7 +365,7 @@ public void closeHandler() throws CarbonDataWriterException {
     }
     consumerExecutorService.shutdownNow();
     processWriteTaskSubmitList(consumerExecutorServiceTaskList);
-    this.dataWriter.writeFooterToFile();
+    this.dataWriter.writeFooter();
     LOGGER.info("All blocklets have been finished writing");
     // close all the open stream for both the files
     this.dataWriter.closeWriter();
diff --git a/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java b/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java
index 836e2c8111c..ad0e8e0ad88 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/store/writer/AbstractFactDataWriter.java
@@ -229,7 +229,7 @@ protected void createNewFileIfReachThreshold(long blockletSizeToBeAdded)
       LOGGER.info("Writing data to file as max file size reached for file: " + activeFile
           + ". Data block size: " + currentFileSize);
Data block size: " + currentFileSize); // write meta data to end of the existing file - writeBlockletInfoToFile(); + writeFooterToFile(); this.currentFileSize = 0; this.dataChunksOffsets = new ArrayList<>(); this.dataChunksLength = new ArrayList<>(); @@ -324,7 +324,7 @@ public void initializeWriter() throws CarbonDataWriterException { /** * This method will write metadata at the end of file file format in thrift format */ - protected abstract void writeBlockletInfoToFile() throws CarbonDataWriterException; + protected abstract void writeFooterToFile() throws CarbonDataWriterException; /** * Below method will be used to fill the vlock info details diff --git a/processing/src/main/java/org/apache/carbondata/processing/store/writer/CarbonFactDataWriter.java b/processing/src/main/java/org/apache/carbondata/processing/store/writer/CarbonFactDataWriter.java index 3a2fa1c013a..e9d413d4511 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/store/writer/CarbonFactDataWriter.java +++ b/processing/src/main/java/org/apache/carbondata/processing/store/writer/CarbonFactDataWriter.java @@ -35,7 +35,7 @@ public interface CarbonFactDataWriter { * * @throws CarbonDataWriterException */ - void writeFooterToFile() throws CarbonDataWriterException; + void writeFooter() throws CarbonDataWriterException; /** * Below method will be used to initialise the writer diff --git a/processing/src/main/java/org/apache/carbondata/processing/store/writer/v3/CarbonFactDataWriterImplV3.java b/processing/src/main/java/org/apache/carbondata/processing/store/writer/v3/CarbonFactDataWriterImplV3.java index 8622fcd5e7a..3f9554fdec3 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/store/writer/v3/CarbonFactDataWriterImplV3.java +++ b/processing/src/main/java/org/apache/carbondata/processing/store/writer/v3/CarbonFactDataWriterImplV3.java @@ -45,6 +45,7 @@ import static org.apache.carbondata.core.constants.CarbonCommonConstants.TABLE_BLOCKLET_SIZE; import static org.apache.carbondata.core.constants.CarbonV3DataFormatConstants.BLOCKLET_SIZE_IN_MB; import static org.apache.carbondata.core.constants.CarbonV3DataFormatConstants.BLOCKLET_SIZE_IN_MB_DEFAULT_VALUE; +import static org.apache.carbondata.processing.loading.sort.SortScopeOptions.SortScope.NO_SORT; /** * Below class will be used to write the data in V3 format @@ -68,6 +69,11 @@ public class CarbonFactDataWriterImplV3 extends AbstractFactDataWriter { */ private long blockletSizeThreshold; + /** + * True if this file is sorted + */ + private boolean isSorted; + public CarbonFactDataWriterImplV3(CarbonFactDataHandlerModel model) { super(model); String blockletSize = @@ -83,10 +89,11 @@ public CarbonFactDataWriterImplV3(CarbonFactDataHandlerModel model) { LOGGER.info("Blocklet size configure for table is: " + blockletSizeThreshold); } blockletDataHolder = new BlockletDataHolder(fallbackExecutorService, model); + isSorted = model.getSortScope() != NO_SORT; } - @Override protected void writeBlockletInfoToFile() - throws CarbonDataWriterException { + @Override + protected void writeFooterToFile() throws CarbonDataWriterException { try { // get the current file position long currentPosition = currentOffsetInFile; @@ -94,6 +101,7 @@ public CarbonFactDataWriterImplV3(CarbonFactDataHandlerModel model) { FileFooter3 convertFileMeta = CarbonMetadataUtil .convertFileFooterVersion3(blockletMetadata, blockletIndex, localCardinality, thriftColumnSchemaList.size()); + convertFileMeta.setIs_sort(isSorted); // fill the carbon index details 
       fillBlockIndexInfoDetails(convertFileMeta.getNum_rows(), carbonDataFileName, currentPosition);
       // write the footer
@@ -376,9 +384,10 @@ public void closeWriter() throws CarbonDataWriterException {
     }
   }
 
-  @Override public void writeFooterToFile() throws CarbonDataWriterException {
+  @Override
+  public void writeFooter() throws CarbonDataWriterException {
     if (this.blockletMetadata.size() > 0) {
-      writeBlockletInfoToFile();
+      writeFooterToFile();
     }
   }
 }

From 98172df2e2cd9c2e002f7f492b36b22dce0aea19 Mon Sep 17 00:00:00 2001
From: Jacky Li
Date: Sat, 15 Sep 2018 14:13:31 +0800
Subject: [PATCH 2/2] fix test

---
 .../apache/spark/sql/CarbonGetTableDetailComandTestCase.scala | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/integration/spark2/src/test/scala/org/apache/spark/sql/CarbonGetTableDetailComandTestCase.scala b/integration/spark2/src/test/scala/org/apache/spark/sql/CarbonGetTableDetailComandTestCase.scala
index a49d5bbc40b..8a311c06444 100644
--- a/integration/spark2/src/test/scala/org/apache/spark/sql/CarbonGetTableDetailComandTestCase.scala
+++ b/integration/spark2/src/test/scala/org/apache/spark/sql/CarbonGetTableDetailComandTestCase.scala
@@ -43,9 +43,9 @@ class CarbonGetTableDetailCommandTestCase extends QueryTest with BeforeAndAfterA
     assertResult(2)(result.length)
     assertResult("table_info1")(result(0).getString(0))
     // 2087 is the size of carbon table. Note that since 1.5.0, we add additional compressor name in metadata
-    assertResult(2187)(result(0).getLong(1))
+    assertResult(2188)(result(0).getLong(1))
     assertResult("table_info2")(result(1).getString(0))
-    assertResult(2187)(result(1).getLong(1))
+    assertResult(2188)(result(1).getLong(1))
   }
 
   override def afterAll: Unit = {
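
Note for reviewers: below is a minimal sketch of how a consumer of the new footer field (for example, compaction deciding whether a file can be merge-sorted) might read it. It is not part of these patches; the helper class name is hypothetical, and it assumes only the Thrift-generated accessors for the optional is_sort field (setIs_sort appears in patch 1; isSetIs_sort and isIs_sort are the standard generated counterparts).

    // Illustrative only; FooterSortFlag is a hypothetical helper, not code from this patch series.
    import org.apache.carbondata.format.FileFooter3;

    public final class FooterSortFlag {

      private FooterSortFlag() {
      }

      /**
       * Returns true only when the footer explicitly records that the file was written sorted.
       * Files written before this change carry no is_sort field, so the presence check
       * (isSetIs_sort) keeps them from being mistaken for sorted files.
       */
      public static boolean isSorted(FileFooter3 footer) {
        return footer.isSetIs_sort() && footer.isIs_sort();
      }
    }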