From 62f95e48ee5d82f859770918cd959bfec32b765b Mon Sep 17 00:00:00 2001 From: akashrn5 Date: Tue, 23 Oct 2018 10:19:24 +0530 Subject: [PATCH] review comments handled --- .../TestNonTransactionalCarbonTable.scala | 4 +- .../management/CarbonShowSummaryCommand.scala | 7 +- .../sql/parser/CarbonSpark2SqlParser.scala | 4 +- .../org/apache/carbondata/tool/CarbonCli.java | 6 +- .../org/apache/carbondata/tool/DataFile.java | 5 + .../apache/carbondata/tool/DataSummary.java | 102 ++++++++++-------- .../apache/carbondata/tool/FileCollector.java | 4 +- .../apache/carbondata/tool/ScanBenchmark.java | 6 +- .../apache/carbondata/tool/ShardPrinter.java | 22 ++-- ...{TablePrinter.java => TableFormatter.java} | 7 +- .../apache/carbondata/tool/CarbonCliTest.java | 2 +- 11 files changed, 98 insertions(+), 71 deletions(-) rename tools/cli/src/main/java/org/apache/carbondata/tool/{TablePrinter.java => TableFormatter.java} (92%) diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala index 1fd83b89db8..2036ed2fbfe 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestNonTransactionalCarbonTable.scala @@ -389,7 +389,9 @@ class TestNonTransactionalCarbonTable extends QueryTest with BeforeAndAfterAll { |'carbondata' LOCATION |'$writerPath' """.stripMargin) - sql("show summary for table sdkOutputTable options('command'='-cmd,summary,-p,-a,-v,-c,age')").show(1000,false) + val output = sql("show summary for table sdkOutputTable options('command'='-cmd,summary,-p,-a,-v,-c,age')").collect() + + assert(output.toList.contains(Row("written_by Version "))) checkExistence(sql("describe formatted sdkOutputTable"), true, "age,name") diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonShowSummaryCommand.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonShowSummaryCommand.scala index 99fe4c6218d..461f31f9b85 100644 --- a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonShowSummaryCommand.scala +++ b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonShowSummaryCommand.scala @@ -26,9 +26,14 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.execution.command.{Checker, DataCommand} import org.apache.spark.sql.types.StringType -import org.apache.carbondata.common.exceptions.sql.MalformedCarbonCommandException import org.apache.carbondata.tool.CarbonCli +/** + * Show summary command, which integrates the CarbonData CLI with SQL so that the CLI summary output can be queried through a SQL statement. + * @param databaseNameOp optional database name of the table + * @param tableName name of the table to summarize + * @param commandOptions CLI options passed through the OPTIONS clause of the SQL command + */ case class CarbonShowSummaryCommand( databaseNameOp: Option[String], tableName: String, diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/parser/CarbonSpark2SqlParser.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/parser/CarbonSpark2SqlParser.scala index 6a72d5c0f94..5427168aea8 100644 --- a/integration/spark2/src/main/scala/org/apache/spark/sql/parser/CarbonSpark2SqlParser.scala +++ 
b/integration/spark2/src/main/scala/org/apache/spark/sql/parser/CarbonSpark2SqlParser.scala @@ -497,10 +497,10 @@ class CarbonSpark2SqlParser extends CarbonDDLSqlParser { protected lazy val cli: Parser[LogicalPlan] = - (SHOW ~> SUMMARY ~> FOR ~> TABLE) ~ (ident <~ ".").? ~ ident ~ + (SHOW ~> SUMMARY ~> FOR ~> TABLE) ~> (ident <~ ".").? ~ ident ~ (OPTIONS ~> "(" ~> repsep(summaryOptions, ",") <~ ")").? <~ opt(";") ^^ { - case showSummary ~ databaseName ~ tableName ~ commandList => + case databaseName ~ tableName ~ commandList => var commandOptions: Map[String, String] = null if (commandList.isDefined) { commandOptions = commandList.getOrElse(List.empty[(String, String)]).toMap diff --git a/tools/cli/src/main/java/org/apache/carbondata/tool/CarbonCli.java b/tools/cli/src/main/java/org/apache/carbondata/tool/CarbonCli.java index 18a98f90181..11553a69513 100644 --- a/tools/cli/src/main/java/org/apache/carbondata/tool/CarbonCli.java +++ b/tools/cli/src/main/java/org/apache/carbondata/tool/CarbonCli.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.io.PrintStream; import java.util.ArrayList; +import java.util.List; import org.apache.carbondata.common.annotations.InterfaceAudience; import org.apache.carbondata.common.annotations.InterfaceStability; @@ -41,8 +42,11 @@ @InterfaceStability.Unstable public class CarbonCli { - private static ArrayList outPuts; + // list to collect all the output generated for the selected options + private static List outPuts; + // decides whether to print the output to the console or return it as a list; + // true by default, and set to false when the CLI is triggered via a SQL command private static boolean isPrintInConsole = true; private static Options buildOptions() { diff --git a/tools/cli/src/main/java/org/apache/carbondata/tool/DataFile.java b/tools/cli/src/main/java/org/apache/carbondata/tool/DataFile.java index 039401e5adf..c8fdc9e39fd 100644 --- a/tools/cli/src/main/java/org/apache/carbondata/tool/DataFile.java +++ b/tools/cli/src/main/java/org/apache/carbondata/tool/DataFile.java @@ -317,6 +317,10 @@ class ColumnChunk { // min/max stats of this column chunk byte[] min, max; + // whether min/max is present for this column chunk, since min/max may not be written + // beyond a specific size + boolean isMinMaxPresent; + // percentage of min/max comparing to min/max scope collected in all blocklets // they are set after calculation in DataSummary double minPercentage, maxPercentage; @@ -335,6 +339,7 @@ class ColumnChunk { this.column = column; min = index.min_max_index.min_values.get(columnIndex).array(); max = index.min_max_index.max_values.get(columnIndex).array(); + isMinMaxPresent = index.min_max_index.min_max_presence.get(columnIndex); // read the column chunk metadata: DataChunk3 ByteBuffer buffer = fileReader.readByteBuffer( diff --git a/tools/cli/src/main/java/org/apache/carbondata/tool/DataSummary.java b/tools/cli/src/main/java/org/apache/carbondata/tool/DataSummary.java index 5984b8af4a9..b397d15cfe2 100644 --- a/tools/cli/src/main/java/org/apache/carbondata/tool/DataSummary.java +++ b/tools/cli/src/main/java/org/apache/carbondata/tool/DataSummary.java @@ -20,7 +20,6 @@ import java.io.File; import java.io.IOException; import java.nio.charset.Charset; -import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.LinkedHashMap; @@ -40,6 +39,7 @@ import org.apache.carbondata.core.statusmanager.SegmentStatusManager; import org.apache.carbondata.core.util.ByteUtil; import 
org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.core.util.DataTypeUtil; import org.apache.carbondata.format.BlockletInfo3; import org.apache.carbondata.format.DataChunk2; import org.apache.carbondata.format.DataChunk3; @@ -57,12 +57,12 @@ */ class DataSummary implements Command { private String dataFolder; - private ArrayList outPuts; + private List outPuts; // file path mapping to file object private LinkedHashMap dataFiles; - DataSummary(String dataFolder, ArrayList outPuts) { + DataSummary(String dataFolder, List outPuts) { this.dataFolder = dataFolder; this.outPuts = outPuts; } @@ -82,14 +82,14 @@ public void run(CommandLine line) throws IOException, MemoryException { } if (line.hasOption("s") || printAll) { if (dataFiles.size() > 0) { - printSchema(dataFiles.entrySet().iterator().next().getValue()); + collectSchemaDetails(dataFiles.entrySet().iterator().next().getValue()); } } if (line.hasOption("m") || printAll) { - printSegments(collector.getTableStatusFile()); + collectSegmentsDetails(collector.getTableStatusFile()); } if (line.hasOption("t") || printAll) { - printTableProperties(collector.getSchemaFile()); + collectTableProperties(collector.getSchemaFile()); } if (line.hasOption("b") || printAll) { String limitSize = line.getOptionValue("b"); @@ -97,25 +97,25 @@ public void run(CommandLine line) throws IOException, MemoryException { // by default we can limit the output to two shards and user can increase this limit limitSize = "2"; } - printBlockletDetail(Integer.parseInt(limitSize)); + collectBlockletDetail(Integer.parseInt(limitSize)); } if (line.hasOption("v") || printAll) { - printVersionDetails(); + collectVersionDetails(); } if (line.hasOption("B")) { String blockFileName = line.getOptionValue("B"); - printBlockDetails(blockFileName); + collectBlockDetails(blockFileName); } if (line.hasOption("c")) { String columName = line.getOptionValue("c"); printColumnStats(columName); if (line.hasOption("k")) { - printColumnChunkMeta(columName); + collectColumnChunkMeta(columName); } } } - private void printSchema(DataFile dataFile) throws IOException { + private void collectSchemaDetails(DataFile dataFile) throws IOException { CarbonFile file = FileFactory.getCarbonFile(dataFile.getFilePath()); outPuts.add(""); outPuts.add("## Schema"); @@ -125,7 +125,7 @@ private void printSchema(DataFile dataFile) throws IOException { outPuts.add("version: V" + header.version); outPuts.add("timestamp: " + new java.sql.Timestamp(header.time_stamp)); List columns = reader.readSchema(); - TablePrinter printer = new TablePrinter( + TableFormatter tableFormatter = new TableFormatter( new String[]{"Column Name", "Data Type", "Column Type", "SortColumn", "Encoding", "Ordinal", "Id"}, outPuts); for (ColumnSchema column : columns) { @@ -134,7 +134,7 @@ private void printSchema(DataFile dataFile) throws IOException { shortColumnId = "*" + column.getColumnUniqueId().substring(column.getColumnUniqueId().length() - 4); } - printer.addRow(new String[]{ + tableFormatter.addRow(new String[]{ column.getColumnName(), column.getDataType().getName(), column.isDimensionColumn() ? 
"dimension" : "measure", @@ -144,17 +144,17 @@ private void printSchema(DataFile dataFile) throws IOException { shortColumnId }); } - printer.printFormatted(); + tableFormatter.printFormatted(); } - private void printSegments(CarbonFile tableStatusFile) throws IOException { + private void collectSegmentsDetails(CarbonFile tableStatusFile) throws IOException { outPuts.add(""); outPuts.add("## Segment"); if (tableStatusFile != null) { // first collect all information in memory then print a formatted table LoadMetadataDetails[] segments = SegmentStatusManager.readTableStatusFile(tableStatusFile.getPath()); - TablePrinter printer = new TablePrinter( + TableFormatter tableFormatter = new TableFormatter( new String[]{"SegmentID", "Status", "Load Start", "Load End", "Merged To", "Format", "Data Size", "Index Size"}, outPuts); for (LoadMetadataDetails segment : segments) { @@ -169,7 +169,7 @@ private void printSegments(CarbonFile tableStatusFile) throws IOException { } else { indexSize = Strings.formatSize(Long.parseLong(segment.getIndexSize())); } - printer.addRow(new String[]{ + tableFormatter.addRow(new String[]{ segment.getLoadName(), segment.getSegmentStatus().toString(), new java.sql.Date(segment.getLoadStartTime()).toString(), @@ -180,33 +180,33 @@ private void printSegments(CarbonFile tableStatusFile) throws IOException { indexSize} ); } - printer.printFormatted(); + tableFormatter.printFormatted(); } else { outPuts.add("table status file not found"); } } - private void printTableProperties(CarbonFile schemaFile) throws IOException { + private void collectTableProperties(CarbonFile schemaFile) throws IOException { outPuts.add(""); outPuts.add("## Table Properties"); if (schemaFile != null) { TableInfo thriftTableInfo = CarbonUtil.readSchemaFile(schemaFile.getPath()); Map tblProperties = thriftTableInfo.fact_table.tableProperties; - TablePrinter printer = new TablePrinter( + TableFormatter tableFormatter = new TableFormatter( new String[]{"Property Name", "Property Value"}, outPuts); for (Map.Entry entry : tblProperties.entrySet()) { - printer.addRow(new String[] { + tableFormatter.addRow(new String[] { String.format("'%s'", entry.getKey()), String.format("'%s'", entry.getValue()) }); } - printer.printFormatted(); + tableFormatter.printFormatted(); } else { outPuts.add("schema file not found"); } } - private void printBlockletDetail(int limitSize) { + private void collectBlockletDetail(int limitSize) { outPuts.add(""); outPuts.add("## Block Detail"); @@ -231,43 +231,44 @@ private void printBlockletDetail(int limitSize) { break; } } - printer.printFormatted(); + printer.collectFormattedData(); } - private void printBlockDetails(String blockFilePath) throws IOException { + private void collectBlockDetails(String blockFilePath) throws IOException { outPuts.add(""); outPuts.add("## Filtered Block Details for: " + blockFilePath .substring(blockFilePath.lastIndexOf(File.separator) + 1, blockFilePath.length())); - TablePrinter printer = - new TablePrinter(new String[] { "BLKLT", "NumPages", "NumRows", "Size" }, outPuts); + TableFormatter tableFormatter = + new TableFormatter(new String[] { "BLKLT", "NumPages", "NumRows", "Size" }, outPuts); CarbonFile datafile = FileFactory.getCarbonFile(blockFilePath); DataFile dataFile = new DataFile(datafile); dataFile.collectAllMeta(); FileFooter3 footer = dataFile.getFooter(); for (int blockletId = 0; blockletId < footer.blocklet_info_list3.size(); blockletId++) { BlockletInfo3 blocklet = footer.blocklet_info_list3.get(blockletId); - printer.addRow(new 
String[]{ + tableFormatter.addRow(new String[]{ String.valueOf(blockletId), String.format("%,d", blocklet.number_number_of_pages), String.format("%,d", blocklet.num_rows), Strings.formatSize(dataFile.getBlockletSizeInBytes(blockletId)) }); } - printer.printFormatted(); + tableFormatter.printFormatted(); } - private void printVersionDetails() { + private void collectVersionDetails() { DataFile file = dataFiles.entrySet().iterator().next().getValue(); FileFooter3 footer = file.getFooter(); if (null != footer.getExtra_info()) { outPuts.add(""); outPuts.add("## version Details"); - TablePrinter printer = new TablePrinter(new String[] { "written_by", "Version" }, outPuts); - printer.addRow(new String[] { String.format("%s", + TableFormatter tableFormatter = + new TableFormatter(new String[] { "written_by", "Version" }, outPuts); + tableFormatter.addRow(new String[] { String.format("%s", footer.getExtra_info().get(CarbonCommonConstants.CARBON_WRITTEN_BY_FOOTER_INFO)), String.format("%s", footer.getExtra_info().get(CarbonCommonConstants.CARBON_VERSION_FOOTER_INFO)) }); - printer.printFormatted(); + tableFormatter.printFormatted(); } } @@ -300,9 +301,11 @@ private void printColumnStats(String columnName) throws IOException, MemoryExcep if (blocklet.getColumnChunk().getDataType() == DataTypes.STRING) { minPercent = "NA"; maxPercent = "NA"; - // for complex types min max can be given as NA + // min/max is reported as NA for complex types, and also for varchar columns where + // min/max is not written if (blocklet.getColumnChunk().column.getColumnName().contains(".val") || blocklet - .getColumnChunk().column.getColumnName().contains(".")) { + .getColumnChunk().column.getColumnName().contains(".") || !blocklet + .getColumnChunk().isMinMaxPresent) { min = "NA"; max = "NA"; } else { @@ -312,15 +315,24 @@ private void printColumnStats(String columnName) throws IOException, MemoryExcep } else { minPercent = String.format("%.1f", blocklet.getColumnChunk().getMinPercentage() * 100); maxPercent = String.format("%.1f", blocklet.getColumnChunk().getMaxPercentage() * 100); - if (blockletMin.length > 4) { - min = String.valueOf(ByteUtil.toLong(blockletMin, 0, blockletMin.length)); + DataFile.ColumnChunk columnChunk = blocklet.columnChunk; + if (columnChunk.column.isDimensionColumn() && DataTypeUtil + .isPrimitiveColumn(columnChunk.column.getDataType())) { + min = DataTypeUtil.getDataBasedOnDataTypeForNoDictionaryColumn(blockletMin, + columnChunk.column.getDataType()).toString(); + max = DataTypeUtil.getDataBasedOnDataTypeForNoDictionaryColumn(blockletMax, + columnChunk.column.getDataType()).toString(); } else { - min = String.valueOf(ByteUtil.toInt(blockletMin, 0, blockletMin.length)); - } - if (blockletMax.length > 4) { - max = String.valueOf(ByteUtil.toLong(blockletMax, 0, blockletMax.length)); - } else { - max = String.valueOf(ByteUtil.toInt(blockletMax, 0, blockletMax.length)); + if (blockletMin.length > 4) { + min = String.valueOf(ByteUtil.toLong(blockletMin, 0, blockletMin.length)); + } else { + min = String.valueOf(ByteUtil.toInt(blockletMin, 0, blockletMin.length)); + } + if (blockletMax.length > 4) { + max = String.valueOf(ByteUtil.toLong(blockletMax, 0, blockletMax.length)); + } else { + max = String.valueOf(ByteUtil.toInt(blockletMax, 0, blockletMax.length)); + } } } printer.addRow( @@ -341,7 +353,7 @@ private void printColumnStats(String columnName) throws IOException, MemoryExcep ); } } - printer.printFormatted(); + printer.collectFormattedData(); } private void collectStats(String columnName) 
throws IOException, MemoryException { @@ -354,7 +366,7 @@ private void collectStats(String columnName) throws IOException, MemoryException } } - private void printColumnChunkMeta(String columnName) throws IOException, MemoryException { + private void collectColumnChunkMeta(String columnName) throws IOException, MemoryException { DataFile file = dataFiles.entrySet().iterator().next().getValue(); outPuts.add(""); outPuts.add("## Page Meta for column '" + columnName + "' in file " + file.getFilePath()); diff --git a/tools/cli/src/main/java/org/apache/carbondata/tool/FileCollector.java b/tools/cli/src/main/java/org/apache/carbondata/tool/FileCollector.java index 0e257d3c738..aa48b93bc0f 100644 --- a/tools/cli/src/main/java/org/apache/carbondata/tool/FileCollector.java +++ b/tools/cli/src/main/java/org/apache/carbondata/tool/FileCollector.java @@ -42,7 +42,7 @@ class FileCollector { private long numPage; private long numRow; private long totalDataSize; - private ArrayList outPuts; + private List outPuts; // file path mapping to file object private LinkedHashMap dataFiles = new LinkedHashMap<>(); @@ -50,7 +50,7 @@ class FileCollector { private CarbonFile schemaFile; - FileCollector(ArrayList outPuts) { + FileCollector(List outPuts) { this.outPuts = outPuts; } diff --git a/tools/cli/src/main/java/org/apache/carbondata/tool/ScanBenchmark.java b/tools/cli/src/main/java/org/apache/carbondata/tool/ScanBenchmark.java index eb714a8d7a6..af5bdb300b7 100644 --- a/tools/cli/src/main/java/org/apache/carbondata/tool/ScanBenchmark.java +++ b/tools/cli/src/main/java/org/apache/carbondata/tool/ScanBenchmark.java @@ -18,7 +18,7 @@ package org.apache.carbondata.tool; import java.io.IOException; -import java.util.ArrayList; +import java.util.List; import java.util.Map; import java.util.concurrent.atomic.AtomicReference; @@ -49,9 +49,9 @@ class ScanBenchmark implements Command { private String dataFolder; private DataFile file; - private ArrayList outPuts; + private List outPuts; - ScanBenchmark(String dataFolder, ArrayList outPuts) { + ScanBenchmark(String dataFolder, List outPuts) { this.dataFolder = dataFolder; this.outPuts = outPuts; } diff --git a/tools/cli/src/main/java/org/apache/carbondata/tool/ShardPrinter.java b/tools/cli/src/main/java/org/apache/carbondata/tool/ShardPrinter.java index 2b56908a1f4..20dece68c65 100644 --- a/tools/cli/src/main/java/org/apache/carbondata/tool/ShardPrinter.java +++ b/tools/cli/src/main/java/org/apache/carbondata/tool/ShardPrinter.java @@ -17,32 +17,32 @@ package org.apache.carbondata.tool; -import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import java.util.Map; class ShardPrinter { - private Map shardPrinter = new HashMap<>(); + private Map shardPrinter = new HashMap<>(); private String[] header; - private ArrayList outPuts; + private List outPuts; - ShardPrinter(String[] header, ArrayList outPuts) { + ShardPrinter(String[] header, List outPuts) { this.header = header; this.outPuts = outPuts; } void addRow(String shardName, String[] row) { - TablePrinter printer = shardPrinter.get(shardName); - if (printer == null) { - printer = new TablePrinter(header, outPuts); - shardPrinter.put(shardName, printer); + TableFormatter tableFormatter = shardPrinter.get(shardName); + if (tableFormatter == null) { + tableFormatter = new TableFormatter(header, outPuts); + shardPrinter.put(shardName, tableFormatter); } - printer.addRow(row); + tableFormatter.addRow(row); } - void printFormatted() { + void collectFormattedData() { int shardId = 1; - for (Map.Entry 
entry : shardPrinter.entrySet()) { + for (Map.Entry entry : shardPrinter.entrySet()) { outPuts.add(String.format("Shard #%d (%s)", shardId++, entry.getKey())); entry.getValue().printFormatted(); outPuts.add(""); diff --git a/tools/cli/src/main/java/org/apache/carbondata/tool/TablePrinter.java b/tools/cli/src/main/java/org/apache/carbondata/tool/TableFormatter.java similarity index 92% rename from tools/cli/src/main/java/org/apache/carbondata/tool/TablePrinter.java rename to tools/cli/src/main/java/org/apache/carbondata/tool/TableFormatter.java index 5af8975417d..2c57cd1d9bb 100644 --- a/tools/cli/src/main/java/org/apache/carbondata/tool/TablePrinter.java +++ b/tools/cli/src/main/java/org/apache/carbondata/tool/TableFormatter.java @@ -17,19 +17,18 @@ package org.apache.carbondata.tool; -import java.util.ArrayList; import java.util.LinkedList; import java.util.List; -class TablePrinter { +class TableFormatter { private List table = new LinkedList<>(); - private ArrayList outPuts; + private List outPuts; /** * create a new Table Printer * @param header table header */ - TablePrinter(String[] header, ArrayList outPuts) { + TableFormatter(String[] header, List outPuts) { this.table.add(header); this.outPuts = outPuts; } diff --git a/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java b/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java index 01471c98a23..659f32974e5 100644 --- a/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java +++ b/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java @@ -182,7 +182,7 @@ public void testSummaryOutputAll() { Assert.assertTrue(output.contains( "## version Details\n" + "written_by Version \n" - + "TestUtil 1.5.0-SNAPSHOT")); + + "TestUtil 1.6.0-SNAPSHOT")); } @Test