Commit: review comments handled

akashrn5 committed Oct 24, 2018
1 parent dd57c30 commit 62f95e4
Showing 11 changed files with 98 additions and 71 deletions.
@@ -389,7 +389,9 @@ class TestNonTransactionalCarbonTable extends QueryTest with BeforeAndAfterAll {
|'carbondata' LOCATION
|'$writerPath' """.stripMargin)

sql("show summary for table sdkOutputTable options('command'='-cmd,summary,-p,-a,-v,-c,age')").show(1000,false)
val output = sql("show summary for table sdkOutputTable options('command'='-cmd,summary,-p,-a,-v,-c,age')").collect()

assert(output.toList.contains(Row("written_by Version ")))

checkExistence(sql("describe formatted sdkOutputTable"), true, "age,name")

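The switch from show() to collect() above is what makes the new assertion possible: show() only prints to the console, while collect() materializes the result as an Array[Row] the test can inspect. A minimal sketch of the pattern, assuming a CarbonSession-style environment where the SHOW SUMMARY syntax is registered and `sql` is the QueryTest helper used above:

import org.apache.spark.sql.Row

// Hypothetical test fragment illustrating the collect-and-assert pattern.
val output: Array[Row] = sql(
  "show summary for table sdkOutputTable options('command'='-cmd,summary,-v')"
).collect()

// Each row carries one formatted line of CLI output, so the test can
// assert on content instead of eyeballing the console.
assert(output.exists(_.getString(0).startsWith("written_by")))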
@@ -26,9 +26,14 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
import org.apache.spark.sql.execution.command.{Checker, DataCommand}
import org.apache.spark.sql.types.StringType

import org.apache.carbondata.common.exceptions.sql.MalformedCarbonCommandException
import org.apache.carbondata.tool.CarbonCli

/**
 * Show summary command class, which is integrated with the CLI;
 * SQL support for the CLI is provided via this class.
 * @param databaseNameOp optional database name
 * @param tableName table for which the summary is shown
 * @param commandOptions options to be passed to the CLI command
 */
case class CarbonShowSummaryCommand(
databaseNameOp: Option[String],
tableName: String,
@@ -497,10 +497,10 @@ class CarbonSpark2SqlParser extends CarbonDDLSqlParser {


protected lazy val cli: Parser[LogicalPlan] =
(SHOW ~> SUMMARY ~> FOR ~> TABLE) ~ (ident <~ ".").? ~ ident ~
(SHOW ~> SUMMARY ~> FOR ~> TABLE) ~> (ident <~ ".").? ~ ident ~
(OPTIONS ~> "(" ~> repsep(summaryOptions, ",") <~ ")").? <~
opt(";") ^^ {
case showSummary ~ databaseName ~ tableName ~ commandList =>
case databaseName ~ tableName ~ commandList =>
var commandOptions: Map[String, String] = null
if (commandList.isDefined) {
commandOptions = commandList.getOrElse(List.empty[(String, String)]).toMap
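In the parser fix above, the final `~` after the keyword sequence becomes `~>`, which discards the result of SHOW SUMMARY FOR TABLE instead of capturing it; that is why the case pattern no longer binds showSummary. A self-contained toy grammar showing the difference, assuming the scala-parser-combinators module (this is not CarbonData's actual parser):

import scala.util.parsing.combinator.RegexParsers

// `p ~ q` keeps both results and is matched as `a ~ b`; `p ~> q` keeps only
// q's result, so the keywords vanish from the pattern match.
object ShowSummaryToy extends RegexParsers {
  val ident: Parser[String] = """[a-zA-Z_][a-zA-Z0-9_]*""".r

  val cli: Parser[(Option[String], String)] =
    ("SHOW" ~> "SUMMARY" ~> "FOR" ~> "TABLE") ~> (ident <~ ".").? ~ ident ^^ {
      case databaseName ~ tableName => (databaseName, tableName)
    }
}

// ShowSummaryToy.parseAll(ShowSummaryToy.cli, "SHOW SUMMARY FOR TABLE db.t")
// yields Success((Some(db),t), ...)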
@@ -20,6 +20,7 @@
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.List;

import org.apache.carbondata.common.annotations.InterfaceAudience;
import org.apache.carbondata.common.annotations.InterfaceStability;
@@ -41,8 +42,11 @@
@InterfaceStability.Unstable
public class CarbonCli {

private static ArrayList<String> outPuts;
// list to collect all the output lines for the selected option details
private static List<String> outPuts;

// a boolean variable to decide whether to print the output to the console or return the list;
// defaults to true, and is set to false if the CLI is triggered via a SQL command
private static boolean isPrintInConsole = true;

private static Options buildOptions() {
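Together, the outPuts list and the new isPrintInConsole flag implement a collect-then-decide pattern: every sub-command appends its report lines to the shared list, and only at the end does the tool either print them or hand them back to the SQL layer. A hedged Scala sketch of the idea (names are illustrative; the real CarbonCli is a Java class with many more options):

import scala.collection.mutable.ListBuffer

object CliOutputSketch {
  private val outPuts = ListBuffer[String]()  // shared report buffer
  private var isPrintInConsole = true         // false when driven via SQL

  def run(args: Array[String], fromSql: Boolean): Seq[String] = {
    isPrintInConsole = !fromSql
    outPuts += s"## Summary for ${args.mkString(" ")}"  // commands append here
    if (isPrintInConsole) outPuts.foreach(println)      // console mode prints
    outPuts.toList                                      // SQL mode consumes the list
  }
}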
@@ -317,6 +317,10 @@ class ColumnChunk {
// min/max stats of this column chunk
byte[] min, max;

// whether min/max is present for the column chunk, as min/max may not be written
// beyond a specific size
boolean isMinMaxPresent;

// percentage of min/max comparing to min/max scope collected in all blocklets
// they are set after calculation in DataSummary
double minPercentage, maxPercentage;
@@ -335,6 +339,7 @@ class ColumnChunk {
this.column = column;
min = index.min_max_index.min_values.get(columnIndex).array();
max = index.min_max_index.max_values.get(columnIndex).array();
isMinMaxPresent = index.min_max_index.min_max_presence.get(columnIndex);

// read the column chunk metadata: DataChunk3
ByteBuffer buffer = fileReader.readByteBuffer(
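The new isMinMaxPresent field mirrors a presence flag stored in the blocklet index: once column values exceed a certain size, min/max is simply not written, so readers must check the flag before decoding the byte arrays. A hedged sketch of reader-side handling (names are illustrative, not CarbonData's API):

// Check the presence flag before trusting the min/max byte arrays.
final case class ChunkStats(min: Array[Byte], max: Array[Byte], isMinMaxPresent: Boolean)

def describe(stats: ChunkStats): (String, String) =
  if (!stats.isMinMaxPresent) ("NA", "NA")  // min/max was never written
  else (new String(stats.min), new String(stats.max))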
tools/cli/src/main/java/org/apache/carbondata/tool/DataSummary.java (102 changes: 57 additions & 45 deletions)
@@ -20,7 +20,6 @@
import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedHashMap;
@@ -40,6 +39,7 @@
import org.apache.carbondata.core.statusmanager.SegmentStatusManager;
import org.apache.carbondata.core.util.ByteUtil;
import org.apache.carbondata.core.util.CarbonUtil;
import org.apache.carbondata.core.util.DataTypeUtil;
import org.apache.carbondata.format.BlockletInfo3;
import org.apache.carbondata.format.DataChunk2;
import org.apache.carbondata.format.DataChunk3;
@@ -57,12 +57,12 @@
*/
class DataSummary implements Command {
private String dataFolder;
private ArrayList<String> outPuts;
private List<String> outPuts;

// file path mapping to file object
private LinkedHashMap<String, DataFile> dataFiles;

DataSummary(String dataFolder, ArrayList<String> outPuts) {
DataSummary(String dataFolder, List<String> outPuts) {
this.dataFolder = dataFolder;
this.outPuts = outPuts;
}
@@ -82,40 +82,40 @@ public void run(CommandLine line) throws IOException, MemoryException {
}
if (line.hasOption("s") || printAll) {
if (dataFiles.size() > 0) {
printSchema(dataFiles.entrySet().iterator().next().getValue());
collectSchemaDetails(dataFiles.entrySet().iterator().next().getValue());
}
}
if (line.hasOption("m") || printAll) {
printSegments(collector.getTableStatusFile());
collectSegmentsDetails(collector.getTableStatusFile());
}
if (line.hasOption("t") || printAll) {
printTableProperties(collector.getSchemaFile());
collectTableProperties(collector.getSchemaFile());
}
if (line.hasOption("b") || printAll) {
String limitSize = line.getOptionValue("b");
if (limitSize == null) {
// by default the output is limited to two shards; the user can increase this limit
limitSize = "2";
}
printBlockletDetail(Integer.parseInt(limitSize));
collectBlockletDetail(Integer.parseInt(limitSize));
}
if (line.hasOption("v") || printAll) {
printVersionDetails();
collectVersionDetails();
}
if (line.hasOption("B")) {
String blockFileName = line.getOptionValue("B");
printBlockDetails(blockFileName);
collectBlockDetails(blockFileName);
}
if (line.hasOption("c")) {
String columName = line.getOptionValue("c");
printColumnStats(columName);
if (line.hasOption("k")) {
printColumnChunkMeta(columName);
collectColumnChunkMeta(columName);
}
}
}
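The run() method above is a plain commons-cli dispatch: each short option enables one collector, and -a turns all of them on. A minimal hedged sketch of the same pattern (option letters mirror the diff; descriptions and bodies are illustrative):

import org.apache.commons.cli.{DefaultParser, Options}

object CliDispatchSketch {
  def main(args: Array[String]): Unit = {
    val options = new Options()
      .addOption("a", "all", false, "print all details")
      .addOption("s", "schema", false, "collect schema details")
      .addOption("b", "blocklet", true, "collect blocklet detail for the given number of shards")

    val line = new DefaultParser().parse(options, args)
    val printAll = line.hasOption("a")
    if (line.hasOption("s") || printAll) println("collecting schema ...")
    if (line.hasOption("b") || printAll) {
      // default to two shards when no limit is given, as DataSummary does
      val limit = Option(line.getOptionValue("b")).getOrElse("2").toInt
      println(s"collecting blocklet detail for $limit shards")
    }
  }
}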

private void printSchema(DataFile dataFile) throws IOException {
private void collectSchemaDetails(DataFile dataFile) throws IOException {
CarbonFile file = FileFactory.getCarbonFile(dataFile.getFilePath());
outPuts.add("");
outPuts.add("## Schema");
@@ -125,7 +125,7 @@ private void printSchema(DataFile dataFile) throws IOException {
outPuts.add("version: V" + header.version);
outPuts.add("timestamp: " + new java.sql.Timestamp(header.time_stamp));
List<ColumnSchema> columns = reader.readSchema();
TablePrinter printer = new TablePrinter(
TableFormatter tableFormatter = new TableFormatter(
new String[]{"Column Name", "Data Type", "Column Type",
"SortColumn", "Encoding", "Ordinal", "Id"}, outPuts);
for (ColumnSchema column : columns) {
@@ -134,7 +134,7 @@ private void printSchema(DataFile dataFile) throws IOException {
shortColumnId = "*" +
column.getColumnUniqueId().substring(column.getColumnUniqueId().length() - 4);
}
printer.addRow(new String[]{
tableFormatter.addRow(new String[]{
column.getColumnName(),
column.getDataType().getName(),
column.isDimensionColumn() ? "dimension" : "measure",
Expand All @@ -144,17 +144,17 @@ private void printSchema(DataFile dataFile) throws IOException {
shortColumnId
});
}
printer.printFormatted();
tableFormatter.printFormatted();
}
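The TablePrinter-to-TableFormatter rename follows the same collect-instead-of-print theme: the formatter aligns its rows and appends them to the shared outPuts list rather than writing to stdout. A hedged Scala sketch of what such a class might do (the real TableFormatter is a Java class; this is not its API):

import java.util.{List => JList}
import scala.collection.mutable.ArrayBuffer

class TableFormatterSketch(header: Array[String], outPuts: JList[String]) {
  private val rows = ArrayBuffer[Array[String]](header)

  def addRow(row: Array[String]): Unit = rows += row

  // pad every cell to its column's widest entry, then emit into outPuts
  def printFormatted(): Unit = {
    val widths = header.indices.map(i => rows.map(_(i).length).max)
    for (row <- rows) {
      outPuts.add(row.zip(widths).map { case (c, w) => c.padTo(w, ' ') }.mkString("  "))
    }
  }
}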

private void printSegments(CarbonFile tableStatusFile) throws IOException {
private void collectSegmentsDetails(CarbonFile tableStatusFile) throws IOException {
outPuts.add("");
outPuts.add("## Segment");
if (tableStatusFile != null) {
// first collect all information in memory then print a formatted table
LoadMetadataDetails[] segments =
SegmentStatusManager.readTableStatusFile(tableStatusFile.getPath());
TablePrinter printer = new TablePrinter(
TableFormatter tableFormatter = new TableFormatter(
new String[]{"SegmentID", "Status", "Load Start", "Load End",
"Merged To", "Format", "Data Size", "Index Size"}, outPuts);
for (LoadMetadataDetails segment : segments) {
@@ -169,7 +169,7 @@ private void printSegments(CarbonFile tableStatusFile) throws IOException {
} else {
indexSize = Strings.formatSize(Long.parseLong(segment.getIndexSize()));
}
printer.addRow(new String[]{
tableFormatter.addRow(new String[]{
segment.getLoadName(),
segment.getSegmentStatus().toString(),
new java.sql.Date(segment.getLoadStartTime()).toString(),
Expand All @@ -180,33 +180,33 @@ private void printSegments(CarbonFile tableStatusFile) throws IOException {
indexSize}
);
}
printer.printFormatted();
tableFormatter.printFormatted();
} else {
outPuts.add("table status file not found");
}
}

private void printTableProperties(CarbonFile schemaFile) throws IOException {
private void collectTableProperties(CarbonFile schemaFile) throws IOException {
outPuts.add("");
outPuts.add("## Table Properties");
if (schemaFile != null) {
TableInfo thriftTableInfo = CarbonUtil.readSchemaFile(schemaFile.getPath());
Map<String, String> tblProperties = thriftTableInfo.fact_table.tableProperties;
TablePrinter printer = new TablePrinter(
TableFormatter tableFormatter = new TableFormatter(
new String[]{"Property Name", "Property Value"}, outPuts);
for (Map.Entry<String, String> entry : tblProperties.entrySet()) {
printer.addRow(new String[] {
tableFormatter.addRow(new String[] {
String.format("'%s'", entry.getKey()),
String.format("'%s'", entry.getValue())
});
}
printer.printFormatted();
tableFormatter.printFormatted();
} else {
outPuts.add("schema file not found");
}
}

private void printBlockletDetail(int limitSize) {
private void collectBlockletDetail(int limitSize) {
outPuts.add("");
outPuts.add("## Block Detail");

@@ -231,43 +231,44 @@ private void printBlockletDetail(int limitSize) {
break;
}
}
printer.printFormatted();
printer.collectFormattedData();
}

private void printBlockDetails(String blockFilePath) throws IOException {
private void collectBlockDetails(String blockFilePath) throws IOException {
outPuts.add("");
outPuts.add("## Filtered Block Details for: " + blockFilePath
.substring(blockFilePath.lastIndexOf(File.separator) + 1, blockFilePath.length()));
TablePrinter printer =
new TablePrinter(new String[] { "BLKLT", "NumPages", "NumRows", "Size" }, outPuts);
TableFormatter tableFormatter =
new TableFormatter(new String[] { "BLKLT", "NumPages", "NumRows", "Size" }, outPuts);
CarbonFile datafile = FileFactory.getCarbonFile(blockFilePath);
DataFile dataFile = new DataFile(datafile);
dataFile.collectAllMeta();
FileFooter3 footer = dataFile.getFooter();
for (int blockletId = 0; blockletId < footer.blocklet_info_list3.size(); blockletId++) {
BlockletInfo3 blocklet = footer.blocklet_info_list3.get(blockletId);
printer.addRow(new String[]{
tableFormatter.addRow(new String[]{
String.valueOf(blockletId),
String.format("%,d", blocklet.number_number_of_pages),
String.format("%,d", blocklet.num_rows),
Strings.formatSize(dataFile.getBlockletSizeInBytes(blockletId))
});
}
printer.printFormatted();
tableFormatter.printFormatted();
}

private void printVersionDetails() {
private void collectVersionDetails() {
DataFile file = dataFiles.entrySet().iterator().next().getValue();
FileFooter3 footer = file.getFooter();
if (null != footer.getExtra_info()) {
outPuts.add("");
outPuts.add("## version Details");
TablePrinter printer = new TablePrinter(new String[] { "written_by", "Version" }, outPuts);
printer.addRow(new String[] { String.format("%s",
TableFormatter tableFormatter =
new TableFormatter(new String[] { "written_by", "Version" }, outPuts);
tableFormatter.addRow(new String[] { String.format("%s",
footer.getExtra_info().get(CarbonCommonConstants.CARBON_WRITTEN_BY_FOOTER_INFO)),
String.format("%s",
footer.getExtra_info().get(CarbonCommonConstants.CARBON_VERSION_FOOTER_INFO)) });
printer.printFormatted();
tableFormatter.printFormatted();
}
}

@@ -300,9 +301,11 @@ private void printColumnStats(String columnName) throws IOException, MemoryException {
if (blocklet.getColumnChunk().getDataType() == DataTypes.STRING) {
minPercent = "NA";
maxPercent = "NA";
// for complex types min max can be given as NA
// for complex types min/max is given as NA, and for varchar columns where
// min/max is not written, NA is given as well
if (blocklet.getColumnChunk().column.getColumnName().contains(".val") || blocklet
.getColumnChunk().column.getColumnName().contains(".")) {
.getColumnChunk().column.getColumnName().contains(".") || !blocklet
.getColumnChunk().isMinMaxPresent) {
min = "NA";
max = "NA";
} else {
@@ -312,15 +315,24 @@ private void printColumnStats(String columnName) throws IOException, MemoryException {
} else {
minPercent = String.format("%.1f", blocklet.getColumnChunk().getMinPercentage() * 100);
maxPercent = String.format("%.1f", blocklet.getColumnChunk().getMaxPercentage() * 100);
if (blockletMin.length > 4) {
min = String.valueOf(ByteUtil.toLong(blockletMin, 0, blockletMin.length));
DataFile.ColumnChunk columnChunk = blocklet.columnChunk;
if (columnChunk.column.isDimensionColumn() && DataTypeUtil
.isPrimitiveColumn(columnChunk.column.getDataType())) {
min = DataTypeUtil.getDataBasedOnDataTypeForNoDictionaryColumn(blockletMin,
columnChunk.column.getDataType()).toString();
max = DataTypeUtil.getDataBasedOnDataTypeForNoDictionaryColumn(blockletMax,
columnChunk.column.getDataType()).toString();
} else {
min = String.valueOf(ByteUtil.toInt(blockletMin, 0, blockletMin.length));
}
if (blockletMax.length > 4) {
max = String.valueOf(ByteUtil.toLong(blockletMax, 0, blockletMax.length));
} else {
max = String.valueOf(ByteUtil.toInt(blockletMax, 0, blockletMax.length));
if (blockletMin.length > 4) {
min = String.valueOf(ByteUtil.toLong(blockletMin, 0, blockletMin.length));
} else {
min = String.valueOf(ByteUtil.toInt(blockletMin, 0, blockletMin.length));
}
if (blockletMax.length > 4) {
max = String.valueOf(ByteUtil.toLong(blockletMax, 0, blockletMax.length));
} else {
max = String.valueOf(ByteUtil.toInt(blockletMax, 0, blockletMax.length));
}
}
}
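The new branch decodes min/max for primitive no-dictionary dimension columns through DataTypeUtil, and only falls back to raw byte-width decoding (4 bytes or fewer as int, otherwise long) for everything else. A hedged sketch of that fallback using java.nio in place of CarbonData's ByteUtil, assuming big-endian stats of at most 8 bytes:

import java.nio.ByteBuffer

// Left-pad the raw bytes to the target width, then read as int or long.
def decodeStat(bytes: Array[Byte]): String = {
  val width = if (bytes.length > 4) 8 else 4
  val padded = new Array[Byte](width)
  System.arraycopy(bytes, 0, padded, width - bytes.length, bytes.length)
  val buf = ByteBuffer.wrap(padded)
  if (width == 8) buf.getLong.toString else buf.getInt.toString
}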
printer.addRow(
Expand All @@ -341,7 +353,7 @@ private void printColumnStats(String columnName) throws IOException, MemoryExcep
);
}
}
printer.printFormatted();
printer.collectFormattedData();
}

private void collectStats(String columnName) throws IOException, MemoryException {
@@ -354,7 +366,7 @@ private void collectStats(String columnName) throws IOException, MemoryException
}
}

private void printColumnChunkMeta(String columnName) throws IOException, MemoryException {
private void collectColumnChunkMeta(String columnName) throws IOException, MemoryException {
DataFile file = dataFiles.entrySet().iterator().next().getValue();
outPuts.add("");
outPuts.add("## Page Meta for column '" + columnName + "' in file " + file.getFilePath());
@@ -42,15 +42,15 @@ class FileCollector {
private long numPage;
private long numRow;
private long totalDataSize;
private ArrayList<String> outPuts;
private List<String> outPuts;

// file path mapping to file object
private LinkedHashMap<String, DataFile> dataFiles = new LinkedHashMap<>();
private CarbonFile tableStatusFile;
private CarbonFile schemaFile;


FileCollector(ArrayList<String> outPuts) {
FileCollector(List<String> outPuts) {
this.outPuts = outPuts;
}
