Commit: review comments handled

akashrn5 committed Oct 24, 2018
1 parent dd57c30 commit 62f95e4
Showing 11 changed files with 98 additions and 71 deletions.
@@ -389,7 +389,9 @@ class TestNonTransactionalCarbonTable extends QueryTest with BeforeAndAfterAll {
|'carbondata' LOCATION
|'$writerPath' """.stripMargin)

sql("show summary for table sdkOutputTable options('command'='-cmd,summary,-p,-a,-v,-c,age')").show(1000,false)
val output = sql("show summary for table sdkOutputTable options('command'='-cmd,summary,-p,-a,-v,-c,age')").collect()

assert(output.toList.contains(Row("written_by Version ")))

checkExistence(sql("describe formatted sdkOutputTable"), true, "age,name")

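The switch from show() to collect() above is what makes the new assertion possible: show() only prints to the console, while collect() materializes the result as an Array[Row] the test can inspect. A minimal sketch of the pattern, assuming a CarbonSession-style environment where the SHOW SUMMARY syntax is registered and `sql` is the QueryTest helper used above:

import org.apache.spark.sql.Row

// Hypothetical test fragment illustrating the collect-and-assert pattern.
val output: Array[Row] = sql(
  "show summary for table sdkOutputTable options('command'='-cmd,summary,-v')"
).collect()

// Each row carries one formatted line of CLI output, so the test can
// assert on content instead of eyeballing the console.
assert(output.exists(_.getString(0).startsWith("written_by")))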
@@ -26,9 +26,14 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
import org.apache.spark.sql.execution.command.{Checker, DataCommand}
import org.apache.spark.sql.types.StringType

import org.apache.carbondata.common.exceptions.sql.MalformedCarbonCommandException
import org.apache.carbondata.tool.CarbonCli

/**
 * Show summary command class, which is integrated with the CLI;
 * SQL support for the CLI is provided via this class.
 * @param databaseNameOp optional database name
 * @param tableName table for which the summary is shown
 * @param commandOptions options to be passed to the CLI command
 */
case class CarbonShowSummaryCommand(
databaseNameOp: Option[String],
tableName: String,
@@ -497,10 +497,10 @@ class CarbonSpark2SqlParser extends CarbonDDLSqlParser {


protected lazy val cli: Parser[LogicalPlan] =
(SHOW ~> SUMMARY ~> FOR ~> TABLE) ~ (ident <~ ".").? ~ ident ~
(SHOW ~> SUMMARY ~> FOR ~> TABLE) ~> (ident <~ ".").? ~ ident ~
(OPTIONS ~> "(" ~> repsep(summaryOptions, ",") <~ ")").? <~
opt(";") ^^ {
case showSummary ~ databaseName ~ tableName ~ commandList =>
case databaseName ~ tableName ~ commandList =>
var commandOptions: Map[String, String] = null
if (commandList.isDefined) {
commandOptions = commandList.getOrElse(List.empty[(String, String)]).toMap
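In the parser fix above, the final `~` after the keyword sequence becomes `~>`, which discards the result of SHOW SUMMARY FOR TABLE instead of capturing it; that is why the case pattern no longer binds showSummary. A self-contained toy grammar showing the difference, assuming the scala-parser-combinators module (this is not CarbonData's actual parser):

import scala.util.parsing.combinator.RegexParsers

// `p ~ q` keeps both results and is matched as `a ~ b`; `p ~> q` keeps only
// q's result, so the keywords vanish from the pattern match.
object ShowSummaryToy extends RegexParsers {
  val ident: Parser[String] = """[a-zA-Z_][a-zA-Z0-9_]*""".r

  val cli: Parser[(Option[String], String)] =
    ("SHOW" ~> "SUMMARY" ~> "FOR" ~> "TABLE") ~> (ident <~ ".").? ~ ident ^^ {
      case databaseName ~ tableName => (databaseName, tableName)
    }
}

// ShowSummaryToy.parseAll(ShowSummaryToy.cli, "SHOW SUMMARY FOR TABLE db.t")
// yields Success((Some(db),t), ...)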
@@ -20,6 +20,7 @@
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.List;

import org.apache.carbondata.common.annotations.InterfaceAudience;
import org.apache.carbondata.common.annotations.InterfaceStability;
@@ -41,8 +42,11 @@
@InterfaceStability.Unstable
public class CarbonCli {

private static ArrayList<String> outPuts;
// list to collect all the output lines for the selected option details
private static List<String> outPuts;

// a boolean variable to decide whether to print the output to the console or return the list;
// defaults to true, and is set to false if the CLI is triggered via a SQL command
private static boolean isPrintInConsole = true;

private static Options buildOptions() {
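Together, the outPuts list and the new isPrintInConsole flag implement a collect-then-decide pattern: every sub-command appends its report lines to the shared list, and only at the end does the tool either print them or hand them back to the SQL layer. A hedged Scala sketch of the idea (names are illustrative; the real CarbonCli is a Java class with many more options):

import scala.collection.mutable.ListBuffer

object CliOutputSketch {
  private val outPuts = ListBuffer[String]()  // shared report buffer
  private var isPrintInConsole = true         // false when driven via SQL

  def run(args: Array[String], fromSql: Boolean): Seq[String] = {
    isPrintInConsole = !fromSql
    outPuts += s"## Summary for ${args.mkString(" ")}"  // commands append here
    if (isPrintInConsole) outPuts.foreach(println)      // console mode prints
    outPuts.toList                                      // SQL mode consumes the list
  }
}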
@@ -317,6 +317,10 @@ class ColumnChunk {
// min/max stats of this column chunk
byte[] min, max;

// whether min/max is present for the column chunk, as min/max may not be written
// beyond a specific size
boolean isMinMaxPresent;

// percentage of min/max comparing to min/max scope collected in all blocklets
// they are set after calculation in DataSummary
double minPercentage, maxPercentage;
@@ -335,6 +339,7 @@ class ColumnChunk {
this.column = column;
min = index.min_max_index.min_values.get(columnIndex).array();
max = index.min_max_index.max_values.get(columnIndex).array();
isMinMaxPresent = index.min_max_index.min_max_presence.get(columnIndex);

// read the column chunk metadata: DataChunk3
ByteBuffer buffer = fileReader.readByteBuffer(
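The new isMinMaxPresent field mirrors a presence flag stored in the blocklet index: once column values exceed a certain size, min/max is simply not written, so readers must check the flag before decoding the byte arrays. A hedged sketch of reader-side handling (names are illustrative, not CarbonData's API):

// Check the presence flag before trusting the min/max byte arrays.
final case class ChunkStats(min: Array[Byte], max: Array[Byte], isMinMaxPresent: Boolean)

def describe(stats: ChunkStats): (String, String) =
  if (!stats.isMinMaxPresent) ("NA", "NA")  // min/max was never written
  else (new String(stats.min), new String(stats.max))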
tools/cli/src/main/java/org/apache/carbondata/tool/DataSummary.java (102 changes: 57 additions & 45 deletions)
@@ -20,7 +20,6 @@
import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedHashMap;
@@ -40,6 +39,7 @@
import org.apache.carbondata.core.statusmanager.SegmentStatusManager;
import org.apache.carbondata.core.util.ByteUtil;
import org.apache.carbondata.core.util.CarbonUtil;
import org.apache.carbondata.core.util.DataTypeUtil;
import org.apache.carbondata.format.BlockletInfo3;
import org.apache.carbondata.format.DataChunk2;
import org.apache.carbondata.format.DataChunk3;
@@ -57,12 +57,12 @@
*/
class DataSummary implements Command {
private String dataFolder;
private ArrayList<String> outPuts;
private List<String> outPuts;

// file path mapping to file object
private LinkedHashMap<String, DataFile> dataFiles;

DataSummary(String dataFolder, ArrayList<String> outPuts) {
DataSummary(String dataFolder, List<String> outPuts) {
this.dataFolder = dataFolder;
this.outPuts = outPuts;
}
@@ -82,40 +82,40 @@ public void run(CommandLine line) throws IOException, MemoryException {
}
if (line.hasOption("s") || printAll) {
if (dataFiles.size() > 0) {
printSchema(dataFiles.entrySet().iterator().next().getValue());
collectSchemaDetails(dataFiles.entrySet().iterator().next().getValue());
}
}
if (line.hasOption("m") || printAll) {
printSegments(collector.getTableStatusFile());
collectSegmentsDetails(collector.getTableStatusFile());
}
if (line.hasOption("t") || printAll) {
printTableProperties(collector.getSchemaFile());
collectTableProperties(collector.getSchemaFile());
}
if (line.hasOption("b") || printAll) {
String limitSize = line.getOptionValue("b");
if (limitSize == null) {
// by default the output is limited to two shards; the user can increase this limit
limitSize = "2";
}
printBlockletDetail(Integer.parseInt(limitSize));
collectBlockletDetail(Integer.parseInt(limitSize));
}
if (line.hasOption("v") || printAll) {
printVersionDetails();
collectVersionDetails();
}
if (line.hasOption("B")) {
String blockFileName = line.getOptionValue("B");
printBlockDetails(blockFileName);
collectBlockDetails(blockFileName);
}
if (line.hasOption("c")) {
String columName = line.getOptionValue("c");
printColumnStats(columName);
if (line.hasOption("k")) {
printColumnChunkMeta(columName);
collectColumnChunkMeta(columName);
}
}
}
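The run() method above is a plain commons-cli dispatch: each short option enables one collector, and -a turns all of them on. A minimal hedged sketch of the same pattern (option letters mirror the diff; descriptions and bodies are illustrative):

import org.apache.commons.cli.{DefaultParser, Options}

object CliDispatchSketch {
  def main(args: Array[String]): Unit = {
    val options = new Options()
      .addOption("a", "all", false, "print all details")
      .addOption("s", "schema", false, "collect schema details")
      .addOption("b", "blocklet", true, "collect blocklet detail for the given number of shards")

    val line = new DefaultParser().parse(options, args)
    val printAll = line.hasOption("a")
    if (line.hasOption("s") || printAll) println("collecting schema ...")
    if (line.hasOption("b") || printAll) {
      // default to two shards when no limit is given, as DataSummary does
      val limit = Option(line.getOptionValue("b")).getOrElse("2").toInt
      println(s"collecting blocklet detail for $limit shards")
    }
  }
}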

private void printSchema(DataFile dataFile) throws IOException {
private void collectSchemaDetails(DataFile dataFile) throws IOException {
CarbonFile file = FileFactory.getCarbonFile(dataFile.getFilePath());
outPuts.add("");
outPuts.add("## Schema");
@@ -125,7 +125,7 @@ private void printSchema(DataFile dataFile) throws IOException {
outPuts.add("version: V" + header.version);
outPuts.add("timestamp: " + new java.sql.Timestamp(header.time_stamp));
List<ColumnSchema> columns = reader.readSchema();
TablePrinter printer = new TablePrinter(
TableFormatter tableFormatter = new TableFormatter(
new String[]{"Column Name", "Data Type", "Column Type",
"SortColumn", "Encoding", "Ordinal", "Id"}, outPuts);
for (ColumnSchema column : columns) {
@@ -134,7 +134,7 @@ private void printSchema(DataFile dataFile) throws IOException {
shortColumnId = "*" +
column.getColumnUniqueId().substring(column.getColumnUniqueId().length() - 4);
}
printer.addRow(new String[]{
tableFormatter.addRow(new String[]{
column.getColumnName(),
column.getDataType().getName(),
column.isDimensionColumn() ? "dimension" : "measure",
Expand All @@ -144,17 +144,17 @@ private void printSchema(DataFile dataFile) throws IOException {
shortColumnId
});
}
printer.printFormatted();
tableFormatter.printFormatted();
}
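The TablePrinter-to-TableFormatter rename follows the same collect-instead-of-print theme: the formatter aligns its rows and appends them to the shared outPuts list rather than writing to stdout. A hedged Scala sketch of what such a class might do (the real TableFormatter is a Java class; this is not its API):

import java.util.{List => JList}
import scala.collection.mutable.ArrayBuffer

class TableFormatterSketch(header: Array[String], outPuts: JList[String]) {
  private val rows = ArrayBuffer[Array[String]](header)

  def addRow(row: Array[String]): Unit = rows += row

  // pad every cell to its column's widest entry, then emit into outPuts
  def printFormatted(): Unit = {
    val widths = header.indices.map(i => rows.map(_(i).length).max)
    for (row <- rows) {
      outPuts.add(row.zip(widths).map { case (c, w) => c.padTo(w, ' ') }.mkString("  "))
    }
  }
}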

private void printSegments(CarbonFile tableStatusFile) throws IOException {
private void collectSegmentsDetails(CarbonFile tableStatusFile) throws IOException {
outPuts.add("");
outPuts.add("## Segment");
if (tableStatusFile != null) {
// first collect all information in memory then print a formatted table
LoadMetadataDetails[] segments =
SegmentStatusManager.readTableStatusFile(tableStatusFile.getPath());
TablePrinter printer = new TablePrinter(
TableFormatter tableFormatter = new TableFormatter(
new String[]{"SegmentID", "Status", "Load Start", "Load End",
"Merged To", "Format", "Data Size", "Index Size"}, outPuts);
for (LoadMetadataDetails segment : segments) {
@@ -169,7 +169,7 @@ private void printSegments(CarbonFile tableStatusFile) throws IOException {
} else {
indexSize = Strings.formatSize(Long.parseLong(segment.getIndexSize()));
}
printer.addRow(new String[]{
tableFormatter.addRow(new String[]{
segment.getLoadName(),
segment.getSegmentStatus().toString(),
new java.sql.Date(segment.getLoadStartTime()).toString(),
Expand All @@ -180,33 +180,33 @@ private void printSegments(CarbonFile tableStatusFile) throws IOException {
indexSize}
);
}
printer.printFormatted();
tableFormatter.printFormatted();
} else {
outPuts.add("table status file not found");
}
}

private void printTableProperties(CarbonFile schemaFile) throws IOException {
private void collectTableProperties(CarbonFile schemaFile) throws IOException {
outPuts.add("");
outPuts.add("## Table Properties");
if (schemaFile != null) {
TableInfo thriftTableInfo = CarbonUtil.readSchemaFile(schemaFile.getPath());
Map<String, String> tblProperties = thriftTableInfo.fact_table.tableProperties;
TablePrinter printer = new TablePrinter(
TableFormatter tableFormatter = new TableFormatter(
new String[]{"Property Name", "Property Value"}, outPuts);
for (Map.Entry<String, String> entry : tblProperties.entrySet()) {
printer.addRow(new String[] {
tableFormatter.addRow(new String[] {
String.format("'%s'", entry.getKey()),
String.format("'%s'", entry.getValue())
});
}
printer.printFormatted();
tableFormatter.printFormatted();
} else {
outPuts.add("schema file not found");
}
}

private void printBlockletDetail(int limitSize) {
private void collectBlockletDetail(int limitSize) {
outPuts.add("");
outPuts.add("## Block Detail");

@@ -231,43 +231,44 @@ private void printBlockletDetail(int limitSize) {
break;
}
}
printer.printFormatted();
printer.collectFormattedData();
}

private void printBlockDetails(String blockFilePath) throws IOException {
private void collectBlockDetails(String blockFilePath) throws IOException {
outPuts.add("");
outPuts.add("## Filtered Block Details for: " + blockFilePath
.substring(blockFilePath.lastIndexOf(File.separator) + 1, blockFilePath.length()));
TablePrinter printer =
new TablePrinter(new String[] { "BLKLT", "NumPages", "NumRows", "Size" }, outPuts);
TableFormatter tableFormatter =
new TableFormatter(new String[] { "BLKLT", "NumPages", "NumRows", "Size" }, outPuts);
CarbonFile datafile = FileFactory.getCarbonFile(blockFilePath);
DataFile dataFile = new DataFile(datafile);
dataFile.collectAllMeta();
FileFooter3 footer = dataFile.getFooter();
for (int blockletId = 0; blockletId < footer.blocklet_info_list3.size(); blockletId++) {
BlockletInfo3 blocklet = footer.blocklet_info_list3.get(blockletId);
printer.addRow(new String[]{
tableFormatter.addRow(new String[]{
String.valueOf(blockletId),
String.format("%,d", blocklet.number_number_of_pages),
String.format("%,d", blocklet.num_rows),
Strings.formatSize(dataFile.getBlockletSizeInBytes(blockletId))
});
}
printer.printFormatted();
tableFormatter.printFormatted();
}

private void printVersionDetails() {
private void collectVersionDetails() {
DataFile file = dataFiles.entrySet().iterator().next().getValue();
FileFooter3 footer = file.getFooter();
if (null != footer.getExtra_info()) {
outPuts.add("");
outPuts.add("## version Details");
TablePrinter printer = new TablePrinter(new String[] { "written_by", "Version" }, outPuts);
printer.addRow(new String[] { String.format("%s",
TableFormatter tableFormatter =
new TableFormatter(new String[] { "written_by", "Version" }, outPuts);
tableFormatter.addRow(new String[] { String.format("%s",
footer.getExtra_info().get(CarbonCommonConstants.CARBON_WRITTEN_BY_FOOTER_INFO)),
String.format("%s",
footer.getExtra_info().get(CarbonCommonConstants.CARBON_VERSION_FOOTER_INFO)) });
printer.printFormatted();
tableFormatter.printFormatted();
}
}

@@ -300,9 +301,11 @@ private void printColumnStats(String columnName) throws IOException, MemoryException {
if (blocklet.getColumnChunk().getDataType() == DataTypes.STRING) {
minPercent = "NA";
maxPercent = "NA";
// for complex types min max can be given as NA
// for complex types min/max is given as NA, and for varchar columns where
// min/max is not written, NA is given as well
if (blocklet.getColumnChunk().column.getColumnName().contains(".val") || blocklet
.getColumnChunk().column.getColumnName().contains(".")) {
.getColumnChunk().column.getColumnName().contains(".") || !blocklet
.getColumnChunk().isMinMaxPresent) {
min = "NA";
max = "NA";
} else {
@@ -312,15 +315,24 @@ private void printColumnStats(String columnName) throws IOException, MemoryException {
} else {
minPercent = String.format("%.1f", blocklet.getColumnChunk().getMinPercentage() * 100);
maxPercent = String.format("%.1f", blocklet.getColumnChunk().getMaxPercentage() * 100);
if (blockletMin.length > 4) {
min = String.valueOf(ByteUtil.toLong(blockletMin, 0, blockletMin.length));
DataFile.ColumnChunk columnChunk = blocklet.columnChunk;
if (columnChunk.column.isDimensionColumn() && DataTypeUtil
.isPrimitiveColumn(columnChunk.column.getDataType())) {
min = DataTypeUtil.getDataBasedOnDataTypeForNoDictionaryColumn(blockletMin,
columnChunk.column.getDataType()).toString();
max = DataTypeUtil.getDataBasedOnDataTypeForNoDictionaryColumn(blockletMax,
columnChunk.column.getDataType()).toString();
} else {
min = String.valueOf(ByteUtil.toInt(blockletMin, 0, blockletMin.length));
}
if (blockletMax.length > 4) {
max = String.valueOf(ByteUtil.toLong(blockletMax, 0, blockletMax.length));
} else {
max = String.valueOf(ByteUtil.toInt(blockletMax, 0, blockletMax.length));
if (blockletMin.length > 4) {
min = String.valueOf(ByteUtil.toLong(blockletMin, 0, blockletMin.length));
} else {
min = String.valueOf(ByteUtil.toInt(blockletMin, 0, blockletMin.length));
}
if (blockletMax.length > 4) {
max = String.valueOf(ByteUtil.toLong(blockletMax, 0, blockletMax.length));
} else {
max = String.valueOf(ByteUtil.toInt(blockletMax, 0, blockletMax.length));
}
}
}
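The new branch decodes min/max for primitive no-dictionary dimension columns through DataTypeUtil, and only falls back to raw byte-width decoding (4 bytes or fewer as int, otherwise long) for everything else. A hedged sketch of that fallback using java.nio in place of CarbonData's ByteUtil, assuming big-endian stats of at most 8 bytes:

import java.nio.ByteBuffer

// Left-pad the raw bytes to the target width, then read as int or long.
def decodeStat(bytes: Array[Byte]): String = {
  val width = if (bytes.length > 4) 8 else 4
  val padded = new Array[Byte](width)
  System.arraycopy(bytes, 0, padded, width - bytes.length, bytes.length)
  val buf = ByteBuffer.wrap(padded)
  if (width == 8) buf.getLong.toString else buf.getInt.toString
}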
printer.addRow(
Expand All @@ -341,7 +353,7 @@ private void printColumnStats(String columnName) throws IOException, MemoryExcep
);
}
}
printer.printFormatted();
printer.collectFormattedData();
}

private void collectStats(String columnName) throws IOException, MemoryException {
@@ -354,7 +366,7 @@ private void collectStats(String columnName) throws IOException, MemoryException
}
}

private void printColumnChunkMeta(String columnName) throws IOException, MemoryException {
private void collectColumnChunkMeta(String columnName) throws IOException, MemoryException {
DataFile file = dataFiles.entrySet().iterator().next().getValue();
outPuts.add("");
outPuts.add("## Page Meta for column '" + columnName + "' in file " + file.getFilePath());
@@ -42,15 +42,15 @@ class FileCollector {
private long numPage;
private long numRow;
private long totalDataSize;
private ArrayList<String> outPuts;
private List<String> outPuts;

// file path mapping to file object
private LinkedHashMap<String, DataFile> dataFiles = new LinkedHashMap<>();
private CarbonFile tableStatusFile;
private CarbonFile schemaFile;


FileCollector(ArrayList<String> outPuts) {
FileCollector(List<String> outPuts) {
this.outPuts = outPuts;
}
