Commit b9da8be

Merge fc888b5 into f51f5cd

akashrn5 committed Nov 2, 2018
2 parents f51f5cd + fc888b5 commit b9da8be

Showing 8 changed files with 104 additions and 66 deletions.
@@ -61,7 +61,8 @@ public static void main(String[] args) {
CarbonWriter writer = CarbonWriter.builder()
.outputPath(path)
.withLoadOptions(map)
.withCsvInput(new Schema(fields)).build();
.withCsvInput(new Schema(fields))
.writtenBy("CarbonReaderExample").build();

for (int i = 0; i < 10; i++) {
String[] row2 = new String[]{
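The writer-side change above threads a writtenBy application name into the SDK builder; it lands in the file footer and is what the CLI later reports as written_by (see the test assertion below). A minimal sketch of the updated call from Scala, assuming the SDK builder API shown in the hunk; the output path and schema are illustrative stand-ins:

import org.apache.carbondata.sdk.file.{CarbonWriter, Field, Schema}

// Minimal sketch, assuming the builder API from the hunk above.
val fields = Array(new Field("name", "string"), new Field("age", "int"))
val writer = CarbonWriter.builder()
  .outputPath("/tmp/carbon-out")        // hypothetical output location
  .withCsvInput(new Schema(fields))
  .writtenBy("CarbonReaderExample")     // recorded in the file footer
  .build()
writer.write(Array("bob", "28"))
writer.close()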
@@ -389,7 +389,7 @@ class TestNonTransactionalCarbonTable extends QueryTest with BeforeAndAfterAll {
|'carbondata' LOCATION
|'$writerPath' """.stripMargin)

val output = sql("show summary for table sdkOutputTable options('command'='-cmd,summary,-p,-a,-v,-c,age')").collect()
val output = sql("Carboncli for table sdkOutputTable options('-cmd summary -a -v -c age')").collect()

assert(output.toList.contains(Row("written_by Version ")))

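In the test, the SQL surface moves from SHOW SUMMARY FOR TABLE ... OPTIONS('command'='-cmd,summary,...') to CARBONCLI FOR TABLE ... OPTIONS('...'), with the CLI arguments now passed as one space-separated string. A hedged sketch of the new invocation, assuming a SparkSession (here spark) wired with CarbonData's SQL extensions:

// Sketch only; the statement mirrors the updated test above.
val out = spark.sql(
  "CARBONCLI FOR TABLE sdkOutputTable OPTIONS('-cmd summary -a -v -c age')"
).collect()
out.foreach(row => println(row.getString(0)))   // one CLI output line per row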
@@ -188,7 +188,7 @@ abstract class CarbonDDLSqlParser extends AbstractCarbonSparkSQLParser {
protected val STREAM = carbonKeyWord("STREAM")
protected val STREAMS = carbonKeyWord("STREAMS")
protected val STMPROPERTIES = carbonKeyWord("STMPROPERTIES")
protected val SUMMARY = carbonKeyWord("SUMMARY")
protected val CARBONCLI = carbonKeyWord("CARBONCLI")

protected val doubleQuotedString = "\"([^\"]+)\"".r
protected val singleQuotedString = "'([^']+)'".r
@@ -1145,10 +1145,10 @@ abstract class CarbonDDLSqlParser extends AbstractCarbonSparkSQLParser {
case _ => ("", "")
}

protected lazy val summaryOptions: Parser[(String, String)] =
(stringLit <~ "=") ~ stringLit ^^ {
case opt ~ optvalue => (opt.trim.toLowerCase(), optvalue)
case _ => ("", "")
protected lazy val commandOptions: Parser[String] =
stringLit ^^ {
case optValue => optValue
case _ => ""
}


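The parser change replaces the key/value summaryOptions rule with a commandOptions rule that accepts a single string literal. A self-contained sketch of the two grammars, built on the same scala-parser-combinators library the Carbon parsers extend; stringLit is approximated here with a single-quoted regex, whereas the real parser reuses the SQL lexer:

import scala.util.parsing.combinator.JavaTokenParsers

object OptionGrammarSketch extends JavaTokenParsers {
  // Approximation of the SQL lexer's string literal.
  private def stringLit: Parser[String] =
    "'([^']*)'".r ^^ { s => s.substring(1, s.length - 1) }

  // old grammar: 'key'='value' pairs
  def summaryOptions: Parser[(String, String)] =
    (stringLit <~ "=") ~ stringLit ^^ { case k ~ v => (k.trim.toLowerCase, v) }

  // new grammar: one raw argument string
  def commandOptions: Parser[String] = stringLit

  def main(args: Array[String]): Unit = {
    println(parseAll(summaryOptions, "'command'='-cmd,summary,-a'"))  // old style
    println(parseAll(commandOptions, "'-cmd summary -a'"))            // new style
  }
}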
@@ -29,28 +29,28 @@ import org.apache.spark.sql.types.StringType
import org.apache.carbondata.tool.CarbonCli

/**
* Show summary command class which is integrated to cli and sql support is provided via this class
* CarbonCLi command class which is integrated to cli and sql support is provided via this class
* @param databaseNameOp
* @param tableName
* @param commandOptions
*/
case class CarbonShowSummaryCommand(
case class CarbonCliCommand(
databaseNameOp: Option[String],
tableName: String,
commandOptions: Map[String, String])
commandOptions: String)
extends DataCommand {

override def output: Seq[Attribute] = {
Seq(AttributeReference("Table Summary", StringType, nullable = false)())
Seq(AttributeReference("CarbonCli", StringType, nullable = false)())
}

override def processData(sparkSession: SparkSession): Seq[Row] = {
Checker.validateTableExists(databaseNameOp, tableName, sparkSession)
val carbonTable = CarbonEnv.getCarbonTable(databaseNameOp, tableName)(sparkSession)
val commandArgs: Seq[String] = commandOptions("command").split(",")
val commandArgs: Seq[String] = commandOptions.split("\\s+")
val finalCommands = commandArgs.collect {
case a if a.trim.equalsIgnoreCase("-p") =>
Seq(a, carbonTable.getTablePath)
case a if a.trim.equalsIgnoreCase("summary") || a.trim.equalsIgnoreCase("benchmark") =>
Seq(a, "-p", carbonTable.getTablePath)
case x => Seq(x.trim)
}.flatten
val summaryOutput = new util.ArrayList[String]()
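processData now splits the raw option string on whitespace and, when it sees the summary or benchmark sub-command, splices in -p <tablePath> so callers no longer supply the store path by hand. A minimal re-creation of that expansion, with a hypothetical table path:

// Sketch of the argument expansion in CarbonCliCommand.processData above.
def expandArgs(commandOptions: String, tablePath: String): Seq[String] =
  commandOptions.split("\\s+").toSeq.collect {
    case a if a.trim.equalsIgnoreCase("summary") || a.trim.equalsIgnoreCase("benchmark") =>
      Seq(a, "-p", tablePath)   // inject the table path after the sub-command
    case x => Seq(x.trim)
  }.flatten

// expandArgs("-cmd summary -a -v -c age", "/store/db/tbl")  (path is hypothetical)
//   -> Seq("-cmd", "summary", "-p", "/store/db/tbl", "-a", "-v", "-c", "age")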
@@ -497,18 +497,18 @@ class CarbonSpark2SqlParser extends CarbonDDLSqlParser {


protected lazy val cli: Parser[LogicalPlan] =
(SHOW ~> SUMMARY ~> FOR ~> TABLE) ~> (ident <~ ".").? ~ ident ~
(OPTIONS ~> "(" ~> repsep(summaryOptions, ",") <~ ")").? <~
(CARBONCLI ~> FOR ~> TABLE) ~> (ident <~ ".").? ~ ident ~
(OPTIONS ~> "(" ~> commandOptions <~ ")").? <~
opt(";") ^^ {
case databaseName ~ tableName ~ commandList =>
var commandOptions: Map[String, String] = null
var commandOptions: String = null
if (commandList.isDefined) {
commandOptions = commandList.getOrElse(List.empty[(String, String)]).toMap
commandOptions = commandList.get
}
CarbonShowSummaryCommand(
CarbonCliCommand(
convertDbNameToLowerCase(databaseName),
tableName.toLowerCase(),
commandOptions.map { case (key, value) => key.toLowerCase -> value })
commandOptions)
}


21 changes: 17 additions & 4 deletions tools/cli/src/main/java/org/apache/carbondata/tool/CarbonCli.java
@@ -19,6 +19,8 @@

import java.io.IOException;
import java.io.PrintStream;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.List;

@@ -148,7 +150,10 @@ private static void runCli(PrintStream out, Options options, CommandLine line)
outPuts = new ArrayList<>();
}
if (line.hasOption("h")) {
printHelp(options);
collectHelpInfo(options);
for (String output : outPuts) {
out.println(output);
}
return;
}

@@ -167,7 +172,10 @@ private static void runCli(PrintStream out, Options options, CommandLine line)
} else {
out.println("command " + cmd + " is not supported");
outPuts.add("command " + cmd + " is not supported");
printHelp(options);
collectHelpInfo(options);
for (String output : outPuts) {
out.println(output);
}
return;
}

@@ -186,9 +194,14 @@ private static void runCli(PrintStream out, Options options, CommandLine line)
}
}

private static void printHelp(Options options) {
private static void collectHelpInfo(Options options) {
HelpFormatter formatter = new HelpFormatter();
formatter.printHelp("CarbonCli", options);
StringWriter stringWriter = new StringWriter();
PrintWriter printWriter = new PrintWriter(stringWriter);
formatter.printHelp(printWriter, formatter.getWidth(), "CarbonCli", null, options,
formatter.getLeftPadding(), formatter.getDescPadding(), null, false);
printWriter.flush();
outPuts.add(stringWriter.toString());
}

}
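printHelp used to write straight to stdout via HelpFormatter.printHelp(String, Options); collectHelpInfo instead renders the help text into a StringWriter so it can be appended to the shared outPuts list and returned through the SQL path. The same trick in isolation, as a Scala sketch over the commons-cli API the tool already depends on:

import java.io.{PrintWriter, StringWriter}
import org.apache.commons.cli.{HelpFormatter, Options, Option => CliOption}

// Capture commons-cli help output as a String instead of printing it.
def helpAsString(options: Options): String = {
  val formatter = new HelpFormatter
  val sw = new StringWriter
  val pw = new PrintWriter(sw)
  formatter.printHelp(pw, formatter.getWidth, "CarbonCli", null, options,
    formatter.getLeftPadding, formatter.getDescPadding, null, false)
  pw.flush()
  sw.toString
}

// Usage with a one-option Options instance:
println(helpAsString(
  new Options().addOption(new CliOption("h", "help", false, "print this message"))))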
15 changes: 13 additions & 2 deletions tools/cli/src/main/java/org/apache/carbondata/tool/DataFile.java
@@ -33,6 +33,7 @@
import org.apache.carbondata.core.memory.MemoryException;
import org.apache.carbondata.core.metadata.datatype.DataType;
import org.apache.carbondata.core.metadata.datatype.DataTypes;
import org.apache.carbondata.core.metadata.encoder.Encoding;
import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema;
import org.apache.carbondata.core.reader.CarbonFooterReaderV3;
import org.apache.carbondata.core.reader.CarbonHeaderReader;
@@ -446,7 +447,8 @@ void computePercentage(byte[] shardMin, byte[] shardMax) {
* @return result
*/
private double computePercentage(byte[] data, byte[] min, byte[] max, ColumnSchema column) {
if (column.getDataType() == DataTypes.STRING) {
if (column.getDataType() == DataTypes.STRING || column.getDataType() == DataTypes.BOOLEAN
|| column.hasEncoding(Encoding.DICTIONARY)) {
// for string, we do not calculate
return 0;
} else if (DataTypes.isDecimal(column.getDataType())) {
@@ -456,7 +458,16 @@ private double computePercentage(byte[] data, byte[] min, byte[] max, ColumnSchema column) {
return dataValue.divide(factorValue).doubleValue();
}
double dataValue, minValue, factorValue;
if (column.getDataType() == DataTypes.SHORT) {
if (columnChunk.column.isDimensionColumn() &&
DataTypeUtil.isPrimitiveColumn(columnChunk.column.getDataType())) {
minValue = Double.valueOf(String.valueOf(
DataTypeUtil.getDataBasedOnDataTypeForNoDictionaryColumn(min, column.getDataType())));
dataValue = Double.valueOf(String.valueOf(
DataTypeUtil.getDataBasedOnDataTypeForNoDictionaryColumn(data, column.getDataType())));
factorValue = Double.valueOf(String.valueOf(
DataTypeUtil.getDataBasedOnDataTypeForNoDictionaryColumn(max, column.getDataType())))
- minValue;
} else if (column.getDataType() == DataTypes.SHORT) {
minValue = ByteUtil.toShort(min, 0);
dataValue = ByteUtil.toShort(data, 0) - minValue;
factorValue = ByteUtil.toShort(max, 0) - ByteUtil.toShort(min, 0);
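computePercentage reports where a value sits inside the shard's [min, max] range; the hunk teaches it to decode no-dictionary primitive dimensions through DataTypeUtil rather than assuming the fixed measure byte layout, and to skip STRING, BOOLEAN, and dictionary-encoded columns, where the ratio is meaningless. The arithmetic itself, stated directly with illustrative numbers:

// Relative position of a value in the shard range; this is the quantity
// the CLI prints as a min/max percentage.
def relativePosition(value: Double, min: Double, max: Double): Double =
  (value - min) / (max - min)

// e.g. a blocklet max of 40 inside a shard spanning [10, 50]:
// relativePosition(40, 10, 50) == 0.75, shown by the CLI as 75.0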
93 changes: 53 additions & 40 deletions tools/cli/src/main/java/org/apache/carbondata/tool/DataSummary.java
@@ -20,24 +20,19 @@
import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.*;

import org.apache.carbondata.common.Strings;
import org.apache.carbondata.core.constants.CarbonCommonConstants;
import org.apache.carbondata.core.datastore.filesystem.CarbonFile;
import org.apache.carbondata.core.datastore.impl.FileFactory;
import org.apache.carbondata.core.memory.MemoryException;
import org.apache.carbondata.core.metadata.datatype.DataTypes;
import org.apache.carbondata.core.metadata.encoder.Encoding;
import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema;
import org.apache.carbondata.core.reader.CarbonHeaderReader;
import org.apache.carbondata.core.statusmanager.LoadMetadataDetails;
import org.apache.carbondata.core.statusmanager.SegmentStatusManager;
import org.apache.carbondata.core.util.ByteUtil;
import org.apache.carbondata.core.util.CarbonUtil;
import org.apache.carbondata.core.util.DataTypeUtil;
import org.apache.carbondata.format.BlockletInfo3;
@@ -80,7 +75,9 @@ public void run(CommandLine line) throws IOException, MemoryException {
}
if (line.hasOption("s") || printAll) {
if (dataFiles.size() > 0) {
collectSchemaDetails(dataFiles.entrySet().iterator().next().getValue());
List<String> dataFilesSet = new ArrayList<>(dataFiles.keySet());
Collections.reverse(dataFilesSet);
collectSchemaDetails(dataFiles.get(dataFilesSet.get(0)));
}
}
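When printing the schema, the tool previously took whichever data file the map's iterator produced first; it now reverses the sorted key list, presumably so the schema is read from the most recent data file. The selection in miniature, with made-up file names:

// Pick the newest file's key, mirroring the reversed key list above.
val keys = List(
  "part-0-1_batchno0-0-100.carbondata",   // hypothetical older file
  "part-0-2_batchno0-0-200.carbondata")   // hypothetical newer file
val latest = keys.reverse.head
println(latest)   // part-0-2_batchno0-0-200.carbondata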
if (line.hasOption("m") || printAll) {
Expand Down Expand Up @@ -175,8 +172,8 @@ private void collectSegmentsDetails(CarbonFile tableStatusFile) throws IOExcepti
tableFormatter.addRow(new String[]{
segment.getLoadName(),
segment.getSegmentStatus().toString(),
new java.sql.Date(segment.getLoadStartTime()).toString(),
new java.sql.Date(segment.getLoadEndTime()).toString(),
new java.sql.Timestamp(segment.getLoadStartTime()).toString(),
new java.sql.Timestamp(segment.getLoadEndTime()).toString(),
segment.getMergedLoadName() == null ? "NA" : segment.getMergedLoadName(),
segment.getFileFormat().toString(),
dataSize,
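The segment listing switches java.sql.Date for java.sql.Timestamp because Date.toString drops the time of day while Timestamp.toString keeps it, which matters for load start/end times. With an illustrative epoch value:

val loadStart = 1541152800123L   // illustrative load start, in milliseconds
println(new java.sql.Date(loadStart).toString)       // 2018-11-02 (time of day lost)
println(new java.sql.Timestamp(loadStart).toString)  // 2018-11-02 10:00:00.123 in UTC; zone-dependent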
@@ -306,7 +303,8 @@ private void printColumnStats(String columnName) throws IOException, MemoryException {
maxPercent = "NA";
// for complex types min max can be given as NA and for varchar where min max is not
// written, can give NA
if (blocklet.getColumnChunk().column.getColumnName().contains(".val") || blocklet
if (blocklet.getColumnChunk().column.hasEncoding(Encoding.DICTIONARY) || blocklet
.getColumnChunk().column.getColumnName().contains(".val") || blocklet
.getColumnChunk().column.getColumnName().contains(".") || !blocklet
.getColumnChunk().isMinMaxPresent) {
min = "NA";
@@ -316,26 +314,39 @@ private void printColumnStats(String columnName) throws IOException, MemoryException {
max = new String(blockletMax, Charset.forName(DEFAULT_CHARSET));
}
} else {
minPercent = String.format("%.1f", blocklet.getColumnChunk().getMinPercentage() * 100);
maxPercent = String.format("%.1f", blocklet.getColumnChunk().getMaxPercentage() * 100);
// for complex columns min and max and percentage
if (blocklet.getColumnChunk().column.getColumnName().contains(".val") ||
blocklet.getColumnChunk().column.getColumnName().contains(".")) {
minPercent = "NA";
maxPercent = "NA";
} else {
minPercent =
String.format("%.1f", Math.abs(blocklet.getColumnChunk().getMinPercentage() * 100));
maxPercent =
String.format("%.1f", Math.abs(blocklet.getColumnChunk().getMaxPercentage() * 100));
}
DataFile.ColumnChunk columnChunk = blocklet.columnChunk;
if (columnChunk.column.isDimensionColumn() && DataTypeUtil
// need to consider no dictionary complex column
if (columnChunk.column.hasEncoding(Encoding.DICTIONARY) || blocklet
.getColumnChunk().column.getColumnName().contains(".val") || blocklet
.getColumnChunk().column.getColumnName().contains(".")) {
min = "NA";
max = "NA";
} else if (columnChunk.column.isDimensionColumn() && DataTypeUtil
.isPrimitiveColumn(columnChunk.column.getDataType())) {
min = DataTypeUtil.getDataBasedOnDataTypeForNoDictionaryColumn(blockletMin,
columnChunk.column.getDataType()).toString();
max = DataTypeUtil.getDataBasedOnDataTypeForNoDictionaryColumn(blockletMax,
columnChunk.column.getDataType()).toString();
} else {
if (blockletMin.length > 4) {
min = String.valueOf(ByteUtil.toLong(blockletMin, 0, blockletMin.length));
} else {
min = String.valueOf(ByteUtil.toInt(blockletMin, 0, blockletMin.length));
}
if (blockletMax.length > 4) {
max = String.valueOf(ByteUtil.toLong(blockletMax, 0, blockletMax.length));
} else {
max = String.valueOf(ByteUtil.toInt(blockletMax, 0, blockletMax.length));
if (columnChunk.column.getDataType().equals(DataTypes.TIMESTAMP)) {
min = new java.sql.Timestamp(Long.parseLong(min) / 1000).toString();
max = new java.sql.Timestamp(Long.parseLong(max) / 1000).toString();
}
} else {
min = String.valueOf(DataTypeUtil
.getMeasureObjectFromDataType(blockletMin, columnChunk.column.getDataType()));
max = String.valueOf(DataTypeUtil
.getMeasureObjectFromDataType(blockletMax, columnChunk.column.getDataType()));
}
}
printer.addRow(
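For TIMESTAMP columns the stored min/max appear to be microsecond values, while java.sql.Timestamp expects milliseconds, hence the divide by 1000 above before rendering. A sketch with an illustrative stored value:

val storedMicros = 1541152800000000L   // illustrative TIMESTAMP min/max as stored
println(new java.sql.Timestamp(storedMicros / 1000).toString)
// 2018-11-02 10:00:00.0 in UTC; exact text is zone-dependent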
@@ -370,24 +381,26 @@ private void collectStats(String columnName) throws IOException, MemoryException
}

private void collectColumnChunkMeta(String columnName) throws IOException, MemoryException {
DataFile file = dataFiles.entrySet().iterator().next().getValue();
outPuts.add("");
outPuts.add("## Page Meta for column '" + columnName + "' in file " + file.getFilePath());
collectStats(columnName);
for (int i = 0; i < file.getAllBlocklets().size(); i++) {
DataFile.Blocklet blocklet = file.getAllBlocklets().get(i);
DataChunk3 dataChunk3 = blocklet.getColumnChunk().getDataChunk3();
List<DataChunk2> dataChunk2List = dataChunk3.getData_chunk_list();
outPuts.add(String.format("Blocklet %d:", i));
for (Map.Entry<String, DataFile> entry : dataFiles.entrySet()) {
DataFile file = entry.getValue();
outPuts.add("");
outPuts.add("## Page Meta for column '" + columnName + "' in file " + file.getFilePath());
collectStats(columnName);
for (int i = 0; i < file.getAllBlocklets().size(); i++) {
DataFile.Blocklet blocklet = file.getAllBlocklets().get(i);
DataChunk3 dataChunk3 = blocklet.getColumnChunk().getDataChunk3();
List<DataChunk2> dataChunk2List = dataChunk3.getData_chunk_list();
outPuts.add(String.format("Blocklet %d:", i));

// There will be many pages, for debugging purpose,
// just print 3 page for each blocklet is enough
for (int j = 0; j < dataChunk2List.size() && j < 3; j++) {
outPuts.add(String.format("Page %d (offset %d, length %d): %s",
j, dataChunk3.page_offset.get(j), dataChunk3.page_length.get(j),
dataChunk2List.get(j).toString()));
// There will be many pages, for debugging purpose,
// just print 3 page for each blocklet is enough
for (int j = 0; j < dataChunk2List.size() && j < 3; j++) {
outPuts.add(String
.format("Page %d (offset %d, length %d): %s", j, dataChunk3.page_offset.get(j),
dataChunk3.page_length.get(j), dataChunk2List.get(j).toString()));
}
outPuts.add("");
}
outPuts.add("");
}
}

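collectColumnChunkMeta previously dumped page metadata for only the first data file; the hunk wraps the whole walk in a loop over every entry in dataFiles, still capping output at three pages per blocklet. The shape of the traversal as a sketch, with stand-in types (the real code walks DataFile, Blocklet, and DataChunk3 structures):

case class Page(offset: Long, length: Int)
case class Blocklet(pages: List[Page])
case class FileSketch(path: String, blocklets: List[Blocklet])

val files = List(FileSketch("part-0-1.carbondata",   // hypothetical file
  List(Blocklet(List(Page(0, 64), Page(64, 64), Page(128, 64), Page(192, 64))))))

for (file <- files) {
  println(s"## Page Meta in file ${file.path}")
  for ((blocklet, i) <- file.blocklets.zipWithIndex) {
    println(s"Blocklet $i:")
    // many pages may exist; three per blocklet is enough for debugging
    blocklet.pages.take(3).zipWithIndex.foreach { case (p, j) =>
      println(s"Page $j (offset ${p.offset}, length ${p.length})")
    }
  }
}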
