Skip to content

Commit

Permalink
PARQUET-2211: [C++] Print ColumnMetaData.encoding_stats field (#14556)
Browse files Browse the repository at this point in the history
Authored-by: Gang Wu <ustcwg@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
  • Loading branch information
wgtmac authored Nov 6, 2022
1 parent 2cac16b commit 7f067f9
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 7 deletions.
37 changes: 32 additions & 5 deletions cpp/src/parquet/printer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,25 @@ namespace parquet {

class ColumnReader;

namespace {

void PrintPageEncodingStats(std::ostream& stream,
const std::vector<PageEncodingStats>& encoding_stats) {
for (size_t i = 0; i < encoding_stats.size(); ++i) {
const auto& encoding = encoding_stats.at(i);
stream << EncodingToString(encoding.encoding);
if (encoding.page_type == parquet::PageType::DICTIONARY_PAGE) {
// Explicitly tell if this encoding comes from a dictionary page
stream << "(DICT_PAGE)";
}
if (i + 1 != encoding_stats.size()) {
stream << " ";
}
}
}

} // namespace

// ----------------------------------------------------------------------
// ParquetFilePrinter::DebugPrint

Expand Down Expand Up @@ -131,9 +150,13 @@ void ParquetFilePrinter::DebugPrint(std::ostream& stream, std::list<int> selecte
<< " Compression: "
<< ::arrow::internal::AsciiToUpper(
Codec::GetCodecAsString(column_chunk->compression()))
<< ", Encodings:";
for (auto encoding : column_chunk->encodings()) {
stream << " " << EncodingToString(encoding);
<< ", Encodings: ";
if (column_chunk->encoding_stats().empty()) {
for (auto encoding : column_chunk->encodings()) {
stream << EncodingToString(encoding) << " ";
}
} else {
PrintPageEncodingStats(stream, column_chunk->encoding_stats());
}
stream << std::endl
<< " Uncompressed Size: " << column_chunk->total_uncompressed_size()
Expand Down Expand Up @@ -271,8 +294,12 @@ void ParquetFilePrinter::JSONPrint(std::ostream& stream, std::list<int> selected
<< ::arrow::internal::AsciiToUpper(
Codec::GetCodecAsString(column_chunk->compression()))
<< "\", \"Encodings\": \"";
for (auto encoding : column_chunk->encodings()) {
stream << EncodingToString(encoding) << " ";
if (column_chunk->encoding_stats().empty()) {
for (auto encoding : column_chunk->encodings()) {
stream << EncodingToString(encoding) << " ";
}
} else {
PrintPageEncodingStats(stream, column_chunk->encoding_stats());
}
stream << "\", "
<< "\"UncompressedSize\": \"" << column_chunk->total_uncompressed_size()
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/parquet/reader_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -547,12 +547,12 @@ Column 1: b (INT32)
--- Rows: 3 ---
Column 0
Values: 18 Statistics Not Set
Compression: SNAPPY, Encodings: RLE PLAIN_DICTIONARY
Compression: SNAPPY, Encodings: PLAIN_DICTIONARY(DICT_PAGE) PLAIN_DICTIONARY
Uncompressed Size: 103, Compressed Size: 104
Column 1
Values: 3, Null Values: 0, Distinct Values: 0
Max: 1, Min: 1
Compression: SNAPPY, Encodings: BIT_PACKED PLAIN_DICTIONARY
Compression: SNAPPY, Encodings: PLAIN_DICTIONARY(DICT_PAGE) PLAIN_DICTIONARY
Uncompressed Size: 52, Compressed Size: 56
)###";
std::string values_output = R"###(--- Values ---
Expand Down

0 comments on commit 7f067f9

Please sign in to comment.