Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions src/paimon/format/parquet/parquet_file_batch_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,9 @@ Status ParquetFileBatchReader::SetReadSchema(
read_row_groups_ = row_groups;
read_column_indices_ = column_indices;

metrics_->SetCounter(ParquetMetrics::READ_ROW_GROUPS_TOTAL, reader_->GetNumberOfRowGroups());
metrics_->SetCounter(ParquetMetrics::READ_ROW_GROUPS_FILTERED, row_groups.size());

PAIMON_ASSIGN_OR_RAISE(std::set<int32_t> ordered_row_groups,
reader_->FilterRowGroupsByReadRanges(read_ranges_, read_row_groups_));
return reader_->PrepareForReadingLazy(ordered_row_groups, read_column_indices_);
Expand Down Expand Up @@ -243,6 +246,12 @@ Result<BatchReader::ReadBatch> ParquetFileBatchReader::NextBatch() {
std::unique_ptr<ArrowArray> c_array = std::make_unique<ArrowArray>();
std::unique_ptr<ArrowSchema> c_schema = std::make_unique<ArrowSchema>();
PAIMON_RETURN_NOT_OK_FROM_ARROW(arrow::ExportArray(*array, c_array.get(), c_schema.get()));

read_rows_ += array->length();
read_batch_count_++;
metrics_->SetCounter(ParquetMetrics::READ_ROWS, read_rows_);
metrics_->SetCounter(ParquetMetrics::READ_BATCH_COUNT, read_batch_count_);

return make_pair(std::move(c_array), std::move(c_schema));
}

Expand Down
3 changes: 3 additions & 0 deletions src/paimon/format/parquet/parquet_file_batch_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,9 @@ class ParquetFileBatchReader : public PrefetchFileBatchReader {

std::shared_ptr<Metrics> metrics_;

uint64_t read_rows_ = 0;
uint64_t read_batch_count_ = 0;

// last time set read schema
std::vector<int32_t> read_row_groups_;
std::vector<int32_t> read_column_indices_;
Expand Down
6 changes: 6 additions & 0 deletions src/paimon/format/parquet/parquet_format_defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,12 @@ static constexpr uint32_t DEFAULT_PARQUET_READ_PREDICATE_NODE_COUNT_LIMIT = 512;
// Names of the counters published by the Parquet format through
// Metrics::SetCounter. Each constant is a NUL-terminated character array so it
// can be handed to any API that accepts a C string.
class ParquetMetrics {
public:
// write
static constexpr char WRITE_RECORD_COUNT[] = "parquet.write.record.count";

// read
// Row groups present in the file (reader's GetNumberOfRowGroups()).
static constexpr char READ_ROW_GROUPS_TOTAL[] = "parquet.read.row-groups.total";
// Size of the row-group list applied by the most recent SetReadSchema().
static constexpr char READ_ROW_GROUPS_FILTERED[] = "parquet.read.row-groups.filtered";
// Running sum of the lengths of the arrays returned by NextBatch().
static constexpr char READ_ROWS[] = "parquet.read.rows";
// Running number of batches returned by NextBatch().
static constexpr char READ_BATCH_COUNT[] = "parquet.read.batch-count";
};
Comment thread
lxy-9602 marked this conversation as resolved.

} // namespace paimon::parquet
Loading