Skip to content

Commit

Permalink
Add support to read varbinary column from Parquet fixed length byte array
Browse files Browse the repository at this point in the history
  • Loading branch information
majetideepak committed May 22, 2024
1 parent 67699a8 commit cb13fbe
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 8 deletions.
18 changes: 12 additions & 6 deletions velox/dwio/parquet/reader/PageReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -619,12 +619,18 @@ void PageReader::makeDecoder() {
pageData_, pageData_ + encodedDataSize_);
break;
case thrift::Type::FIXED_LEN_BYTE_ARRAY:
directDecoder_ = std::make_unique<dwio::common::DirectDecoder<true>>(
std::make_unique<dwio::common::SeekableArrayInputStream>(
pageData_, encodedDataSize_),
false,
type_->typeLength_,
true);
if (type_->type()->isVarbinary()) {
stringDecoder_ = std::make_unique<StringDecoder>(
pageData_, pageData_ + encodedDataSize_, type_->typeLength_);
} else {
directDecoder_ =
std::make_unique<dwio::common::DirectDecoder<true>>(
std::make_unique<dwio::common::SeekableArrayInputStream>(
pageData_, encodedDataSize_),
false,
type_->typeLength_,
true);
}
break;
default: {
directDecoder_ = std::make_unique<dwio::common::DirectDecoder<true>>(
Expand Down
17 changes: 15 additions & 2 deletions velox/dwio/parquet/reader/StringDecoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,14 @@ class StringDecoder {
// Creates a decoder over the buffer delimited by `start` and `end` with no
// fixed value width. Forwards to the three-argument constructor with a
// length of -1, so length_ ends up as -1 and the other members are
// initialized from `start` and `end` in the usual way.
StringDecoder(const char* start, const char* end)
    : StringDecoder(start, end, -1) {}

lastSafeWord_(end - simd::kPadding) {}
// Creates a decoder over the buffer delimited by `start` and `end` whose
// values each occupy `length` bytes.
//
// `start`  - first byte of the encoded data; becomes the read cursor.
// `end`    - end pointer of the encoded data.
// `length` - per-value byte width, stored in length_. The visitor loop picks
//            readFixedString() only when this is > 0.
StringDecoder(const char* start, const char* end, int length)
    : bufferStart_{start},
      bufferEnd_{end},
      lastSafeWord_{end - simd::kPadding},
      length_{length} {}

void skip(uint64_t numValues) {
skip<false>(numValues, 0, nullptr);
Expand Down Expand Up @@ -62,7 +68,7 @@ class StringDecoder {
}

// We are at a non-null value on a row to visit.
toSkip = visitor.process(readString(), atEnd);
toSkip = visitor.process(length_ > 0 ? readFixedString() : readString(), atEnd);
}
++current;
if (toSkip) {
Expand All @@ -85,9 +91,16 @@ class StringDecoder {
bufferStart_ += length + sizeof(int32_t);
return folly::StringPiece(bufferStart_ - length, length);
}

// Returns a view of the next length_ bytes at the read cursor and moves the
// cursor past them. The view aliases the underlying buffer; no bytes are
// copied. No check against bufferEnd_ is made here.
folly::StringPiece readFixedString() {
  const char* const valueBegin = bufferStart_;
  bufferStart_ = valueBegin + length_;
  return folly::StringPiece(valueBegin, length_);
}

// Read cursor into the encoded data. Initialized from the constructor's
// `start` and advanced past each value as it is read.
const char* bufferStart_;
// End pointer of the encoded data, as passed to the constructor.
const char* bufferEnd_;
// Fixed at construction to end - simd::kPadding.
// NOTE(review): presumably the last cursor position from which a padded
// SIMD-width load stays inside the buffer - confirm against its users.
const char* const lastSafeWord_;
// Byte width of every value when decoding fixed-length data; -1 when the
// decoder was constructed without a length. A value > 0 makes the visitor
// loop read through readFixedString() instead of readString().
int length_;
};

} // namespace facebook::velox::parquet

0 comments on commit cb13fbe

Please sign in to comment.