Skip to content

Commit

Permalink
PARQUET-1785: [C++] Implement ByteStreamSplitDecoder::DecodeArrow and…
Browse files Browse the repository at this point in the history
… refactor tests

The patch implements ByteStreamSplitDecoder::DecodeArrow(...) and
refactors the ByteStreamSplit Encode/Decode tests to better
utilize the existing test facilities.

Closes #6471 from martinradev/byte_stream_split_submit and squashes the following commits:

664b2e8 <Antoine Pitrou> Enable missing CheckOnlyEncode test
2cfd7c1 <Antoine Pitrou> Workaround ARROW-7944
81423ab <Martin Radev> PARQUET-1785: Move algorithm header to the top
d41a85f <Martin Radev> PARQUET-1785: Add missing header include
1418f58 <Martin Radev> PARQUET-1785: fix style
024fe3f <Martin Radev> PARQUET-1785: Enable fewer elements BYTE_STREAM_SPLIT PutSpaced test
5601f57 <Martin Radev> PARQUET-1785: Add separate tests for enc/dec BYTE_STREAM_SPLIT paths
6da373d <Martin Radev> PARQUET-1785:  Implement ByteStreamSplitDecoder::DecodeArrow and refactor tests

Lead-authored-by: Martin Radev <martin.b.radev@gmail.com>
Co-authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
  • Loading branch information
martinradev and pitrou committed Feb 26, 2020
1 parent 76db492 commit 3a4023a
Show file tree
Hide file tree
Showing 3 changed files with 218 additions and 131 deletions.
32 changes: 31 additions & 1 deletion cpp/src/parquet/encoding.cc
Expand Up @@ -2436,7 +2436,37 @@ template <typename DType>
/// Decode BYTE_STREAM_SPLIT-encoded values directly into an Arrow builder.
///
/// Appends `num_values` slots to `builder`: for every set bit in the validity
/// bitmap one value is reassembled from the byte streams and appended, for
/// every unset bit a null is appended.
///
/// \param num_values total number of slots (valid values plus nulls) to emit
/// \param null_count number of null slots among `num_values`
/// \param valid_bits validity bitmap; a set bit marks a non-null slot
/// \param valid_bits_offset bit offset of the first slot inside `valid_bits`
/// \param builder destination accumulator; space for `num_values` is reserved
/// \return the number of non-null values consumed (`num_values - null_count`)
///
/// Calls ParquetException::EofException() when fewer than
/// `sizeof(T) * (num_values - null_count)` bytes remain in the buffer.
int ByteStreamSplitDecoder<DType>::DecodeArrow(
    int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset,
    typename EncodingTraits<DType>::Accumulator* builder) {
  // One byte stream per byte of T.
  constexpr size_t num_streams = sizeof(T);
  constexpr int value_size = static_cast<int>(num_streams);
  const int values_decoded = num_values - null_count;
  if (ARROW_PREDICT_FALSE(len_ < value_size * values_decoded)) {
    ParquetException::EofException();
  }

  // Reserve up front so the Unsafe* appends below are in bounds.
  PARQUET_THROW_NOT_OK(builder->Reserve(num_values));

  arrow::internal::BitmapReader bit_reader(valid_bits, valid_bits_offset, num_values);
  // Values already consumed by earlier decode calls; used as the starting
  // index inside each byte stream.
  const int num_decoded_previously = num_values_in_buffer - num_values_;
  int offset = 0;
  for (int i = 0; i < num_values; ++i) {
    if (bit_reader.IsSet()) {
      // Gather byte `b` of the current value from stream `b`; consecutive
      // streams are `num_values_in_buffer` bytes apart in `data_`.
      uint8_t gathered_byte_data[num_streams];
      for (size_t b = 0; b < num_streams; ++b) {
        const size_t byte_index =
            b * num_values_in_buffer + num_decoded_previously + offset;
        gathered_byte_data[b] = data_[byte_index];
      }
      builder->UnsafeAppend(arrow::util::SafeLoadAs<T>(&gathered_byte_data[0]));
      ++offset;
    } else {
      builder->UnsafeAppendNull();
    }
    bit_reader.Next();
  }

  num_values_ -= values_decoded;
  // Each decoded value consumes sizeof(T) bytes. Note that
  // sizeof(num_streams) would be sizeof(size_t), not the value width.
  len_ -= value_size * values_decoded;
  return values_decoded;
}

template <typename DType>
Expand Down

0 comments on commit 3a4023a

Please sign in to comment.