Skip to content

Commit

Permalink
ARROW-8006: [C++] Initialize spaced data when reading nulls from Parquet
Browse files Browse the repository at this point in the history
Closes #6544 from pitrou/ARROW-8006-parquet-uninitialized-data and squashes the following commits:

601eba2 <Wes McKinney> Add unit test that fails without the patch
b614686 <Antoine Pitrou> ARROW-8006:  Initialize spaced data when reading nulls from Parquet

Lead-authored-by: Antoine Pitrou <antoine@python.org>
Co-authored-by: Wes McKinney <wesm+git@apache.org>
Signed-off-by: Wes McKinney <wesm+git@apache.org>
  • Loading branch information
pitrou and wesm committed Mar 6, 2020
1 parent f8dddcd commit 8f61c79
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 1 deletion.
3 changes: 3 additions & 0 deletions cpp/src/arrow/util/rle_encoding.h
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,7 @@ inline int RleDecoder::GetBatchSpaced(int batch_size, int null_count,
DCHECK_GE(bit_width_, 0);
int values_read = 0;
int remaining_nulls = null_count;
T zero = {};

arrow::internal::BitmapReader bit_reader(valid_bits, valid_bits_offset, batch_size);

Expand Down Expand Up @@ -383,6 +384,7 @@ inline int RleDecoder::GetBatchSpaced(int batch_size, int null_count,
*out = indices[literals_read];
literals_read++;
} else {
*out = zero;
skipped++;
}
++out;
Expand All @@ -393,6 +395,7 @@ inline int RleDecoder::GetBatchSpaced(int batch_size, int null_count,
remaining_nulls -= skipped;
}
} else {
*out = zero;
++out;
values_read++;
remaining_nulls--;
Expand Down
16 changes: 15 additions & 1 deletion cpp/src/parquet/encoding_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -904,7 +904,21 @@ TEST_F(DictEncoding, CheckDecodeIndicesSpaced) {
actual_num_values = dict_decoder_->DecodeIndicesSpaced(
num_values_, null_count_, valid_bits_, 0, builder.get());
}
CheckDict(actual_num_values, *builder);
ASSERT_EQ(actual_num_values, num_values_ - null_count_);
std::shared_ptr<arrow::Array> actual;
ASSERT_OK(builder->Finish(&actual));
ASSERT_ARRAYS_EQUAL(*actual, *expected_dict_);

// Check that null indices are zero-initialized
const auto& dict_actual = checked_cast<const arrow::DictionaryArray&>(*actual);
const auto& indices = checked_cast<const arrow::Int32Array&>(*dict_actual.indices());

auto raw_values = indices.raw_values();
for (int64_t i = 0; i < indices.length(); ++i) {
if (indices.IsNull(i) && raw_values[i] != 0) {
FAIL() << "Null slot not zero-initialized";
}
}
}
}

Expand Down

0 comments on commit 8f61c79

Please sign in to comment.