Skip to content

Commit

Permalink
PARQUET-1788: Remove UBSan error when rep/def levels are null
Browse files Browse the repository at this point in the history
Closes #6378 from emkornfield/PARQUET_1788 and squashes the following commits:

498c627 <Micah Kornfield> PARQUET-1788: Remove UBSan error when rep/def levels are null

Authored-by: Micah Kornfield <emkornfield@gmail.com>
Signed-off-by: Wes McKinney <wesm+git@apache.org>
  • Loading branch information
emkornfield authored and wesm committed Feb 10, 2020
1 parent 6600a39 commit 4ef8436
Showing 1 changed file with 22 additions and 8 deletions.
30 changes: 22 additions & 8 deletions cpp/src/parquet/column_writer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,17 @@

namespace parquet {

namespace {

// Advances `base` by `offset` elements, passing a null `base` through
// unchanged so that no pointer arithmetic is ever performed on nullptr
// (level arrays may legitimately be null, and UBSan flags such arithmetic).
inline const int16_t* AddIfNotNull(const int16_t* base, int64_t offset) {
  return (base == nullptr) ? nullptr : base + offset;
}

} // namespace

using arrow::Status;
using arrow::compute::Datum;
using arrow::internal::checked_cast;
Expand Down Expand Up @@ -837,8 +848,8 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter<
// pagesize limit
int64_t value_offset = 0;
auto WriteChunk = [&](int64_t offset, int64_t batch_size) {
int64_t values_to_write =
WriteLevels(batch_size, def_levels + offset, rep_levels + offset);
int64_t values_to_write = WriteLevels(batch_size, AddIfNotNull(def_levels, offset),
AddIfNotNull(rep_levels, offset));
// PARQUET-780
if (values_to_write > 0) {
DCHECK_NE(nullptr, values);
Expand All @@ -862,8 +873,9 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter<
auto WriteChunk = [&](int64_t offset, int64_t batch_size) {
int64_t batch_num_values = 0;
int64_t batch_num_spaced_values = 0;
WriteLevelsSpaced(batch_size, def_levels + offset, rep_levels + offset,
&batch_num_values, &batch_num_spaced_values);
WriteLevelsSpaced(batch_size, AddIfNotNull(def_levels, offset),
AddIfNotNull(rep_levels, offset), &batch_num_values,
&batch_num_spaced_values);
WriteValuesSpaced(values + value_offset, batch_num_values, batch_num_spaced_values,
valid_bits, valid_bits_offset + value_offset);
CommitWriteAndCheckPageLimit(batch_size, batch_num_spaced_values);
Expand Down Expand Up @@ -1164,8 +1176,9 @@ Status TypedColumnWriterImpl<DType>::WriteArrowDictionary(const int16_t* def_lev
auto WriteIndicesChunk = [&](int64_t offset, int64_t batch_size) {
int64_t batch_num_values = 0;
int64_t batch_num_spaced_values = 0;
WriteLevelsSpaced(batch_size, def_levels + offset, rep_levels + offset,
&batch_num_values, &batch_num_spaced_values);
WriteLevelsSpaced(batch_size, AddIfNotNull(def_levels, offset),
AddIfNotNull(rep_levels, offset), &batch_num_values,
&batch_num_spaced_values);
dict_encoder->PutIndices(*indices->Slice(value_offset, batch_num_spaced_values));
CommitWriteAndCheckPageLimit(batch_size, batch_num_values);
value_offset += batch_num_spaced_values;
Expand Down Expand Up @@ -1586,8 +1599,9 @@ Status TypedColumnWriterImpl<ByteArrayType>::WriteArrowDense(const int16_t* def_
auto WriteChunk = [&](int64_t offset, int64_t batch_size) {
int64_t batch_num_values = 0;
int64_t batch_num_spaced_values = 0;
WriteLevelsSpaced(batch_size, def_levels + offset, rep_levels + offset,
&batch_num_values, &batch_num_spaced_values);
WriteLevelsSpaced(batch_size, AddIfNotNull(def_levels, offset),
AddIfNotNull(rep_levels, offset), &batch_num_values,
&batch_num_spaced_values);
std::shared_ptr<arrow::Array> data_slice =
array.Slice(value_offset, batch_num_spaced_values);
current_encoder_->Put(*data_slice);
Expand Down

0 comments on commit 4ef8436

Please sign in to comment.