Skip to content

Commit

Permalink
Adds two implementations of a BitRunReader, which returns set/not-set
Browse files Browse the repository at this point in the history
and number of bits in a row.

Adds benchmarks comparing the two implementations under different
distributions.

- Makes use of the BitRunReader in Parquet writing

- Refactors GetBatchedSpaced and GetBatchedSpacedWithDict:

Use a single templated method that adds a template parameter
that the code can share.
Does all checking for out of bounds indices in one go instead
of on each pass through the literal (this is a slight behavior
change as the index returned will be different).
Makes use of the BitRunReader. With exactly alternating bits this shows
a big performance drop, but is generally positive across any random
and/or skewed nullability.

fix type

cast to make appveyor happy

add predict false

one more cast for windows

remove redundant using

try to fix builds

address some comments

inline all methods

remove InvertRemainingBits and use LeastSignificantBitMask (renamed from PartialWordMask)

remove LoadInitialWord, fix compile error

fix lint

Pre-rebase work

iwyu

Fix MSVC warning
  • Loading branch information
emkornfield authored and wesm committed Jun 22, 2020
1 parent 7038533 commit b104766
Show file tree
Hide file tree
Showing 16 changed files with 665 additions and 174 deletions.
1 change: 1 addition & 0 deletions cpp/src/arrow/CMakeLists.txt
Expand Up @@ -175,6 +175,7 @@ set(ARROW_SRCS
testing/util.cc
util/basic_decimal.cc
util/bit_block_counter.cc
util/bit_run_reader.cc
util/bit_util.cc
util/bitmap.cc
util/bitmap_builders.cc
Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/array/array_primitive.cc
Expand Up @@ -23,6 +23,7 @@
#include "arrow/array/array_base.h"
#include "arrow/type.h"
#include "arrow/util/bit_block_counter.h"
#include "arrow/util/bitmap_ops.h"
#include "arrow/util/logging.h"

namespace arrow {
Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/util/bit_block_counter.cc
Expand Up @@ -23,6 +23,7 @@

#include "arrow/buffer.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/bitmap_ops.h"
#include "arrow/util/ubsan.h"

namespace arrow {
Expand Down
3 changes: 0 additions & 3 deletions cpp/src/arrow/util/bit_block_counter.h
Expand Up @@ -22,9 +22,6 @@
#include <limits>
#include <memory>

#include "arrow/util/bit_util.h"
#include "arrow/util/bitmap_ops.h"
#include "arrow/util/ubsan.h"
#include "arrow/util/visibility.h"

namespace arrow {
Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/util/bit_block_counter_test.cc
Expand Up @@ -30,6 +30,7 @@
#include "arrow/testing/util.h"
#include "arrow/util/bit_block_counter.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/bitmap_ops.h"

namespace arrow {
namespace internal {
Expand Down
52 changes: 52 additions & 0 deletions cpp/src/arrow/util/bit_run_reader.cc
@@ -0,0 +1,52 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "arrow/util/bit_run_reader.h"

#include <cstdint>

#include "arrow/util/bit_util.h"

namespace arrow {
namespace internal {

/// \brief Positions the reader on the first bit of the requested range and
/// loads the first word of the bitmap.
///
/// \param[in] bitmap source data
/// \param[in] start_offset bit offset into the source data
/// \param[in] length number of bits to read
BitRunReader::BitRunReader(const uint8_t* bitmap, int64_t start_offset, int64_t length)
    : bitmap_(bitmap + (start_offset / 8)),
      position_(start_offset % 8),
      length_(position_ + length) {
  if (ARROW_PREDICT_FALSE(length == 0)) {
    // Nothing to read; NextRun() returns a zero-length run immediately.
    // Give every member a definite value so the object can be copied safely.
    word_ = 0;
    current_run_bit_set_ = false;
    return;
  }

  // NextRun() flips this flag before measuring a run, so start with the
  // opposite of the first bit's value.
  current_run_bit_set_ = !BitUtil::GetBit(bitmap, start_offset);

  // On the initial load if there is an offset we need to account for this when
  // loading bytes. bitmap_ is byte aligned, so the number of bits that must be
  // visible from bitmap_ is the sub-byte offset plus the requested length,
  // which is exactly length_. LoadWord() places its stop bit at that index, so
  // the count must not be padded: a larger value would copy a byte beyond the
  // requested range and move the stop bit past the real end of the bitmap.
  // Every other call to LoadWord() should only occur when position_ is a
  // multiple of 64.
  LoadWord(length_);

  // Prepare for inversion in NextRun.
  // Clear out any preceding bits.
  word_ = word_ & ~BitUtil::LeastSignficantBitMask(position_);
}

} // namespace internal
} // namespace arrow
166 changes: 166 additions & 0 deletions cpp/src/arrow/util/bit_run_reader.h
@@ -0,0 +1,166 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <cstdint>
#include <cstring>
#include <string>

#include "arrow/util/bit_util.h"
#include "arrow/util/bitmap_reader.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"

namespace arrow {
namespace internal {

/// A run of consecutive bits that all share the same value.
struct BitRun {
  // Number of bits in the run.
  int64_t length;
  // Value shared by every bit in the run (true when the bits are set).
  bool set;

  /// Renders the run as "{Length: <length>, set=<0|1>}".
  std::string ToString() const {
    std::string repr = "{Length: ";
    repr += std::to_string(length);
    repr += ", set=";
    repr += std::to_string(set);
    repr += "}";
    return repr;
  }
};

/// Two runs are equal when both their value and their length match.
static inline bool operator==(const BitRun& lhs, const BitRun& rhs) {
  if (lhs.set != rhs.set) {
    return false;
  }
  return lhs.length == rhs.length;
}

/// Bit-at-a-time run reader built on top of BitmapReader.
class BitRunReaderScalar {
 public:
  BitRunReaderScalar(const uint8_t* bitmap, int64_t start_offset, int64_t length)
      : reader_(bitmap, start_offset, length) {}

  /// Returns the next run of identical bits. A run of length 0 is returned
  /// once the underlying reader has reached its end.
  BitRun NextRun() {
    // The run takes the value of the bit the reader currently points at.
    const bool run_value = reader_.IsSet();
    int64_t run_length = 0;
    // Consume bits until the value changes or the bitmap is exhausted.
    for (; reader_.position() < reader_.length() && reader_.IsSet() == run_value;
         reader_.Next()) {
      ++run_length;
    }
    return {/*length=*/run_length, run_value};
  }

 private:
  BitmapReader reader_;
};

#if defined(ARROW_LITTLE_ENDIAN)
/// A convenience class for counting the number of contiguous set/unset bits
/// in a bitmap.
///
/// The bitmap is consumed one 64-bit word at a time; run boundaries are
/// located with CountTrailingZeros rather than by testing each bit.
class ARROW_EXPORT BitRunReader {
public:
/// \brief Constructs new BitRunReader.
///
/// \param[in] bitmap source data
/// \param[in] start_offset bit offset into the source data
/// \param[in] length number of bits to read
BitRunReader(const uint8_t* bitmap, int64_t start_offset, int64_t length);

/// Returns a new BitRun containing the number of contiguous
/// bits with the same value. length == 0 indicates the
/// end of the bitmap.
BitRun NextRun() {
// All bits have been consumed: report the end with an empty run.
if (ARROW_PREDICT_FALSE(position_ >= length_)) {
return {/*length=*/0, false};
}
// This implementation relies on an efficient implementation of
// CountTrailingZeros and assumes that runs are more common than
// not. The logic is to incrementally find the next bit change
// from the current position. This is done by zeroing all
// bits in word_ up to position_ and using CountTrailingZeros
// to find the index of the next set bit.

// The runs alternate on each call, so flip the bit.
current_run_bit_set_ = !current_run_bit_set_;

int64_t start_position = position_;
// Offset of the current position inside the 64-bit word.
int64_t start_bit_offset = start_position & 63;
// Invert the word for proper use of CountTrailingZeros and
// clear the already-consumed low bits so CountTrailingZeros can do its
// magic.
word_ = ~word_ & ~BitUtil::LeastSignficantBitMask(start_bit_offset);

// Go forward until the next change from unset to set.
int64_t new_bits = BitUtil::CountTrailingZeros(word_) - start_bit_offset;
position_ += new_bits;

// The run reached the end of the current word without a change and there
// are still bits left, so it may continue into the following word(s).
if (ARROW_PREDICT_FALSE(BitUtil::IsMultipleOf64(position_)) &&
ARROW_PREDICT_TRUE(position_ < length_)) {
// Continue extending position while we can advance an entire word.
// (updates position_ accordingly).
AdvanceUntilChange();
}

return {/*length=*/position_ - start_position, current_run_bit_set_};
}

private:
// Extends the current run across word boundaries: keeps loading the next
// word and adding its leading run length to position_ until a bit change
// is found inside a word or the end of the bitmap is reached.
void AdvanceUntilChange() {
int64_t new_bits = 0;
do {
// Advance the position of the bitmap for loading.
bitmap_ += sizeof(uint64_t);
LoadNextWord();
new_bits = BitUtil::CountTrailingZeros(word_);
// Continue calculating run length.
position_ += new_bits;
} while (ARROW_PREDICT_FALSE(BitUtil::IsMultipleOf64(position_)) &&
ARROW_PREDICT_TRUE(position_ < length_) && new_bits > 0);
}

// Loads the word at bitmap_, passing the number of bits between position_
// and length_ as the amount still to be read.
void LoadNextWord() { return LoadWord(length_ - position_); }

// Helper method for loading the next word.
// bits_remaining is the number of bits, counted from bitmap_, that are
// still part of the range being read.
void LoadWord(int64_t bits_remaining) {
word_ = 0;
// Common case: a complete 64-bit word is available, copy all 8 bytes.
if (ARROW_PREDICT_TRUE(bits_remaining >= 64)) {
std::memcpy(&word_, bitmap_, 8);
} else {
// Fewer than 64 bits are left: copy only the bytes that hold them.
int64_t bytes_to_load = BitUtil::BytesForBits(bits_remaining);
auto word_ptr = reinterpret_cast<uint8_t*>(&word_);
std::memcpy(word_ptr, bitmap_, bytes_to_load);
// Ensure stoppage at last bit in bitmap by reversing the next higher
// order bit.
BitUtil::SetBitTo(word_ptr, bits_remaining,
!BitUtil::GetBit(word_ptr, bits_remaining - 1));
}

// Two cases:
// 1. For unset, CountTrailingZeros works naturally so we don't
// invert the word.
// 2. Otherwise invert so we can use CountTrailingZeros.
if (current_run_bit_set_) {
word_ = ~word_;
}
}
// Byte holding the start of the word currently loaded into word_.
const uint8_t* bitmap_;
// Current bit position; starts at start_offset % 8.
int64_t position_;
// End position: the initial position_ plus the number of bits to read.
int64_t length_;
// Current 64-bit window of the bitmap (possibly inverted, see LoadWord).
uint64_t word_;
// Value of the bits in the run most recently returned by NextRun().
bool current_run_bit_set_;
};
#else
using BitRunReader = BitRunReaderScalar;
#endif

} // namespace internal
} // namespace arrow
6 changes: 6 additions & 0 deletions cpp/src/arrow/util/bit_util.h
Expand Up @@ -139,6 +139,12 @@ constexpr bool IsMultipleOf64(int64_t n) { return (n & 63) == 0; }

// Returns true when n is evenly divisible by 8.
constexpr bool IsMultipleOf8(int64_t n) { return (n % 8) == 0; }

// Builds a mask whose bit_index lowest-order bits are set and whose
// remaining bits are clear.
// Only valid for bit_index in the range [0, 64); shifting by 64 or more
// is undefined.
constexpr uint64_t LeastSignficantBitMask(int64_t bit_index) {
  // Shift an all-ones word left to clear the low bits, then complement it.
  return ~(~uint64_t{0} << bit_index);
}

// Returns 'value' rounded up to the nearest multiple of 'factor'
constexpr int64_t RoundUp(int64_t value, int64_t factor) {
return CeilDiv(value, factor) * factor;
Expand Down
68 changes: 68 additions & 0 deletions cpp/src/arrow/util/bit_util_benchmark.cc
Expand Up @@ -28,9 +28,13 @@
#include "arrow/array/array_base.h"
#include "arrow/array/array_primitive.h"
#include "arrow/buffer.h"
#include "arrow/builder.h"
#include "arrow/memory_pool.h"
#include "arrow/result.h"
#include "arrow/testing/gtest_util.h"
#include "arrow/testing/random.h"
#include "arrow/testing/util.h"
#include "arrow/util/bit_run_reader.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/bitmap.h"
#include "arrow/util/bitmap_generate.h"
Expand Down Expand Up @@ -195,6 +199,44 @@ static void BenchmarkBitmapReader(benchmark::State& state, int64_t nbytes) {
state.SetBytesProcessed(2LL * state.iterations() * nbytes);
}

// Benchmarks a bit-run reader over a 4096-bit bitmap.
//
// set_percentage is the percentage used as the probability passed to the
// random boolean generator. The special value -1 instead fills the bitmap
// with exactly alternating bits (even positions set, odd positions clear).
template <typename BitRunReaderType>
static void BenchmarkBitRunReader(benchmark::State& state, int64_t set_percentage) {
  constexpr int64_t kNumBits = 4096;
  const bool alternate_bits = (set_percentage == -1);

  ::arrow::random::RandomArrayGenerator rag(/*seed=*/23);
  const double set_probability =
      alternate_bits ? 0.0 : static_cast<double>(set_percentage) / 100.0;
  std::shared_ptr<Buffer> buffer =
      rag.Boolean(kNumBits, set_probability)->data()->buffers[1];
  const uint8_t* bitmap = buffer->data();

  if (alternate_bits) {
    // Overwrite the generated data with a strict 1,0,1,0,... pattern.
    internal::BitmapWriter writer(buffer->mutable_data(), /*start_offset=*/0,
                                  /*length=*/kNumBits);
    for (int bit = 0; bit < kNumBits; ++bit) {
      if ((bit % 2) != 0) {
        writer.Clear();
      } else {
        writer.Set();
      }
      writer.Next();
    }
  }

  for (auto _ : state) {
    BitRunReaderType reader(bitmap, 0, kNumBits);
    int64_t set_total = 0;
    // Drain the reader; a zero-length run marks the end of the bitmap.
    for (internal::BitRun run = reader.NextRun(); run.length != 0;
         run = reader.NextRun()) {
      if (run.set) {
        set_total += run.length;
      }
    }
    // Keep the accumulated result alive so the loop is not optimized away.
    benchmark::DoNotOptimize(set_total);
  }
  state.SetBytesProcessed(state.iterations() * (kNumBits / 8));
}

template <typename VisitBitsFunctorType>
static void BenchmarkVisitBits(benchmark::State& state, int64_t nbytes) {
std::shared_ptr<Buffer> buffer = CreateRandomBuffer(nbytes);
Expand Down Expand Up @@ -277,6 +319,14 @@ static void BitmapReader(benchmark::State& state) {
BenchmarkBitmapReader<internal::BitmapReader>(state, state.range(0));
}

// Benchmark entry point for the word-based reader; the benchmark argument
// is forwarded as the set percentage.
static void BitRunReader(benchmark::State& state) {
  const auto set_percentage = state.range(0);
  BenchmarkBitRunReader<internal::BitRunReader>(state, set_percentage);
}

// Benchmark entry point for the bit-at-a-time reader; the benchmark argument
// is forwarded as the set percentage.
static void BitRunReaderScalar(benchmark::State& state) {
  const auto set_percentage = state.range(0);
  BenchmarkBitRunReader<internal::BitRunReaderScalar>(state, set_percentage);
}

// Benchmark entry point for internal::BitmapWriter; the benchmark argument
// is forwarded unchanged to the shared driver.
static void BitmapWriter(benchmark::State& state) {
  const auto arg = state.range(0);
  BenchmarkBitmapWriter<internal::BitmapWriter>(state, arg);
}
Expand Down Expand Up @@ -409,6 +459,24 @@ BENCHMARK(ReferenceNaiveBitmapReader)->Arg(kBufferSize);
#endif

BENCHMARK(BitmapReader)->Arg(kBufferSize);
// Each Arg is passed to BenchmarkBitRunReader as set_percentage: the
// percentage used as the probability for the randomly generated bitmap.
// The special value -1 selects an exactly alternating bit pattern instead.
// Both readers are registered with the same arguments so their results can
// be compared directly.
BENCHMARK(BitRunReader)
->Arg(-1)
->Arg(0)
->Arg(10)
->Arg(25)
->Arg(50)
->Arg(60)
->Arg(75)
->Arg(99);
BENCHMARK(BitRunReaderScalar)
->Arg(-1)
->Arg(0)
->Arg(10)
->Arg(25)
->Arg(50)
->Arg(60)
->Arg(75)
->Arg(99);
BENCHMARK(VisitBits)->Arg(kBufferSize);
BENCHMARK(VisitBitsUnrolled)->Arg(kBufferSize);
BENCHMARK(SetBitsTo)->Arg(2)->Arg(1 << 4)->Arg(1 << 10)->Arg(1 << 17);
Expand Down

0 comments on commit b104766

Please sign in to comment.