Skip to content

Commit

Permalink
ARROW-317: Add Slice, Copy methods to Buffer
Browse files Browse the repository at this point in the history
There's also a little bit of naming cleanup in `bit-util.h`, pardon the diff noise.

Author: Wes McKinney <wes.mckinney@twosigma.com>

Closes #177 from wesm/ARROW-317 and squashes the following commits:

0666b22 [Wes McKinney] Fix up pyarrow usage of BitUtil
3ab4e7a [Wes McKinney] Add Slice, Copy methods to Buffer
cb9519d [Wes McKinney] Use more conforming names in bit-util.h
  • Loading branch information
wesm committed Oct 18, 2016
1 parent 732a205 commit 676c32c
Show file tree
Hide file tree
Showing 18 changed files with 173 additions and 90 deletions.
3 changes: 2 additions & 1 deletion cpp/src/arrow/array.cc
Expand Up @@ -19,6 +19,7 @@

#include <cstdint>

#include "arrow/util/bit-util.h"
#include "arrow/util/buffer.h"
#include "arrow/util/status.h"

Expand All @@ -43,7 +44,7 @@ bool Array::EqualsExact(const Array& other) const {
return false;
}
if (null_count_ > 0) {
return null_bitmap_->Equals(*other.null_bitmap_, util::bytes_for_bits(length_));
return null_bitmap_->Equals(*other.null_bitmap_, BitUtil::BytesForBits(length_));
}
return true;
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/array.h
Expand Up @@ -45,7 +45,7 @@ class ARROW_EXPORT Array {

// Determine if a slot is null. For inner loops. Does *not* boundscheck
bool IsNull(int i) const {
return null_count_ > 0 && util::bit_not_set(null_bitmap_data_, i);
return null_count_ > 0 && BitUtil::BitNotSet(null_bitmap_data_, i);
}

int32_t length() const { return length_; }
Expand Down
12 changes: 6 additions & 6 deletions cpp/src/arrow/builder.cc
Expand Up @@ -31,7 +31,7 @@ Status ArrayBuilder::AppendToBitmap(bool is_valid) {
// TODO(emkornfield) doubling isn't great default allocation practice
// see https://github.com/facebook/folly/blob/master/folly/docs/FBVector.md
// fo discussion
RETURN_NOT_OK(Resize(util::next_power2(capacity_ + 1)));
RETURN_NOT_OK(Resize(BitUtil::NextPower2(capacity_ + 1)));
}
UnsafeAppendToBitmap(is_valid);
return Status::OK();
Expand All @@ -45,7 +45,7 @@ Status ArrayBuilder::AppendToBitmap(const uint8_t* valid_bytes, int32_t length)
}

Status ArrayBuilder::Init(int32_t capacity) {
int32_t to_alloc = util::ceil_byte(capacity) / 8;
int32_t to_alloc = BitUtil::CeilByte(capacity) / 8;
null_bitmap_ = std::make_shared<PoolBuffer>(pool_);
RETURN_NOT_OK(null_bitmap_->Resize(to_alloc));
// Buffers might allocate more then necessary to satisfy padding requirements
Expand All @@ -58,7 +58,7 @@ Status ArrayBuilder::Init(int32_t capacity) {

Status ArrayBuilder::Resize(int32_t new_bits) {
if (!null_bitmap_) { return Init(new_bits); }
int32_t new_bytes = util::ceil_byte(new_bits) / 8;
int32_t new_bytes = BitUtil::CeilByte(new_bits) / 8;
int32_t old_bytes = null_bitmap_->size();
RETURN_NOT_OK(null_bitmap_->Resize(new_bytes));
null_bitmap_data_ = null_bitmap_->mutable_data();
Expand All @@ -82,7 +82,7 @@ Status ArrayBuilder::Advance(int32_t elements) {
Status ArrayBuilder::Reserve(int32_t elements) {
if (length_ + elements > capacity_) {
// TODO(emkornfield) power of 2 growth is potentially suboptimal
int32_t new_capacity = util::next_power2(length_ + elements);
int32_t new_capacity = BitUtil::NextPower2(length_ + elements);
return Resize(new_capacity);
}
return Status::OK();
Expand All @@ -96,7 +96,7 @@ Status ArrayBuilder::SetNotNull(int32_t length) {

void ArrayBuilder::UnsafeAppendToBitmap(bool is_valid) {
if (is_valid) {
util::set_bit(null_bitmap_data_, length_);
BitUtil::SetBit(null_bitmap_data_, length_);
} else {
++null_count_;
}
Expand All @@ -118,7 +118,7 @@ void ArrayBuilder::UnsafeSetNotNull(int32_t length) {
const int32_t new_length = length + length_;
// TODO(emkornfield) Optimize for large values of length?
for (int32_t i = length_; i < new_length; ++i) {
util::set_bit(null_bitmap_data_, i);
BitUtil::SetBit(null_bitmap_data_, i);
}
length_ = new_length;
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/column-benchmark.cc
Expand Up @@ -29,7 +29,7 @@ std::shared_ptr<Array> MakePrimitive(int32_t length, int32_t null_count = 0) {
auto data = std::make_shared<PoolBuffer>(pool);
auto null_bitmap = std::make_shared<PoolBuffer>(pool);
data->Resize(length * sizeof(typename ArrayType::value_type));
null_bitmap->Resize(util::bytes_for_bits(length));
null_bitmap->Resize(BitUtil::BytesForBits(length));
return std::make_shared<ArrayType>(length, data, 10, null_bitmap);
}
} // anonymous namespace
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/arrow/ipc/adapter.cc
Expand Up @@ -37,6 +37,7 @@
#include "arrow/types/primitive.h"
#include "arrow/types/string.h"
#include "arrow/types/struct.h"
#include "arrow/util/bit-util.h"
#include "arrow/util/buffer.h"
#include "arrow/util/logging.h"
#include "arrow/util/status.h"
Expand All @@ -49,7 +50,7 @@ namespace ipc {

namespace {
Status CheckMultipleOf64(int64_t size) {
if (util::is_multiple_of_64(size)) { return Status::OK(); }
if (BitUtil::IsMultipleOf64(size)) { return Status::OK(); }
return Status::Invalid(
"Attempted to write a buffer that "
"wasn't a multiple of 64 bytes");
Expand Down Expand Up @@ -155,7 +156,7 @@ class RecordBatchWriter {
// The buffer might be null if we are handling zero row lengths.
if (buffer) {
size = buffer->size();
padding = util::RoundUpToMultipleOf64(size) - size;
padding = BitUtil::RoundUpToMultipleOf64(size) - size;
}

// TODO(wesm): We currently have no notion of shared memory page id's,
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/arrow/ipc/test-common.h
Expand Up @@ -31,6 +31,7 @@
#include "arrow/types/primitive.h"
#include "arrow/types/string.h"
#include "arrow/types/struct.h"
#include "arrow/util/bit-util.h"
#include "arrow/util/buffer.h"
#include "arrow/util/memory-pool.h"

Expand Down Expand Up @@ -263,7 +264,7 @@ Status MakeStruct(std::shared_ptr<RecordBatch>* out) {
std::vector<uint8_t> null_bytes(list_batch->num_rows(), 1);
null_bytes[0] = 0;
std::shared_ptr<Buffer> null_bitmask;
RETURN_NOT_OK(util::bytes_to_bits(null_bytes, &null_bitmask));
RETURN_NOT_OK(BitUtil::BytesToBits(null_bytes, &null_bitmask));
ArrayPtr with_nulls(
new StructArray(type, list_batch->num_rows(), columns, 1, null_bitmask));

Expand Down
6 changes: 3 additions & 3 deletions cpp/src/arrow/test-util.h
Expand Up @@ -69,7 +69,7 @@ class TestBase : public ::testing::Test {
auto data = std::make_shared<PoolBuffer>(pool_);
auto null_bitmap = std::make_shared<PoolBuffer>(pool_);
EXPECT_OK(data->Resize(length * sizeof(typename ArrayType::value_type)));
EXPECT_OK(null_bitmap->Resize(util::bytes_for_bits(length)));
EXPECT_OK(null_bitmap->Resize(BitUtil::BytesForBits(length)));
return std::make_shared<ArrayType>(length, data, 10, null_bitmap);
}

Expand Down Expand Up @@ -152,7 +152,7 @@ static inline int bitmap_popcount(const uint8_t* data, int length) {
// versions of popcount but the code complexity is likely not worth it)
const int loop_tail_index = fast_counts * pop_len;
for (int i = loop_tail_index; i < length; ++i) {
if (util::get_bit(data, i)) { ++count; }
if (BitUtil::GetBit(data, i)) { ++count; }
}

return count;
Expand All @@ -170,7 +170,7 @@ std::shared_ptr<Buffer> bytes_to_null_buffer(const std::vector<uint8_t>& bytes)
std::shared_ptr<Buffer> out;

// TODO(wesm): error checking
util::bytes_to_bits(bytes, &out);
BitUtil::BytesToBits(bytes, &out);
return out;
}

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/types/list.cc
Expand Up @@ -30,7 +30,7 @@ bool ListArray::EqualsExact(const ListArray& other) const {
bool equal_null_bitmap = true;
if (null_count_ > 0) {
equal_null_bitmap =
null_bitmap_->Equals(*other.null_bitmap_, util::bytes_for_bits(length_));
null_bitmap_->Equals(*other.null_bitmap_, BitUtil::BytesForBits(length_));
}

if (!equal_null_bitmap) { return false; }
Expand Down
16 changes: 8 additions & 8 deletions cpp/src/arrow/types/primitive-test.cc
Expand Up @@ -236,7 +236,7 @@ void TestPrimitiveBuilder<PBoolean>::Check(

for (int i = 0; i < result->length(); ++i) {
if (nullable) { ASSERT_EQ(valid_bytes_[i] == 0, result->IsNull(i)) << i; }
bool actual = util::get_bit(result->raw_data(), i);
bool actual = BitUtil::GetBit(result->raw_data(), i);
ASSERT_EQ(static_cast<bool>(draws_[i]), actual) << i;
}
ASSERT_TRUE(result->EqualsExact(*expected.get()));
Expand All @@ -258,8 +258,8 @@ TYPED_TEST(TestPrimitiveBuilder, TestInit) {

int n = 1000;
ASSERT_OK(this->builder_->Reserve(n));
ASSERT_EQ(util::next_power2(n), this->builder_->capacity());
ASSERT_EQ(util::next_power2(TypeTraits<Type>::bytes_required(n)),
ASSERT_EQ(BitUtil::NextPower2(n), this->builder_->capacity());
ASSERT_EQ(BitUtil::NextPower2(TypeTraits<Type>::bytes_required(n)),
this->builder_->data()->size());

// unsure if this should go in all builder classes
Expand Down Expand Up @@ -409,10 +409,10 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendScalar) {
}

ASSERT_EQ(size, this->builder_->length());
ASSERT_EQ(util::next_power2(size), this->builder_->capacity());
ASSERT_EQ(BitUtil::NextPower2(size), this->builder_->capacity());

ASSERT_EQ(size, this->builder_nn_->length());
ASSERT_EQ(util::next_power2(size), this->builder_nn_->capacity());
ASSERT_EQ(BitUtil::NextPower2(size), this->builder_nn_->capacity());

this->Check(this->builder_, true);
this->Check(this->builder_nn_, false);
Expand Down Expand Up @@ -444,7 +444,7 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendVector) {
ASSERT_OK(this->builder_nn_->Append(draws.data() + K, size - K));

ASSERT_EQ(size, this->builder_->length());
ASSERT_EQ(util::next_power2(size), this->builder_->capacity());
ASSERT_EQ(BitUtil::NextPower2(size), this->builder_->capacity());

this->Check(this->builder_, true);
this->Check(this->builder_nn_, false);
Expand Down Expand Up @@ -472,7 +472,7 @@ TYPED_TEST(TestPrimitiveBuilder, TestResize) {
ASSERT_EQ(cap, this->builder_->capacity());

ASSERT_EQ(TypeTraits<Type>::bytes_required(cap), this->builder_->data()->size());
ASSERT_EQ(util::bytes_for_bits(cap), this->builder_->null_bitmap()->size());
ASSERT_EQ(BitUtil::BytesForBits(cap), this->builder_->null_bitmap()->size());
}

TYPED_TEST(TestPrimitiveBuilder, TestReserve) {
Expand All @@ -484,7 +484,7 @@ TYPED_TEST(TestPrimitiveBuilder, TestReserve) {
ASSERT_OK(this->builder_->Advance(100));
ASSERT_OK(this->builder_->Reserve(kMinBuilderCapacity));

ASSERT_EQ(util::next_power2(kMinBuilderCapacity + 100), this->builder_->capacity());
ASSERT_EQ(BitUtil::NextPower2(kMinBuilderCapacity + 100), this->builder_->capacity());
}

} // namespace arrow
13 changes: 7 additions & 6 deletions cpp/src/arrow/types/primitive.cc
Expand Up @@ -19,6 +19,7 @@

#include <memory>

#include "arrow/util/bit-util.h"
#include "arrow/util/buffer.h"
#include "arrow/util/logging.h"

Expand All @@ -41,7 +42,7 @@ bool PrimitiveArray::EqualsExact(const PrimitiveArray& other) const {

if (null_count_ > 0) {
bool equal_bitmap =
null_bitmap_->Equals(*other.null_bitmap_, util::ceil_byte(length_) / 8);
null_bitmap_->Equals(*other.null_bitmap_, BitUtil::CeilByte(length_) / 8);
if (!equal_bitmap) { return false; }

const uint8_t* this_data = raw_data_;
Expand Down Expand Up @@ -156,9 +157,9 @@ Status PrimitiveBuilder<BooleanType>::Append(
if ((valid_bytes != nullptr) && !valid_bytes[i]) continue;

if (values[i] > 0) {
util::set_bit(raw_data_, length_ + i);
BitUtil::SetBit(raw_data_, length_ + i);
} else {
util::clear_bit(raw_data_, length_ + i);
BitUtil::ClearBit(raw_data_, length_ + i);
}
}

Expand Down Expand Up @@ -196,20 +197,20 @@ bool BooleanArray::EqualsExact(const BooleanArray& other) const {

if (null_count_ > 0) {
bool equal_bitmap =
null_bitmap_->Equals(*other.null_bitmap_, util::bytes_for_bits(length_));
null_bitmap_->Equals(*other.null_bitmap_, BitUtil::BytesForBits(length_));
if (!equal_bitmap) { return false; }

const uint8_t* this_data = raw_data_;
const uint8_t* other_data = other.raw_data_;

for (int i = 0; i < length_; ++i) {
if (!IsNull(i) && util::get_bit(this_data, i) != util::get_bit(other_data, i)) {
if (!IsNull(i) && BitUtil::GetBit(this_data, i) != BitUtil::GetBit(other_data, i)) {
return false;
}
}
return true;
} else {
return data_->Equals(*other.data_, util::bytes_for_bits(length_));
return data_->Equals(*other.data_, BitUtil::BytesForBits(length_));
}
}

Expand Down
12 changes: 6 additions & 6 deletions cpp/src/arrow/types/primitive.h
Expand Up @@ -173,7 +173,7 @@ class ARROW_EXPORT NumericBuilder : public PrimitiveBuilder<T> {

// Does not capacity-check; make sure to call Reserve beforehand
void UnsafeAppend(value_type val) {
util::set_bit(null_bitmap_data_, length_);
BitUtil::SetBit(null_bitmap_data_, length_);
raw_data_[length_++] = val;
}

Expand Down Expand Up @@ -290,15 +290,15 @@ class ARROW_EXPORT BooleanArray : public PrimitiveArray {

const uint8_t* raw_data() const { return reinterpret_cast<const uint8_t*>(raw_data_); }

bool Value(int i) const { return util::get_bit(raw_data(), i); }
bool Value(int i) const { return BitUtil::GetBit(raw_data(), i); }
};

template <>
struct TypeTraits<BooleanType> {
typedef BooleanArray ArrayType;

static inline int bytes_required(int elements) {
return util::bytes_for_bits(elements);
return BitUtil::BytesForBits(elements);
}
};

Expand All @@ -314,11 +314,11 @@ class ARROW_EXPORT BooleanBuilder : public PrimitiveBuilder<BooleanType> {
// Scalar append
Status Append(bool val) {
Reserve(1);
util::set_bit(null_bitmap_data_, length_);
BitUtil::SetBit(null_bitmap_data_, length_);
if (val) {
util::set_bit(raw_data_, length_);
BitUtil::SetBit(raw_data_, length_);
} else {
util::clear_bit(raw_data_, length_);
BitUtil::ClearBit(raw_data_, length_);
}
++length_;
return Status::OK();
Expand Down
36 changes: 18 additions & 18 deletions cpp/src/arrow/util/bit-util-test.cc
Expand Up @@ -22,33 +22,33 @@
namespace arrow {

TEST(UtilTests, TestIsMultipleOf64) {
using util::is_multiple_of_64;
EXPECT_TRUE(is_multiple_of_64(64));
EXPECT_TRUE(is_multiple_of_64(0));
EXPECT_TRUE(is_multiple_of_64(128));
EXPECT_TRUE(is_multiple_of_64(192));
EXPECT_FALSE(is_multiple_of_64(23));
EXPECT_FALSE(is_multiple_of_64(32));
using BitUtil::IsMultipleOf64;
EXPECT_TRUE(IsMultipleOf64(64));
EXPECT_TRUE(IsMultipleOf64(0));
EXPECT_TRUE(IsMultipleOf64(128));
EXPECT_TRUE(IsMultipleOf64(192));
EXPECT_FALSE(IsMultipleOf64(23));
EXPECT_FALSE(IsMultipleOf64(32));
}

TEST(UtilTests, TestNextPower2) {
using util::next_power2;
using BitUtil::NextPower2;

ASSERT_EQ(8, next_power2(6));
ASSERT_EQ(8, next_power2(8));
ASSERT_EQ(8, NextPower2(6));
ASSERT_EQ(8, NextPower2(8));

ASSERT_EQ(1, next_power2(1));
ASSERT_EQ(256, next_power2(131));
ASSERT_EQ(1, NextPower2(1));
ASSERT_EQ(256, NextPower2(131));

ASSERT_EQ(1024, next_power2(1000));
ASSERT_EQ(1024, NextPower2(1000));

ASSERT_EQ(4096, next_power2(4000));
ASSERT_EQ(4096, NextPower2(4000));

ASSERT_EQ(65536, next_power2(64000));
ASSERT_EQ(65536, NextPower2(64000));

ASSERT_EQ(1LL << 32, next_power2((1LL << 32) - 1));
ASSERT_EQ(1LL << 31, next_power2((1LL << 31) - 1));
ASSERT_EQ(1LL << 62, next_power2((1LL << 62) - 1));
ASSERT_EQ(1LL << 32, NextPower2((1LL << 32) - 1));
ASSERT_EQ(1LL << 31, NextPower2((1LL << 31) - 1));
ASSERT_EQ(1LL << 62, NextPower2((1LL << 62) - 1));
}

} // namespace arrow

0 comments on commit 676c32c

Please sign in to comment.