Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ARROW-2004: [C++] Add shrink_to_fit parameter to BufferBuilder::Resize, add Reserve method #1486

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
25 changes: 25 additions & 0 deletions cpp/src/arrow/buffer-test.cc
Expand Up @@ -194,4 +194,29 @@ TEST(TestBuffer, SliceMutableBuffer) {
ASSERT_TRUE(slice->Equals(expected));
}

TEST(TestBufferBuilder, ResizeReserve) {
const std::string data = "some data";
auto data_ptr = data.c_str();

BufferBuilder builder;

ASSERT_OK(builder.Append(data_ptr, 9));
ASSERT_EQ(9, builder.length());

ASSERT_OK(builder.Resize(128));
ASSERT_EQ(128, builder.capacity());

// Do not shrink to fit
ASSERT_OK(builder.Resize(64, false));
ASSERT_EQ(128, builder.capacity());

// Shrink to fit
ASSERT_OK(builder.Resize(64));
ASSERT_EQ(64, builder.capacity());

// Reserve elements
ASSERT_OK(builder.Reserve(60));
ASSERT_EQ(128, builder.capacity());
}

} // namespace arrow
41 changes: 30 additions & 11 deletions cpp/src/arrow/buffer.h
Expand Up @@ -25,20 +25,20 @@
#include <string>
#include <type_traits>

#include "arrow/memory_pool.h"
#include "arrow/status.h"
#include "arrow/util/bit-util.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"

namespace arrow {

class MemoryPool;

// ----------------------------------------------------------------------
// Buffer classes

/// Immutable API for a chunk of bytes which may or may not be owned by the
/// class instance.
/// \class Buffer
/// \brief Object containing a pointer to a piece of contiguous memory with a
/// particular size. Base class does not own its memory
///
/// Buffers have two related notions of length: size and capacity. Size is
/// the number of bytes that might have valid data. Capacity is the number
Expand Down Expand Up @@ -133,7 +133,8 @@ ARROW_EXPORT
std::shared_ptr<Buffer> SliceMutableBuffer(const std::shared_ptr<Buffer>& buffer,
const int64_t offset, const int64_t length);

/// A Buffer whose contents can be mutated. May or may not own its data.
/// \class MutableBuffer
/// \brief A Buffer whose contents can be mutated. May or may not own its data.
class ARROW_EXPORT MutableBuffer : public Buffer {
public:
MutableBuffer(uint8_t* data, const int64_t size) : Buffer(data, size) {
Expand All @@ -148,6 +149,8 @@ class ARROW_EXPORT MutableBuffer : public Buffer {
MutableBuffer() : Buffer(NULLPTR, 0) {}
};

/// \class ResizableBuffer
/// \brief A mutable buffer that can be resized
class ARROW_EXPORT ResizableBuffer : public MutableBuffer {
public:
/// Change buffer reported size to indicated size, allocating memory if
Expand Down Expand Up @@ -190,13 +193,22 @@ class ARROW_EXPORT PoolBuffer : public ResizableBuffer {
MemoryPool* pool_;
};

/// \class BufferBuilder
/// \brief A class for incrementally building a contiguous chunk of in-memory data
class ARROW_EXPORT BufferBuilder {
public:
explicit BufferBuilder(MemoryPool* pool)
explicit BufferBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT)
: pool_(pool), data_(NULLPTR), capacity_(0), size_(0) {}

/// Resizes the buffer to the nearest multiple of 64 bytes per Layout.md
Status Resize(const int64_t elements) {
/// \brief Resizes the buffer to the nearest multiple of 64 bytes
///
/// \param elements the new capacity of the of the builder. Will be rounded
/// up to a multiple of 64 bytes for padding
/// \param shrink_to_fit if new capacity smaller than existing size,
/// reallocate internal buffer. Set to false to avoid reallocations when
/// shrinking the builder
/// \return Status
Status Resize(const int64_t elements, bool shrink_to_fit = true) {
// Resize(0) is a no-op
if (elements == 0) {
return Status::OK();
Expand All @@ -205,7 +217,7 @@ class ARROW_EXPORT BufferBuilder {
buffer_ = std::make_shared<PoolBuffer>(pool_);
}
int64_t old_capacity = capacity_;
RETURN_NOT_OK(buffer_->Resize(elements));
RETURN_NOT_OK(buffer_->Resize(elements, shrink_to_fit));
capacity_ = buffer_->capacity();
data_ = buffer_->mutable_data();
if (capacity_ > old_capacity) {
Expand All @@ -214,7 +226,14 @@ class ARROW_EXPORT BufferBuilder {
return Status::OK();
}

Status Append(const uint8_t* data, int64_t length) {
/// \brief Ensure that builder can accommodate the additional number of bytes
/// without the need to perform allocations
///
/// \param size number of additional bytes to make space for
/// \return Status
Status Reserve(const int64_t size) { return Resize(size_ + size, false); }

Status Append(const void* data, int64_t length) {
if (capacity_ < length + size_) {
int64_t new_capacity = BitUtil::NextPower2(length + size_);
RETURN_NOT_OK(Resize(new_capacity));
Expand Down Expand Up @@ -248,7 +267,7 @@ class ARROW_EXPORT BufferBuilder {
}

// Unsafe methods don't check existing size
void UnsafeAppend(const uint8_t* data, int64_t length) {
void UnsafeAppend(const void* data, int64_t length) {
memcpy(data_ + size_, data, static_cast<size_t>(length));
size_ += length;
}
Expand Down