Skip to content

Commit

Permalink
apacheGH-36417: [C++] Add Buffer::data_as, Buffer::mutable_data_as
Browse files Browse the repository at this point in the history
  • Loading branch information
bkietz committed Jul 6, 2023
1 parent 60fdc25 commit f2c9b81
Show file tree
Hide file tree
Showing 11 changed files with 127 additions and 66 deletions.
4 changes: 2 additions & 2 deletions cpp/src/arrow/array/array_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ class ARROW_EXPORT Array {
/// \see GetNullCount
int64_t ComputeLogicalNullCount() const;

std::shared_ptr<DataType> type() const { return data_->type; }
const std::shared_ptr<DataType>& type() const { return data_->type; }
Type::type type_id() const { return data_->type->id(); }

/// Buffer for the validity (null) bitmap, if any. Note that Union types
Expand Down Expand Up @@ -251,7 +251,7 @@ class ARROW_EXPORT PrimitiveArray : public FlatArray {
int64_t null_count = kUnknownNullCount, int64_t offset = 0);

/// Does not account for any slice offset
std::shared_ptr<Buffer> values() const { return data_->buffers[1]; }
const std::shared_ptr<Buffer>& values() const { return data_->buffers[1]; }

protected:
PrimitiveArray() : raw_values_(NULLPTR) {}
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/arrow/array/array_dict.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ using internal::CopyBitmap;
// ----------------------------------------------------------------------
// DictionaryArray

std::shared_ptr<Array> DictionaryArray::indices() const { return indices_; }
const std::shared_ptr<Array>& DictionaryArray::indices() const { return indices_; }

int64_t DictionaryArray::GetValueIndex(int64_t i) const {
const uint8_t* indices_data = data_->buffers[1]->data();
Expand Down Expand Up @@ -106,8 +106,9 @@ DictionaryArray::DictionaryArray(const std::shared_ptr<DataType>& type,
SetData(data);
}

std::shared_ptr<Array> DictionaryArray::dictionary() const {
const std::shared_ptr<Array>& DictionaryArray::dictionary() const {
if (!dictionary_) {
// FIXME this isn't thread safe
dictionary_ = MakeArray(data_->dictionary);
}
return dictionary_;
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/array/array_dict.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,8 @@ class ARROW_EXPORT DictionaryArray : public Array {

/// \brief Return the dictionary for this array, which is stored as
/// a member of the ArrayData internal structure
std::shared_ptr<Array> dictionary() const;
std::shared_ptr<Array> indices() const;
const std::shared_ptr<Array>& dictionary() const;
const std::shared_ptr<Array>& indices() const;

/// \brief Return the ith value of indices, cast to int64_t. Not recommended
/// for use in performance-sensitive code. Does not validate whether the
Expand Down
9 changes: 5 additions & 4 deletions cpp/src/arrow/array/array_nested.cc
Original file line number Diff line number Diff line change
Expand Up @@ -470,11 +470,11 @@ const FixedSizeListType* FixedSizeListArray::list_type() const {
return checked_cast<const FixedSizeListType*>(data_->type.get());
}

std::shared_ptr<DataType> FixedSizeListArray::value_type() const {
const std::shared_ptr<DataType>& FixedSizeListArray::value_type() const {
return list_type()->value_type();
}

std::shared_ptr<Array> FixedSizeListArray::values() const { return values_; }
const std::shared_ptr<Array>& FixedSizeListArray::values() const { return values_; }

Result<std::shared_ptr<Array>> FixedSizeListArray::FromArrays(
const std::shared_ptr<Array>& values, int32_t list_size) {
Expand Down Expand Up @@ -611,9 +611,10 @@ const std::shared_ptr<Array>& StructArray::field(int i) const {
return boxed_fields_[i];
}

std::shared_ptr<Array> StructArray::GetFieldByName(const std::string& name) const {
const std::shared_ptr<Array>& StructArray::GetFieldByName(const std::string& name) const {
static std::shared_ptr<Array> null;
int i = struct_type()->GetFieldIndex(name);
return i == -1 ? nullptr : field(i);
return i == -1 ? null : field(i);
}

Result<ArrayVector> StructArray::Flatten(MemoryPool* pool) const {
Expand Down
20 changes: 10 additions & 10 deletions cpp/src/arrow/array/array_nested.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,12 +71,12 @@ class BaseListArray : public Array {
/// \brief Return array object containing the list's values
///
/// Note that this buffer does not account for any slice offset or length.
std::shared_ptr<Array> values() const { return values_; }
const std::shared_ptr<Array>& values() const { return values_; }

/// Note that this buffer does not account for any slice offset or length.
std::shared_ptr<Buffer> value_offsets() const { return data_->buffers[1]; }
const std::shared_ptr<Buffer>& value_offsets() const { return data_->buffers[1]; }

std::shared_ptr<DataType> value_type() const { return list_type_->value_type(); }
const std::shared_ptr<DataType>& value_type() const { return list_type_->value_type(); }

/// Return pointer to raw value offsets accounting for any slice offset
const offset_type* raw_value_offsets() const {
Expand Down Expand Up @@ -269,10 +269,10 @@ class ARROW_EXPORT MapArray : public ListArray {
const MapType* map_type() const { return map_type_; }

/// \brief Return array object containing all map keys
std::shared_ptr<Array> keys() const { return keys_; }
const std::shared_ptr<Array>& keys() const { return keys_; }

/// \brief Return array object containing all mapped items
std::shared_ptr<Array> items() const { return items_; }
const std::shared_ptr<Array>& items() const { return items_; }

/// Validate child data before constructing the actual MapArray.
static Status ValidateChildData(
Expand Down Expand Up @@ -310,9 +310,9 @@ class ARROW_EXPORT FixedSizeListArray : public Array {
const FixedSizeListType* list_type() const;

/// \brief Return array object containing the list's values
std::shared_ptr<Array> values() const;
const std::shared_ptr<Array>& values() const;

std::shared_ptr<DataType> value_type() const;
const std::shared_ptr<DataType>& value_type() const;

// The following functions will not perform boundschecking
int64_t value_offset(int64_t i) const {
Expand Down Expand Up @@ -402,7 +402,7 @@ class ARROW_EXPORT StructArray : public Array {
const ArrayVector& fields() const;

/// Returns null if name not found
std::shared_ptr<Array> GetFieldByName(const std::string& name) const;
const std::shared_ptr<Array>& GetFieldByName(const std::string& name) const;

/// \brief Flatten this array as a vector of arrays, one for each field
///
Expand Down Expand Up @@ -432,7 +432,7 @@ class ARROW_EXPORT UnionArray : public Array {
using type_code_t = int8_t;

/// Note that this buffer does not account for any slice offset
std::shared_ptr<Buffer> type_codes() const { return data_->buffers[1]; }
const std::shared_ptr<Buffer>& type_codes() const { return data_->buffers[1]; }

const type_code_t* raw_type_codes() const { return raw_type_codes_ + data_->offset; }

Expand Down Expand Up @@ -571,7 +571,7 @@ class ARROW_EXPORT DenseUnionArray : public UnionArray {
}

/// Note that this buffer does not account for any slice offset
std::shared_ptr<Buffer> value_offsets() const { return data_->buffers[2]; }
const std::shared_ptr<Buffer>& value_offsets() const { return data_->buffers[2]; }

int32_t value_offset(int64_t i) const { return raw_value_offsets_[i + data_->offset]; }

Expand Down
9 changes: 9 additions & 0 deletions cpp/src/arrow/array/data.h
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,15 @@ struct ARROW_EXPORT BufferSpan {
int64_t size = 0;
// Pointer back to buffer that owns this memory
const std::shared_ptr<Buffer>* owner = NULLPTR;

/// \brief View the span's memory as a read-only pointer to T
///
/// This is a plain reinterpret_cast of `data`; no alignment or size
/// validation is performed here.
///
/// \return `data` reinterpreted as `const T*`
template <typename T>
const T* data_as() const {
  using ConstTypedPtr = const T*;
  return reinterpret_cast<ConstTypedPtr>(data);
}
/// \brief View the span's memory as a writable pointer to T
///
/// This is a plain reinterpret_cast of `data`; no alignment, size or
/// mutability validation is performed here.
///
/// \return `data` reinterpreted as `T*`
template <typename T>
T* mutable_data_as() {
  using TypedPtr = T*;
  return reinterpret_cast<TypedPtr>(data);
}
};

/// \brief EXPERIMENTAL: A non-owning ArrayData reference that is cheaply
Expand Down
43 changes: 41 additions & 2 deletions cpp/src/arrow/buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ namespace arrow {
/// The following invariant is always true: Size <= Capacity
class ARROW_EXPORT Buffer {
public:
Buffer() = delete;

/// \brief Construct from buffer and size without copying memory
///
/// \param[in] data a memory buffer
Expand Down Expand Up @@ -137,6 +139,25 @@ class ARROW_EXPORT Buffer {
/// \return a new Buffer instance
static std::shared_ptr<Buffer> FromString(std::string data);

/// \brief Construct an immutable buffer that takes ownership of the contents
/// of an std::vector (without copying it).
///
/// The vector's elements are exposed as raw bytes, so the resulting buffer
/// spans `vec.size() * sizeof(T)` bytes. T is expected to be a type whose
/// object representation is meaningful as plain bytes (integers, floating
/// point numbers, ...).
///
/// \param[in] vec a vector to own
/// \return a new Buffer instance
template <typename T>
static std::shared_ptr<Buffer> FromVector(std::vector<T> vec) {
  auto* data = reinterpret_cast<uint8_t*>(vec.data());
  // The buffer is addressed through a uint8_t pointer, so its size must be
  // given in bytes rather than in number of elements.
  auto size_in_bytes = static_cast<int64_t>(vec.size() * sizeof(T));
  return std::shared_ptr<Buffer>(
      new Buffer{data, size_in_bytes},
      // Keep the vector's storage alive inside the deleter until after we
      // have deleted the Buffer. Note we can't use this trick in FromString
      // since std::string's data is inline for short strings so moving
      // invalidates pointers into the string's buffer.
      [vec = std::move(vec)](Buffer* buffer) { delete buffer; });
}

/// \brief Create buffer referencing typed memory with some length without
/// copying
/// \param[in] data the typed memory as C array
Expand Down Expand Up @@ -167,7 +188,7 @@ class ARROW_EXPORT Buffer {
/// \brief View buffer contents as a std::string_view
/// \return std::string_view
explicit operator std::string_view() const {
return std::string_view(reinterpret_cast<const char*>(data_), size_);
return {reinterpret_cast<const char*>(data_), static_cast<size_t>(size_)};
}

/// \brief View buffer contents as a util::bytes_view
Expand All @@ -187,6 +208,15 @@ class ARROW_EXPORT Buffer {
return ARROW_PREDICT_TRUE(is_cpu_) ? data_ : NULLPTR;
}

/// \brief Return a read-only pointer to the buffer's data, typed as T
///
/// Equivalent to calling `data()` and reinterpreting the result as
/// `const T*`, so the same precondition applies: the buffer has to be a CPU
/// buffer (`is_cpu()` is true). Otherwise, an assertion may fail or a null
/// pointer may be returned.
template <typename T>
const T* data_as() const {
  const auto* raw_bytes = data();
  return reinterpret_cast<const T*>(raw_bytes);
}

/// \brief Return a writable pointer to the buffer's data
///
/// The buffer has to be a mutable CPU buffer (`is_cpu()` and `is_mutable()`
Expand All @@ -204,6 +234,16 @@ class ARROW_EXPORT Buffer {
: NULLPTR;
}

/// \brief Return a writable pointer to the buffer's data, typed as T
///
/// Equivalent to calling `mutable_data()` and reinterpreting the result as
/// `T*`, so the same precondition applies: the buffer has to be a mutable CPU
/// buffer (`is_cpu()` and `is_mutable()` are true). Otherwise, an assertion
/// may fail or a null pointer may be returned.
template <typename T>
T* mutable_data_as() {
  auto* raw_bytes = mutable_data();
  return reinterpret_cast<T*>(raw_bytes);
}

/// \brief Return the device address of the buffer's data
uintptr_t address() const { return reinterpret_cast<uintptr_t>(data_); }

Expand Down Expand Up @@ -312,7 +352,6 @@ class ARROW_EXPORT Buffer {
}

private:
Buffer() = delete;
ARROW_DISALLOW_COPY_AND_ASSIGN(Buffer);
};

Expand Down
14 changes: 14 additions & 0 deletions cpp/src/arrow/buffer_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,11 @@ class ARROW_EXPORT BufferBuilder {
return Status::OK();
}

/// \brief Append the bytes viewed by a std::string_view to the buffer
///
/// The buffer is automatically expanded if necessary.
///
/// \param[in] v the bytes to append
/// \return the Status returned by the pointer/length Append overload
Status Append(std::string_view v) {
  // Lengths in this class are signed 64-bit (see length()), so convert the
  // unsigned string_view size explicitly instead of relying on an implicit
  // conversion.
  return Append(v.data(), static_cast<int64_t>(v.size()));
}

/// \brief Append copies of a value to the buffer
///
/// The buffer is automatically expanded if necessary.
Expand All @@ -138,6 +143,7 @@ class ARROW_EXPORT BufferBuilder {
memcpy(data_ + size_, data, static_cast<size_t>(length));
size_ += length;
}
/// \brief Append the bytes viewed by a std::string_view to the buffer
///
/// Forwards to the pointer/length UnsafeAppend overload, which copies the
/// bytes without growing the buffer; the caller presumably has to reserve
/// enough capacity beforehand.
///
/// \param[in] v the bytes to append
void UnsafeAppend(std::string_view v) {
  // Lengths in this class are signed 64-bit (see length()), so convert the
  // unsigned string_view size explicitly instead of relying on an implicit
  // conversion.
  UnsafeAppend(v.data(), static_cast<int64_t>(v.size()));
}

void UnsafeAppend(const int64_t num_copies, uint8_t value) {
memset(data_ + size_, value, static_cast<size_t>(num_copies));
Expand Down Expand Up @@ -196,6 +202,14 @@ class ARROW_EXPORT BufferBuilder {
int64_t length() const { return size_; }
const uint8_t* data() const { return data_; }
uint8_t* mutable_data() { return data_; }
/// \brief Return a read-only pointer to the builder's data, typed as T
///
/// Equivalent to reinterpreting the result of `data()` as `const T*`; no
/// alignment or size validation is performed here.
template <typename T>
const T* data_as() const {
  const uint8_t* raw_bytes = data();
  return reinterpret_cast<const T*>(raw_bytes);
}
/// \brief Return a writable pointer to the builder's data, typed as T
///
/// Equivalent to reinterpreting the result of `mutable_data()` as `T*`; no
/// alignment or size validation is performed here.
template <typename T>
T* mutable_data_as() {
  uint8_t* raw_bytes = mutable_data();
  return reinterpret_cast<T*>(raw_bytes);
}

private:
std::shared_ptr<ResizableBuffer> buffer_;
Expand Down

0 comments on commit f2c9b81

Please sign in to comment.