Skip to content

Commit

Permalink
Complete refactoring, fix up IPC tests for flattened string/binary bu…
Browse files Browse the repository at this point in the history
…ffer/metadata layout

Change-Id: I15e9f14bbfcd549f14436b67efcde62e20e83890
  • Loading branch information
wesm committed Oct 17, 2016
1 parent ae64f2e commit bd70cab
Show file tree
Hide file tree
Showing 12 changed files with 71 additions and 89 deletions.
47 changes: 23 additions & 24 deletions cpp/src/arrow/ipc/adapter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -78,22 +78,6 @@ static bool IsPrimitive(const DataType* type) {
}
}

static bool IsListType(const DataType* type) {
DCHECK(type != nullptr);
switch (type->type) {
// TODO(emkornfield) grouping like this are used in a few places in the
// code consider using pattern like:
// http://stackoverflow.com/questions/26784685/c-macro-for-calling-function-based-on-enum-type
//
case Type::BINARY:
case Type::LIST:
case Type::STRING:
return true;
default:
return false;
}
}

// ----------------------------------------------------------------------
// Record batch write path

Expand All @@ -115,7 +99,11 @@ Status VisitArray(const Array* arr, std::vector<flatbuf::FieldNode>* field_nodes
if (IsPrimitive(arr_type)) {
const auto prim_arr = static_cast<const PrimitiveArray*>(arr);
buffers->push_back(prim_arr->data());
} else if (IsListType(arr_type)) {
} else if (arr->type_enum() == Type::STRING || arr->type_enum() == Type::BINARY) {
const auto binary_arr = static_cast<const BinaryArray*>(arr);
buffers->push_back(binary_arr->offsets());
buffers->push_back(binary_arr->data());
} else if (arr->type_enum() == Type::LIST) {
const auto list_arr = static_cast<const ListArray*>(arr);
buffers->push_back(list_arr->offset_buffer());
RETURN_NOT_OK(VisitArray(
Expand Down Expand Up @@ -331,9 +319,21 @@ class RecordBatchReader::RecordBatchReaderImpl {
}
return MakePrimitiveArray(
type, field_meta.length, data, field_meta.null_count, null_bitmap, out);
}
} else if (type->type == Type::STRING || type->type == Type::BINARY) {
std::shared_ptr<Buffer> offsets;
std::shared_ptr<Buffer> values;
RETURN_NOT_OK(GetBuffer(buffer_index_++, &offsets));
RETURN_NOT_OK(GetBuffer(buffer_index_++, &values));

if (IsListType(type.get())) {
if (type->type == Type::STRING) {
*out = std::make_shared<StringArray>(
field_meta.length, offsets, values, field_meta.null_count, null_bitmap);
} else {
*out = std::make_shared<BinaryArray>(
field_meta.length, offsets, values, field_meta.null_count, null_bitmap);
}
return Status::OK();
} else if (type->type == Type::LIST) {
std::shared_ptr<Buffer> offsets;
RETURN_NOT_OK(GetBuffer(buffer_index_++, &offsets));
const int num_children = type->num_children();
Expand All @@ -346,11 +346,10 @@ class RecordBatchReader::RecordBatchReaderImpl {
std::shared_ptr<Array> values_array;
RETURN_NOT_OK(
NextArray(type->child(0).get(), max_recursion_depth - 1, &values_array));
return MakeListArray(type, field_meta.length, offsets, values_array,
field_meta.null_count, null_bitmap, out);
}

if (type->type == Type::STRUCT) {
*out = std::make_shared<ListArray>(type, field_meta.length, offsets, values_array,
field_meta.null_count, null_bitmap);
return Status::OK();
} else if (type->type == Type::STRUCT) {
const int num_children = type->num_children();
std::vector<ArrayPtr> fields;
fields.reserve(num_children);
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/type.h
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ struct ARROW_EXPORT StructType : public DataType {

// These will be defined elsewhere
template <typename T>
struct type_traits {};
struct TypeTraits {};

} // namespace arrow

Expand Down
12 changes: 0 additions & 12 deletions cpp/src/arrow/types/construct.cc
Original file line number Diff line number Diff line change
Expand Up @@ -121,16 +121,4 @@ Status MakePrimitiveArray(const TypePtr& type, int32_t length,
#endif
}

Status MakeListArray(const TypePtr& type, int32_t length,
const std::shared_ptr<Buffer>& offsets, const ArrayPtr& values, int32_t null_count,
const std::shared_ptr<Buffer>& null_bitmap, ArrayPtr* out) {
*out =
std::make_shared<ListArray>(type, length, offsets, values, null_count, null_bitmap);
#ifdef NDEBUG
return Status::OK();
#else
return (*out)->Validate();
#endif
}

} // namespace arrow
8 changes: 0 additions & 8 deletions cpp/src/arrow/types/construct.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,6 @@ Status ARROW_EXPORT MakePrimitiveArray(const std::shared_ptr<DataType>& type,
int32_t length, const std::shared_ptr<Buffer>& data, int32_t null_count,
const std::shared_ptr<Buffer>& null_bitmap, std::shared_ptr<Array>* out);

// Create new list arrays for logical types that are backed by ListArrays (e.g. list of
// primitives and strings)
// TODO(emkornfield) split up string vs list?
Status ARROW_EXPORT MakeListArray(const std::shared_ptr<DataType>& type, int32_t length,
const std::shared_ptr<Buffer>& offests, const std::shared_ptr<Array>& values,
int32_t null_count, const std::shared_ptr<Buffer>& null_bitmap,
std::shared_ptr<Array>* out);

} // namespace arrow

#endif // ARROW_BUILDER_H_
4 changes: 2 additions & 2 deletions cpp/src/arrow/types/primitive-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ TYPED_TEST(TestPrimitiveBuilder, TestInit) {
int n = 1000;
ASSERT_OK(this->builder_->Reserve(n));
ASSERT_EQ(util::next_power2(n), this->builder_->capacity());
ASSERT_EQ(util::next_power2(type_traits<Type>::bytes_required(n)),
ASSERT_EQ(util::next_power2(TypeTraits<Type>::bytes_required(n)),
this->builder_->data()->size());

// unsure if this should go in all builder classes
Expand Down Expand Up @@ -471,7 +471,7 @@ TYPED_TEST(TestPrimitiveBuilder, TestResize) {
ASSERT_OK(this->builder_->Reserve(cap));
ASSERT_EQ(cap, this->builder_->capacity());

ASSERT_EQ(type_traits<Type>::bytes_required(cap), this->builder_->data()->size());
ASSERT_EQ(TypeTraits<Type>::bytes_required(cap), this->builder_->data()->size());
ASSERT_EQ(util::bytes_for_bits(cap), this->builder_->null_bitmap()->size());
}

Expand Down
14 changes: 9 additions & 5 deletions cpp/src/arrow/types/primitive.cc
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ Status PrimitiveBuilder<T>::Init(int32_t capacity) {
RETURN_NOT_OK(ArrayBuilder::Init(capacity));
data_ = std::make_shared<PoolBuffer>(pool_);

int64_t nbytes = type_traits<T>::bytes_required(capacity);
int64_t nbytes = TypeTraits<T>::bytes_required(capacity);
RETURN_NOT_OK(data_->Resize(nbytes));
// TODO(emkornfield) valgrind complains without this
memset(data_->mutable_data(), 0, nbytes);
Expand All @@ -106,10 +106,9 @@ Status PrimitiveBuilder<T>::Resize(int32_t capacity) {
} else {
RETURN_NOT_OK(ArrayBuilder::Resize(capacity));
const int64_t old_bytes = data_->size();
const int64_t new_bytes = type_traits<T>::bytes_required(capacity);
const int64_t new_bytes = TypeTraits<T>::bytes_required(capacity);
RETURN_NOT_OK(data_->Resize(new_bytes));
raw_data_ = reinterpret_cast<value_type*>(data_->mutable_data());

memset(data_->mutable_data() + old_bytes, 0, new_bytes - old_bytes);
}
return Status::OK();
Expand All @@ -121,7 +120,7 @@ Status PrimitiveBuilder<T>::Append(
RETURN_NOT_OK(Reserve(length));

if (length > 0) {
memcpy(raw_data_ + length_, values, type_traits<T>::bytes_required(length));
memcpy(raw_data_ + length_, values, TypeTraits<T>::bytes_required(length));
}

// length_ is update by these
Expand All @@ -132,7 +131,12 @@ Status PrimitiveBuilder<T>::Append(

template <typename T>
Status PrimitiveBuilder<T>::Finish(std::shared_ptr<Array>* out) {
*out = std::make_shared<typename type_traits<T>::ArrayType>(
const int64_t bytes_required = TypeTraits<T>::bytes_required(length_);
if (bytes_required > 0 && bytes_required < data_->size()) {
// Trim buffers
RETURN_NOT_OK(data_->Resize(bytes_required));
}
*out = std::make_shared<typename TypeTraits<T>::ArrayType>(
type_, length_, data_, null_count_, null_bitmap_);

data_ = null_bitmap_ = nullptr;
Expand Down
24 changes: 12 additions & 12 deletions cpp/src/arrow/types/primitive.h
Original file line number Diff line number Diff line change
Expand Up @@ -184,77 +184,77 @@ class ARROW_EXPORT NumericBuilder : public PrimitiveBuilder<T> {
};

template <>
struct type_traits<UInt8Type> {
struct TypeTraits<UInt8Type> {
typedef UInt8Array ArrayType;

static inline int bytes_required(int elements) { return elements; }
};

template <>
struct type_traits<Int8Type> {
struct TypeTraits<Int8Type> {
typedef Int8Array ArrayType;

static inline int bytes_required(int elements) { return elements; }
};

template <>
struct type_traits<UInt16Type> {
struct TypeTraits<UInt16Type> {
typedef UInt16Array ArrayType;

static inline int bytes_required(int elements) { return elements * sizeof(uint16_t); }
};

template <>
struct type_traits<Int16Type> {
struct TypeTraits<Int16Type> {
typedef Int16Array ArrayType;

static inline int bytes_required(int elements) { return elements * sizeof(int16_t); }
};

template <>
struct type_traits<UInt32Type> {
struct TypeTraits<UInt32Type> {
typedef UInt32Array ArrayType;

static inline int bytes_required(int elements) { return elements * sizeof(uint32_t); }
};

template <>
struct type_traits<Int32Type> {
struct TypeTraits<Int32Type> {
typedef Int32Array ArrayType;

static inline int bytes_required(int elements) { return elements * sizeof(int32_t); }
};

template <>
struct type_traits<UInt64Type> {
struct TypeTraits<UInt64Type> {
typedef UInt64Array ArrayType;

static inline int bytes_required(int elements) { return elements * sizeof(uint64_t); }
};

template <>
struct type_traits<Int64Type> {
struct TypeTraits<Int64Type> {
typedef Int64Array ArrayType;

static inline int bytes_required(int elements) { return elements * sizeof(int64_t); }
};

template <>
struct type_traits<TimestampType> {
struct TypeTraits<TimestampType> {
typedef TimestampArray ArrayType;

static inline int bytes_required(int elements) { return elements * sizeof(int64_t); }
};
template <>

struct type_traits<FloatType> {
struct TypeTraits<FloatType> {
typedef FloatArray ArrayType;

static inline int bytes_required(int elements) { return elements * sizeof(float); }
};

template <>
struct type_traits<DoubleType> {
struct TypeTraits<DoubleType> {
typedef DoubleArray ArrayType;

static inline int bytes_required(int elements) { return elements * sizeof(double); }
Expand Down Expand Up @@ -294,7 +294,7 @@ class ARROW_EXPORT BooleanArray : public PrimitiveArray {
};

template <>
struct type_traits<BooleanType> {
struct TypeTraits<BooleanType> {
typedef BooleanArray ArrayType;

static inline int bytes_required(int elements) {
Expand Down
19 changes: 4 additions & 15 deletions cpp/src/arrow/types/string-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -66,18 +66,13 @@ class TestStringContainer : public ::testing::Test {

void MakeArray() {
length_ = offsets_.size() - 1;
int nchars = chars_.size();

value_buf_ = test::to_buffer(chars_);
values_ = ArrayPtr(new UInt8Array(nchars, value_buf_));

offsets_buf_ = test::to_buffer(offsets_);

null_bitmap_ = test::bytes_to_null_buffer(valid_bytes_);
null_count_ = test::null_count(valid_bytes_);

strings_ = std::make_shared<StringArray>(
length_, offsets_buf_, values_, null_count_, null_bitmap_);
length_, offsets_buf_, value_buf_, null_count_, null_bitmap_);
}

protected:
Expand All @@ -94,7 +89,6 @@ class TestStringContainer : public ::testing::Test {
int null_count_;
int length_;

ArrayPtr values_;
std::shared_ptr<StringArray> strings_;
};

Expand Down Expand Up @@ -122,7 +116,7 @@ TEST_F(TestStringContainer, TestListFunctions) {

TEST_F(TestStringContainer, TestDestructor) {
auto arr = std::make_shared<StringArray>(
length_, offsets_buf_, values_, null_count_, null_bitmap_);
length_, offsets_buf_, value_buf_, null_count_, null_bitmap_);
}

TEST_F(TestStringContainer, TestGetString) {
Expand Down Expand Up @@ -221,18 +215,14 @@ class TestBinaryContainer : public ::testing::Test {

void MakeArray() {
length_ = offsets_.size() - 1;
int nchars = chars_.size();

value_buf_ = test::to_buffer(chars_);
values_ = ArrayPtr(new UInt8Array(nchars, value_buf_));

offsets_buf_ = test::to_buffer(offsets_);

null_bitmap_ = test::bytes_to_null_buffer(valid_bytes_);
null_count_ = test::null_count(valid_bytes_);

strings_ = std::make_shared<BinaryArray>(
length_, offsets_buf_, values_, null_count_, null_bitmap_);
length_, offsets_buf_, value_buf_, null_count_, null_bitmap_);
}

protected:
Expand All @@ -249,7 +239,6 @@ class TestBinaryContainer : public ::testing::Test {
int null_count_;
int length_;

ArrayPtr values_;
std::shared_ptr<BinaryArray> strings_;
};

Expand Down Expand Up @@ -277,7 +266,7 @@ TEST_F(TestBinaryContainer, TestListFunctions) {

TEST_F(TestBinaryContainer, TestDestructor) {
auto arr = std::make_shared<BinaryArray>(
length_, offsets_buf_, values_, null_count_, null_bitmap_);
length_, offsets_buf_, value_buf_, null_count_, null_bitmap_);
}

TEST_F(TestBinaryContainer, TestGetValue) {
Expand Down
4 changes: 3 additions & 1 deletion cpp/src/arrow/types/string.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,9 @@ BinaryArray::BinaryArray(const TypePtr& type, int32_t length,
offset_buffer_(offsets),
offsets_(reinterpret_cast<const int32_t*>(offset_buffer_->data())),
data_buffer_(data),
data_(data_buffer_->data()) {}
data_(nullptr) {
if (data_buffer_ != nullptr) { data_ = data_buffer_->data(); }
}

Status BinaryArray::Validate() const {
// TODO(wesm): what to do here?
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/types/string.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ class ARROW_EXPORT BinaryArray : public Array {
}

std::shared_ptr<Buffer> data() const { return data_buffer_; }
std::shared_ptr<Buffer> offsets() const { return offset_buffer_; }

const int32_t* offsets() const { return offsets_; }
int32_t offset(int i) const { return offsets_[i]; }

// Neither of these functions will perform boundschecking
Expand Down
Loading

0 comments on commit bd70cab

Please sign in to comment.