Skip to content

Commit

Permalink
Refactoring to reflect collaprsed list-like structure of Binary and S…
Browse files Browse the repository at this point in the history
…tring types. Not yet complete

Change-Id: Ibe333dc6c3764585ed3b21fc3ab3a86a4b6dc4cb
  • Loading branch information
wesm committed Oct 17, 2016
1 parent 8e8b17f commit ae64f2e
Show file tree
Hide file tree
Showing 20 changed files with 247 additions and 269 deletions.
1 change: 0 additions & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -681,7 +681,6 @@ set(ARROW_SRCS

src/arrow/types/construct.cc
src/arrow/types/decimal.cc
src/arrow/types/json.cc
src/arrow/types/list.cc
src/arrow/types/primitive.cc
src/arrow/types/string.cc
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ class ARROW_EXPORT ArrayBuilder {

// Creates new array object to hold the contents of the builder and transfers
// ownership of the data. This resets all variables on the builder.
virtual std::shared_ptr<Array> Finish() = 0;
virtual Status Finish(std::shared_ptr<Array>* out) = 0;

const std::shared_ptr<DataType>& type() const { return type_; }

Expand Down
19 changes: 8 additions & 11 deletions cpp/src/arrow/ipc/test-common.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ const auto kListInt32 = std::make_shared<ListType>(kInt32);
const auto kListListInt32 = std::make_shared<ListType>(kListInt32);

Status MakeRandomInt32Array(
int32_t length, bool include_nulls, MemoryPool* pool, std::shared_ptr<Array>* array) {
int32_t length, bool include_nulls, MemoryPool* pool, std::shared_ptr<Array>* out) {
std::shared_ptr<PoolBuffer> data;
test::MakeRandomInt32PoolBuffer(length, pool, &data);
const auto kInt32 = std::make_shared<Int32Type>();
Expand All @@ -52,16 +52,14 @@ Status MakeRandomInt32Array(
test::MakeRandomBytePoolBuffer(length, pool, &valid_bytes);
RETURN_NOT_OK(builder.Append(
reinterpret_cast<const int32_t*>(data->data()), length, valid_bytes->data()));
*array = builder.Finish();
return Status::OK();
return builder.Finish(out);
}
RETURN_NOT_OK(builder.Append(reinterpret_cast<const int32_t*>(data->data()), length));
*array = builder.Finish();
return Status::OK();
return builder.Finish(out);
}

Status MakeRandomListArray(const std::shared_ptr<Array>& child_array, int num_lists,
bool include_nulls, MemoryPool* pool, std::shared_ptr<Array>* array) {
bool include_nulls, MemoryPool* pool, std::shared_ptr<Array>* out) {
// Create the null list values
std::vector<uint8_t> valid_lists(num_lists);
const double null_percent = include_nulls ? 0.1 : 0;
Expand Down Expand Up @@ -90,8 +88,8 @@ Status MakeRandomListArray(const std::shared_ptr<Array>& child_array, int num_li
}
ListBuilder builder(pool, child_array);
RETURN_NOT_OK(builder.Append(offsets.data(), num_lists, valid_lists.data()));
*array = builder.Finish();
return (*array)->Validate();
RETURN_NOT_OK(builder.Finish(out));
return (*out)->Validate();
}

typedef Status MakeRecordBatch(std::shared_ptr<RecordBatch>* out);
Expand All @@ -115,7 +113,7 @@ Status MakeIntRecordBatch(std::shared_ptr<RecordBatch>* out) {

template <class Builder, class RawType>
Status MakeRandomBinaryArray(
const TypePtr& type, int32_t length, MemoryPool* pool, ArrayPtr* array) {
const TypePtr& type, int32_t length, MemoryPool* pool, ArrayPtr* out) {
const std::vector<std::string> values = {
"", "", "abc", "123", "efg", "456!@#!@#", "12312"};
Builder builder(pool, type);
Expand All @@ -130,8 +128,7 @@ Status MakeRandomBinaryArray(
builder.Append(reinterpret_cast<const RawType*>(value.data()), value.size()));
}
}
*array = builder.Finish();
return Status::OK();
return builder.Finish(out);
}

Status MakeStringTypesRecordBatch(std::shared_ptr<RecordBatch>* out) {
Expand Down
18 changes: 3 additions & 15 deletions cpp/src/arrow/type.h
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ struct ARROW_EXPORT DoubleType : public PrimitiveType<DoubleType> {
struct ARROW_EXPORT ListType : public DataType {
// List can contain any other logical value type
explicit ListType(const std::shared_ptr<DataType>& value_type)
: ListType(value_type, Type::LIST) {}
: ListType(std::make_shared<Field>("item", value_type)) {}

explicit ListType(const std::shared_ptr<Field>& value_field) : DataType(Type::LIST) {
children_ = {value_field};
Expand All @@ -255,26 +255,17 @@ struct ARROW_EXPORT ListType : public DataType {
static char const* name() { return "list"; }

std::string ToString() const override;

protected:
// Constructor for classes that are implemented as List Arrays.
ListType(const std::shared_ptr<DataType>& value_type, Type::type logical_type)
: DataType(logical_type) {
// TODO ARROW-187 this can technically fail, make a constructor method ?
children_ = {std::make_shared<Field>("item", value_type)};
}
};

// BinaryType type is reprsents lists of 1-byte values.
struct ARROW_EXPORT BinaryType : public ListType {
struct ARROW_EXPORT BinaryType : public DataType {
BinaryType() : BinaryType(Type::BINARY) {}
static char const* name() { return "binary"; }
std::string ToString() const override;

protected:
// Allow subclasses to change the logical type.
explicit BinaryType(Type::type logical_type)
: ListType(std::shared_ptr<DataType>(new UInt8Type()), logical_type) {}
explicit BinaryType(Type::type logical_type) : DataType(logical_type) {}
};

// UTF encoded strings
Expand All @@ -284,9 +275,6 @@ struct ARROW_EXPORT StringType : public BinaryType {
static char const* name() { return "string"; }

std::string ToString() const override;

protected:
explicit StringType(Type::type logical_type) : BinaryType(logical_type) {}
};

struct ARROW_EXPORT StructType : public DataType {
Expand Down
1 change: 0 additions & 1 deletion cpp/src/arrow/types/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ install(FILES
construct.h
datetime.h
decimal.h
json.h
list.h
primitive.h
string.h
Expand Down
28 changes: 0 additions & 28 deletions cpp/src/arrow/types/binary.h

This file was deleted.

23 changes: 5 additions & 18 deletions cpp/src/arrow/types/construct.cc
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
BUILDER_CASE(DOUBLE, DoubleBuilder);

BUILDER_CASE(STRING, StringBuilder);
BUILDER_CASE(BINARY, BinaryBuilder);

case Type::LIST: {
std::shared_ptr<ArrayBuilder> value_builder;
Expand Down Expand Up @@ -105,10 +106,10 @@ Status MakePrimitiveArray(const TypePtr& type, int32_t length,
MAKE_PRIMITIVE_ARRAY_CASE(INT32, Int32Array);
MAKE_PRIMITIVE_ARRAY_CASE(UINT64, UInt64Array);
MAKE_PRIMITIVE_ARRAY_CASE(INT64, Int64Array);
MAKE_PRIMITIVE_ARRAY_CASE(TIME, Int64Array);
MAKE_PRIMITIVE_ARRAY_CASE(TIMESTAMP, TimestampArray);
MAKE_PRIMITIVE_ARRAY_CASE(FLOAT, FloatArray);
MAKE_PRIMITIVE_ARRAY_CASE(DOUBLE, DoubleArray);
MAKE_PRIMITIVE_ARRAY_CASE(TIME, Int64Array);
MAKE_PRIMITIVE_ARRAY_CASE(TIMESTAMP, TimestampArray);
MAKE_PRIMITIVE_ARRAY_CASE(TIMESTAMP_DOUBLE, DoubleArray);
default:
return Status::NotImplemented(type->ToString());
Expand All @@ -123,22 +124,8 @@ Status MakePrimitiveArray(const TypePtr& type, int32_t length,
Status MakeListArray(const TypePtr& type, int32_t length,
const std::shared_ptr<Buffer>& offsets, const ArrayPtr& values, int32_t null_count,
const std::shared_ptr<Buffer>& null_bitmap, ArrayPtr* out) {
switch (type->type) {
case Type::BINARY:
out->reset(new BinaryArray(type, length, offsets, values, null_count, null_bitmap));
break;

case Type::LIST:
out->reset(new ListArray(type, length, offsets, values, null_count, null_bitmap));
break;

case Type::DECIMAL_TEXT:
case Type::STRING:
out->reset(new StringArray(type, length, offsets, values, null_count, null_bitmap));
break;
default:
return Status::NotImplemented(type->ToString());
}
*out =
std::make_shared<ListArray>(type, length, offsets, values, null_count, null_bitmap);
#ifdef NDEBUG
return Status::OK();
#else
Expand Down
37 changes: 0 additions & 37 deletions cpp/src/arrow/types/json.cc

This file was deleted.

36 changes: 0 additions & 36 deletions cpp/src/arrow/types/json.h

This file was deleted.

15 changes: 11 additions & 4 deletions cpp/src/arrow/types/list-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,11 @@ class TestListBuilder : public TestBuilder {
builder_ = std::dynamic_pointer_cast<ListBuilder>(tmp);
}

void Done() { result_ = std::dynamic_pointer_cast<ListArray>(builder_->Finish()); }
void Done() {
std::shared_ptr<Array> out;
EXPECT_OK(builder_->Finish(&out));
result_ = std::dynamic_pointer_cast<ListArray>(out);
}

protected:
TypePtr value_type_;
Expand All @@ -98,14 +102,17 @@ TEST_F(TestListBuilder, Equality) {
// setup two equal arrays
ASSERT_OK(builder_->Append(equal_offsets.data(), equal_offsets.size()));
ASSERT_OK(vb->Append(equal_values.data(), equal_values.size()));
array = builder_->Finish();

ASSERT_OK(builder_->Finish(&array));
ASSERT_OK(builder_->Append(equal_offsets.data(), equal_offsets.size()));
ASSERT_OK(vb->Append(equal_values.data(), equal_values.size()));
equal_array = builder_->Finish();

ASSERT_OK(builder_->Finish(&equal_array));
// now an unequal one
ASSERT_OK(builder_->Append(unequal_offsets.data(), unequal_offsets.size()));
ASSERT_OK(vb->Append(unequal_values.data(), unequal_values.size()));
unequal_array = builder_->Finish();

ASSERT_OK(builder_->Finish(&unequal_array));

// Test array equality
EXPECT_TRUE(array->Equals(array));
Expand Down
42 changes: 38 additions & 4 deletions cpp/src/arrow/types/list.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ bool ListArray::EqualsExact(const ListArray& other) const {
if (null_count_ != other.null_count_) { return false; }

bool equal_offsets =
offset_buf_->Equals(*other.offset_buf_, (length_ + 1) * sizeof(int32_t));
offset_buffer_->Equals(*other.offset_buffer_, (length_ + 1) * sizeof(int32_t));
if (!equal_offsets) { return false; }
bool equal_null_bitmap = true;
if (null_count_ > 0) {
Expand Down Expand Up @@ -72,10 +72,10 @@ bool ListArray::RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_st

Status ListArray::Validate() const {
if (length_ < 0) { return Status::Invalid("Length was negative"); }
if (!offset_buf_) { return Status::Invalid("offset_buf_ was null"); }
if (offset_buf_->size() / static_cast<int>(sizeof(int32_t)) < length_) {
if (!offset_buffer_) { return Status::Invalid("offset_buffer_ was null"); }
if (offset_buffer_->size() / static_cast<int>(sizeof(int32_t)) < length_) {
std::stringstream ss;
ss << "offset buffer size (bytes): " << offset_buf_->size()
ss << "offset buffer size (bytes): " << offset_buffer_->size()
<< " isn't large enough for length: " << length_;
return Status::Invalid(ss.str());
}
Expand Down Expand Up @@ -121,4 +121,38 @@ Status ListArray::Validate() const {
return Status::OK();
}

Status ListBuilder::Init(int32_t elements) {
DCHECK_LT(elements, std::numeric_limits<int32_t>::max());
RETURN_NOT_OK(ArrayBuilder::Init(elements));
// one more then requested for offsets
return offset_builder_.Resize((elements + 1) * sizeof(int32_t));
}

Status ListBuilder::Resize(int32_t capacity) {
DCHECK_LT(capacity, std::numeric_limits<int32_t>::max());
// one more then requested for offsets
RETURN_NOT_OK(offset_builder_.Resize((capacity + 1) * sizeof(int32_t)));
return ArrayBuilder::Resize(capacity);
}

Status ListBuilder::Finish(std::shared_ptr<Array>* out) {
std::shared_ptr<Array> items = values_;
if (!items) { RETURN_NOT_OK(value_builder_->Finish(&items)); }

RETURN_NOT_OK(offset_builder_.Append<int32_t>(items->length()));
std::shared_ptr<Buffer> offsets = offset_builder_.Finish();

*out = std::make_shared<ListArray>(
type_, length_, offsets, items, null_count_, null_bitmap_);

Reset();

return Status::OK();
}

void ListBuilder::Reset() {
capacity_ = length_ = null_count_ = 0;
null_bitmap_ = nullptr;
}

} // namespace arrow
Loading

0 comments on commit ae64f2e

Please sign in to comment.