Skip to content

Commit

Permalink
Add Column chunk type validation function
Browse files Browse the repository at this point in the history
  • Loading branch information
wesm committed Mar 4, 2016
1 parent 8a2e40e commit 988135c
Show file tree
Hide file tree
Showing 8 changed files with 65 additions and 20 deletions.
1 change: 0 additions & 1 deletion cpp/src/arrow/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,6 @@ class Array {
DISALLOW_COPY_AND_ASSIGN(Array);
};


typedef std::shared_ptr<Array> ArrayPtr;

} // namespace arrow
Expand Down
32 changes: 24 additions & 8 deletions cpp/src/arrow/table/column-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,10 @@
#include "arrow/field.h"
#include "arrow/schema.h"
#include "arrow/table/column.h"
#include "arrow/test-util.h"
#include "arrow/type.h"
#include "arrow/types/integer.h"
#include "arrow/util/bit-util.h"
#include "arrow/util/buffer.h"
#include "arrow/util/memory-pool.h"
#include "arrow/util/status.h"
Expand All @@ -41,12 +43,13 @@ class TestColumn : public ::testing::Test {
pool_ = GetDefaultMemoryPool();
}

std::shared_ptr<Array> MakeInt32Array(int32_t length, int32_t null_count = 0) {
// Builds a test array of type ArrayType with `length` slots.
//
// The data buffer is resized to hold `length` values of
// ArrayType::value_type and the null buffer to hold `length` bits. Nothing is
// written into either buffer here.
//
// `null_count` is forwarded to the array constructor (third argument), so the
// resulting array reports the count the caller asked for.
template <typename ArrayType>
std::shared_ptr<Array> MakeArray(int32_t length, int32_t null_count = 0) {
  auto data = std::make_shared<PoolBuffer>(pool_);
  auto nulls = std::make_shared<PoolBuffer>(pool_);
  data->Resize(length * sizeof(typename ArrayType::value_type));
  nulls->Resize(util::bytes_for_bits(length));
  // Previously a literal 10 was passed here, which ignored the parameter.
  return std::make_shared<ArrayType>(length, data, null_count, nulls);
}

protected:
Expand All @@ -58,20 +61,33 @@ class TestColumn : public ::testing::Test {

// Builds a three-chunk INT32 column and checks its basic accessors: name,
// type, total length, total null count and chunk count.
TEST_F(TestColumn, BasicAPI) {
  ArrayVector arrays;
  // Exactly three chunks of 100 values each; the assertions below depend on
  // this (length 300, 3 chunks).
  arrays.push_back(MakeArray<Int32Array>(100));
  arrays.push_back(MakeArray<Int32Array>(100, 10));
  arrays.push_back(MakeArray<Int32Array>(100, 20));

  auto field = std::make_shared<Field>("c0", INT32);
  column_.reset(new Column(field, arrays));

  ASSERT_EQ("c0", column_->name());
  ASSERT_TRUE(column_->type()->Equals(INT32));
  ASSERT_EQ(300, column_->length());
  ASSERT_EQ(30, column_->null_count());
  ASSERT_EQ(3, column_->data()->num_chunks());
}

// ValidateData() must accept a column whose chunks all match the field type
// and report Invalid once a chunk of a different type is present.
TEST_F(TestColumn, ChunksInhomogeneous) {
  auto field = std::make_shared<Field>("c0", INT32);

  // Two Int32 chunks under an INT32 field: validation succeeds.
  ArrayVector chunks;
  chunks.push_back(MakeArray<Int32Array>(100));
  chunks.push_back(MakeArray<Int32Array>(100, 10));
  column_.reset(new Column(field, chunks));
  ASSERT_OK(column_->ValidateData());

  // Add an Int16 chunk to the same set and rebuild the column: validation
  // must now fail.
  chunks.push_back(MakeArray<Int16Array>(100, 10));
  column_.reset(new Column(field, chunks));
  ASSERT_RAISES(Invalid, column_->ValidateData());
}

} // namespace arrow
17 changes: 17 additions & 0 deletions cpp/src/arrow/table/column.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@
#include "arrow/table/column.h"

#include <memory>
#include <sstream>

#include "arrow/field.h"
#include "arrow/util/status.h"

namespace arrow {

Expand All @@ -42,4 +44,19 @@ Column::Column(const std::shared_ptr<Field>& field,
field_(field),
data_(data) {}

// Checks that every chunk in this column has the column's type.
//
// Returns Status::Invalid describing the first chunk whose type does not
// Equals() the column type (chunk index, expected type, actual type), or
// Status::OK() when all chunks match.
Status Column::ValidateData() {
  for (int chunk_index = 0; chunk_index < data_->num_chunks(); ++chunk_index) {
    const std::shared_ptr<DataType>& chunk_type =
        data_->chunk(chunk_index)->type();
    if (this->type()->Equals(chunk_type)) {
      continue;
    }

    // First mismatch: build the error message and stop.
    std::stringstream message;
    message << "In chunk " << chunk_index << " expected type ";
    message << this->type()->ToString();
    message << " but saw ";
    message << chunk_type->ToString();
    return Status::Invalid(message.str());
  }
  return Status::OK();
}

} // namespace arrow
4 changes: 3 additions & 1 deletion cpp/src/arrow/table/column.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,9 @@ class ChunkedArray {
return chunks_.size();
}

const std::shared_ptr<Array>& chunk(int i) const;
// Returns a reference to the i-th chunk. The index is passed straight to
// chunks_[i]; no bounds check is performed here.
const std::shared_ptr<Array>& chunk(int i) const {
  const std::shared_ptr<Array>& selected = chunks_[i];
  return selected;
}

protected:
ArrayVector chunks_;
Expand Down
5 changes: 5 additions & 0 deletions cpp/src/arrow/type.h
Original file line number Diff line number Diff line change
Expand Up @@ -142,10 +142,15 @@ struct DataType {
nullable(nullable) {}

// Returns true when `other` is this same object, or when its `type` and
// `nullable` members both equal this object's. A null `other` compares
// unequal.
//
// Takes a raw pointer so that the method is friendlier for subclasses to
// override.
virtual bool Equals(const DataType* other) {
  if (this == other) {
    return true;
  }
  // Guard before dereferencing: the std::shared_ptr overload forwards
  // other.get(), which is null for an empty pointer.
  if (other == nullptr) {
    return false;
  }
  return this->type == other->type && this->nullable == other->nullable;
}

bool Equals(const std::shared_ptr<DataType>& other) {
return Equals(other.get());
}

virtual std::string ToString() const = 0;
};

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/types/list.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ class ListBuilder : public Int32Builder {
//
// If passed, null_bytes is of equal length to values, and any nonzero byte
// will be considered as a null for that slot
Status Append(T* values, int32_t length, uint8_t* null_bytes = nullptr) {
Status Append(value_type* values, int32_t length, uint8_t* null_bytes = nullptr) {
if (length_ + length > capacity_) {
int32_t new_capacity = util::next_power2(length_ + length);
RETURN_NOT_OK(Resize(new_capacity));
Expand Down
20 changes: 11 additions & 9 deletions cpp/src/arrow/types/primitive.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ class PrimitiveArray : public Array {
template <typename TypeClass>
class PrimitiveArrayImpl : public PrimitiveArray {
public:
typedef typename TypeClass::c_type T;
typedef typename TypeClass::c_type value_type;

PrimitiveArrayImpl() : PrimitiveArray() {}

Expand All @@ -81,9 +81,11 @@ class PrimitiveArrayImpl : public PrimitiveArray {
return PrimitiveArray::Equals(*static_cast<const PrimitiveArray*>(&other));
}

const T* raw_data() const { return reinterpret_cast<const T*>(raw_data_);}
const value_type* raw_data() const {
return reinterpret_cast<const value_type*>(raw_data_);
}

T Value(int i) const {
value_type Value(int i) const {
return raw_data()[i];
}

Expand All @@ -96,12 +98,12 @@ class PrimitiveArrayImpl : public PrimitiveArray {
template <typename Type, typename ArrayType>
class PrimitiveBuilder : public ArrayBuilder {
public:
typedef typename Type::c_type T;
typedef typename Type::c_type value_type;

explicit PrimitiveBuilder(MemoryPool* pool, const TypePtr& type) :
ArrayBuilder(pool, type),
values_(nullptr) {
elsize_ = sizeof(T);
elsize_ = sizeof(value_type);
}

virtual ~PrimitiveBuilder() {}
Expand Down Expand Up @@ -141,7 +143,7 @@ class PrimitiveBuilder : public ArrayBuilder {
}

// Scalar append
Status Append(T val, bool is_null = false) {
Status Append(value_type val, bool is_null = false) {
if (length_ == capacity_) {
// If the capacity was not already a multiple of 2, do so here
RETURN_NOT_OK(Resize(util::next_power2(capacity_ + 1)));
Expand All @@ -158,7 +160,7 @@ class PrimitiveBuilder : public ArrayBuilder {
//
// If passed, null_bytes is of equal length to values, and any nonzero byte
// will be considered as a null for that slot
Status Append(const T* values, int32_t length,
Status Append(const value_type* values, int32_t length,
const uint8_t* null_bytes = nullptr) {
if (length_ + length > capacity_) {
int32_t new_capacity = util::next_power2(length_ + length);
Expand Down Expand Up @@ -215,8 +217,8 @@ class PrimitiveBuilder : public ArrayBuilder {
return Status::OK();
}

T* raw_buffer() {
return reinterpret_cast<T*>(values_->mutable_data());
value_type* raw_buffer() {
return reinterpret_cast<value_type*>(values_->mutable_data());
}

std::shared_ptr<Buffer> buffer() const {
Expand Down
4 changes: 4 additions & 0 deletions cpp/src/arrow/util/bit-util.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ static inline int64_t ceil_byte(int64_t size) {
return (size + 7) & ~7;
}

// Number of bytes needed to hold `size` bits: ceil_byte() rounds the bit
// count up to a multiple of 8, which is then divided by 8.
static inline int64_t bytes_for_bits(int64_t size) {
  const int64_t padded_bits = ceil_byte(size);
  return padded_bits / 8;
}

static inline int64_t ceil_2bytes(int64_t size) {
return (size + 15) & ~15;
}
Expand Down

0 comments on commit 988135c

Please sign in to comment.