From 732a2059d0c4493e451c566160b9d5d01dfe87be Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Mon, 17 Oct 2016 13:44:34 -0400 Subject: [PATCH] ARROW-261: Refactor String/Binary code paths to reflect unnested (non-list-based) structure Per discussions on the mailing list. This should in theory match the Java implementation. Author: Wes McKinney Closes #176 from wesm/ARROW-261 and squashes the following commits: dca39ce [Wes McKinney] Make binary/string constants static to avoid memory-access-related segfaults in third party libraries 1e65b01 [Wes McKinney] Deprecate pyarrow::Status in favor of just arrow::Status. Conform pyarrow use of ArrayBuilder::Finish 9a1f77e [Wes McKinney] Add license header to index.rst bd70cab [Wes McKinney] Complete refactoring, fix up IPC tests for flattened string/binary buffer/metadata layout ae64f2e [Wes McKinney] Refactoring to reflect collaprsed list-like structure of Binary and String types. Not yet complete --- cpp/CMakeLists.txt | 1 - cpp/src/arrow/builder.h | 2 +- cpp/src/arrow/ipc/adapter.cc | 47 ++++---- cpp/src/arrow/ipc/test-common.h | 19 ++-- cpp/src/arrow/type.h | 20 +--- cpp/src/arrow/types/CMakeLists.txt | 1 - cpp/src/arrow/types/binary.h | 28 ----- cpp/src/arrow/types/construct.cc | 31 +----- cpp/src/arrow/types/construct.h | 8 -- cpp/src/arrow/types/json.cc | 37 ------- cpp/src/arrow/types/json.h | 36 ------ cpp/src/arrow/types/list-test.cc | 15 ++- cpp/src/arrow/types/list.cc | 42 ++++++- cpp/src/arrow/types/list.h | 49 ++------- cpp/src/arrow/types/primitive-test.cc | 26 +++-- cpp/src/arrow/types/primitive.cc | 31 ++++-- cpp/src/arrow/types/primitive.h | 31 +++--- cpp/src/arrow/types/string-test.cc | 33 +++--- cpp/src/arrow/types/string.cc | 101 ++++++++++++++--- cpp/src/arrow/types/string.h | 49 +++++---- cpp/src/arrow/types/struct-test.cc | 21 +++- cpp/src/arrow/types/struct.cc | 14 +++ cpp/src/arrow/types/struct.h | 17 +-- cpp/src/arrow/util/status.cc | 6 + cpp/src/arrow/util/status.h | 17 ++- python/CMakeLists.txt | 2 - python/doc/index.rst | 18 ++- python/pyarrow/error.pxd | 4 +- python/pyarrow/error.pyx | 10 +- python/pyarrow/includes/pyarrow.pxd | 35 ++---- python/pyarrow/io.pyx | 56 +++++----- python/pyarrow/ipc.pyx | 18 +-- python/pyarrow/parquet.pyx | 14 +-- python/src/pyarrow/adapters/builtin.cc | 39 ++++--- python/src/pyarrow/adapters/builtin.h | 9 +- python/src/pyarrow/adapters/pandas.cc | 32 +++--- python/src/pyarrow/adapters/pandas.h | 15 ++- python/src/pyarrow/api.h | 2 - python/src/pyarrow/common.cc | 12 +- python/src/pyarrow/common.h | 7 -- python/src/pyarrow/io.cc | 59 +++++----- python/src/pyarrow/status.cc | 92 ---------------- python/src/pyarrow/status.h | 146 ------------------------- 43 files changed, 484 insertions(+), 768 deletions(-) delete mode 100644 cpp/src/arrow/types/binary.h delete mode 100644 cpp/src/arrow/types/json.cc delete mode 100644 cpp/src/arrow/types/json.h delete mode 100644 python/src/pyarrow/status.cc delete mode 100644 python/src/pyarrow/status.h diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index d682dc76f8ced..6f954830b6334 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -681,7 +681,6 @@ set(ARROW_SRCS src/arrow/types/construct.cc src/arrow/types/decimal.cc - src/arrow/types/json.cc src/arrow/types/list.cc src/arrow/types/primitive.cc src/arrow/types/string.cc diff --git a/cpp/src/arrow/builder.h b/cpp/src/arrow/builder.h index 646a6f24e9df8..cef17e5aabab9 100644 --- a/cpp/src/arrow/builder.h +++ b/cpp/src/arrow/builder.h @@ -93,7 +93,7 @@ class ARROW_EXPORT ArrayBuilder { // Creates new array object to hold the contents of the builder and transfers // ownership of the data. This resets all variables on the builder. - virtual std::shared_ptr Finish() = 0; + virtual Status Finish(std::shared_ptr* out) = 0; const std::shared_ptr& type() const { return type_; } diff --git a/cpp/src/arrow/ipc/adapter.cc b/cpp/src/arrow/ipc/adapter.cc index cd8ab53a31d1f..f84cb264f70e1 100644 --- a/cpp/src/arrow/ipc/adapter.cc +++ b/cpp/src/arrow/ipc/adapter.cc @@ -78,22 +78,6 @@ static bool IsPrimitive(const DataType* type) { } } -static bool IsListType(const DataType* type) { - DCHECK(type != nullptr); - switch (type->type) { - // TODO(emkornfield) grouping like this are used in a few places in the - // code consider using pattern like: - // http://stackoverflow.com/questions/26784685/c-macro-for-calling-function-based-on-enum-type - // - case Type::BINARY: - case Type::LIST: - case Type::STRING: - return true; - default: - return false; - } -} - // ---------------------------------------------------------------------- // Record batch write path @@ -115,7 +99,11 @@ Status VisitArray(const Array* arr, std::vector* field_nodes if (IsPrimitive(arr_type)) { const auto prim_arr = static_cast(arr); buffers->push_back(prim_arr->data()); - } else if (IsListType(arr_type)) { + } else if (arr->type_enum() == Type::STRING || arr->type_enum() == Type::BINARY) { + const auto binary_arr = static_cast(arr); + buffers->push_back(binary_arr->offsets()); + buffers->push_back(binary_arr->data()); + } else if (arr->type_enum() == Type::LIST) { const auto list_arr = static_cast(arr); buffers->push_back(list_arr->offset_buffer()); RETURN_NOT_OK(VisitArray( @@ -331,9 +319,21 @@ class RecordBatchReader::RecordBatchReaderImpl { } return MakePrimitiveArray( type, field_meta.length, data, field_meta.null_count, null_bitmap, out); - } + } else if (type->type == Type::STRING || type->type == Type::BINARY) { + std::shared_ptr offsets; + std::shared_ptr values; + RETURN_NOT_OK(GetBuffer(buffer_index_++, &offsets)); + RETURN_NOT_OK(GetBuffer(buffer_index_++, &values)); - if (IsListType(type.get())) { + if (type->type == Type::STRING) { + *out = std::make_shared( + field_meta.length, offsets, values, field_meta.null_count, null_bitmap); + } else { + *out = std::make_shared( + field_meta.length, offsets, values, field_meta.null_count, null_bitmap); + } + return Status::OK(); + } else if (type->type == Type::LIST) { std::shared_ptr offsets; RETURN_NOT_OK(GetBuffer(buffer_index_++, &offsets)); const int num_children = type->num_children(); @@ -346,11 +346,10 @@ class RecordBatchReader::RecordBatchReaderImpl { std::shared_ptr values_array; RETURN_NOT_OK( NextArray(type->child(0).get(), max_recursion_depth - 1, &values_array)); - return MakeListArray(type, field_meta.length, offsets, values_array, - field_meta.null_count, null_bitmap, out); - } - - if (type->type == Type::STRUCT) { + *out = std::make_shared(type, field_meta.length, offsets, values_array, + field_meta.null_count, null_bitmap); + return Status::OK(); + } else if (type->type == Type::STRUCT) { const int num_children = type->num_children(); std::vector fields; fields.reserve(num_children); diff --git a/cpp/src/arrow/ipc/test-common.h b/cpp/src/arrow/ipc/test-common.h index 7d02bc302f40e..13bbbebde8aa1 100644 --- a/cpp/src/arrow/ipc/test-common.h +++ b/cpp/src/arrow/ipc/test-common.h @@ -42,7 +42,7 @@ const auto kListInt32 = std::make_shared(kInt32); const auto kListListInt32 = std::make_shared(kListInt32); Status MakeRandomInt32Array( - int32_t length, bool include_nulls, MemoryPool* pool, std::shared_ptr* array) { + int32_t length, bool include_nulls, MemoryPool* pool, std::shared_ptr* out) { std::shared_ptr data; test::MakeRandomInt32PoolBuffer(length, pool, &data); const auto kInt32 = std::make_shared(); @@ -52,16 +52,14 @@ Status MakeRandomInt32Array( test::MakeRandomBytePoolBuffer(length, pool, &valid_bytes); RETURN_NOT_OK(builder.Append( reinterpret_cast(data->data()), length, valid_bytes->data())); - *array = builder.Finish(); - return Status::OK(); + return builder.Finish(out); } RETURN_NOT_OK(builder.Append(reinterpret_cast(data->data()), length)); - *array = builder.Finish(); - return Status::OK(); + return builder.Finish(out); } Status MakeRandomListArray(const std::shared_ptr& child_array, int num_lists, - bool include_nulls, MemoryPool* pool, std::shared_ptr* array) { + bool include_nulls, MemoryPool* pool, std::shared_ptr* out) { // Create the null list values std::vector valid_lists(num_lists); const double null_percent = include_nulls ? 0.1 : 0; @@ -90,8 +88,8 @@ Status MakeRandomListArray(const std::shared_ptr& child_array, int num_li } ListBuilder builder(pool, child_array); RETURN_NOT_OK(builder.Append(offsets.data(), num_lists, valid_lists.data())); - *array = builder.Finish(); - return (*array)->Validate(); + RETURN_NOT_OK(builder.Finish(out)); + return (*out)->Validate(); } typedef Status MakeRecordBatch(std::shared_ptr* out); @@ -115,7 +113,7 @@ Status MakeIntRecordBatch(std::shared_ptr* out) { template Status MakeRandomBinaryArray( - const TypePtr& type, int32_t length, MemoryPool* pool, ArrayPtr* array) { + const TypePtr& type, int32_t length, MemoryPool* pool, ArrayPtr* out) { const std::vector values = { "", "", "abc", "123", "efg", "456!@#!@#", "12312"}; Builder builder(pool, type); @@ -130,8 +128,7 @@ Status MakeRandomBinaryArray( builder.Append(reinterpret_cast(value.data()), value.size())); } } - *array = builder.Finish(); - return Status::OK(); + return builder.Finish(out); } Status MakeStringTypesRecordBatch(std::shared_ptr* out) { diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h index b4c3721a72895..ea8516fc34798 100644 --- a/cpp/src/arrow/type.h +++ b/cpp/src/arrow/type.h @@ -242,7 +242,7 @@ struct ARROW_EXPORT DoubleType : public PrimitiveType { struct ARROW_EXPORT ListType : public DataType { // List can contain any other logical value type explicit ListType(const std::shared_ptr& value_type) - : ListType(value_type, Type::LIST) {} + : ListType(std::make_shared("item", value_type)) {} explicit ListType(const std::shared_ptr& value_field) : DataType(Type::LIST) { children_ = {value_field}; @@ -255,26 +255,17 @@ struct ARROW_EXPORT ListType : public DataType { static char const* name() { return "list"; } std::string ToString() const override; - - protected: - // Constructor for classes that are implemented as List Arrays. - ListType(const std::shared_ptr& value_type, Type::type logical_type) - : DataType(logical_type) { - // TODO ARROW-187 this can technically fail, make a constructor method ? - children_ = {std::make_shared("item", value_type)}; - } }; // BinaryType type is reprsents lists of 1-byte values. -struct ARROW_EXPORT BinaryType : public ListType { +struct ARROW_EXPORT BinaryType : public DataType { BinaryType() : BinaryType(Type::BINARY) {} static char const* name() { return "binary"; } std::string ToString() const override; protected: // Allow subclasses to change the logical type. - explicit BinaryType(Type::type logical_type) - : ListType(std::shared_ptr(new UInt8Type()), logical_type) {} + explicit BinaryType(Type::type logical_type) : DataType(logical_type) {} }; // UTF encoded strings @@ -284,9 +275,6 @@ struct ARROW_EXPORT StringType : public BinaryType { static char const* name() { return "string"; } std::string ToString() const override; - - protected: - explicit StringType(Type::type logical_type) : BinaryType(logical_type) {} }; struct ARROW_EXPORT StructType : public DataType { @@ -300,7 +288,7 @@ struct ARROW_EXPORT StructType : public DataType { // These will be defined elsewhere template -struct type_traits {}; +struct TypeTraits {}; } // namespace arrow diff --git a/cpp/src/arrow/types/CMakeLists.txt b/cpp/src/arrow/types/CMakeLists.txt index 72a8e77664610..9f7816989827d 100644 --- a/cpp/src/arrow/types/CMakeLists.txt +++ b/cpp/src/arrow/types/CMakeLists.txt @@ -25,7 +25,6 @@ install(FILES construct.h datetime.h decimal.h - json.h list.h primitive.h string.h diff --git a/cpp/src/arrow/types/binary.h b/cpp/src/arrow/types/binary.h deleted file mode 100644 index 201fbb6e79536..0000000000000 --- a/cpp/src/arrow/types/binary.h +++ /dev/null @@ -1,28 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#ifndef ARROW_TYPES_BINARY_H -#define ARROW_TYPES_BINARY_H - -#include -#include - -#include "arrow/type.h" - -namespace arrow {} // namespace arrow - -#endif // ARROW_TYPES_BINARY_H diff --git a/cpp/src/arrow/types/construct.cc b/cpp/src/arrow/types/construct.cc index 0b71ea965516c..67245f8ea1fda 100644 --- a/cpp/src/arrow/types/construct.cc +++ b/cpp/src/arrow/types/construct.cc @@ -59,6 +59,7 @@ Status MakeBuilder(MemoryPool* pool, const std::shared_ptr& type, BUILDER_CASE(DOUBLE, DoubleBuilder); BUILDER_CASE(STRING, StringBuilder); + BUILDER_CASE(BINARY, BinaryBuilder); case Type::LIST: { std::shared_ptr value_builder; @@ -105,10 +106,10 @@ Status MakePrimitiveArray(const TypePtr& type, int32_t length, MAKE_PRIMITIVE_ARRAY_CASE(INT32, Int32Array); MAKE_PRIMITIVE_ARRAY_CASE(UINT64, UInt64Array); MAKE_PRIMITIVE_ARRAY_CASE(INT64, Int64Array); - MAKE_PRIMITIVE_ARRAY_CASE(TIME, Int64Array); - MAKE_PRIMITIVE_ARRAY_CASE(TIMESTAMP, TimestampArray); MAKE_PRIMITIVE_ARRAY_CASE(FLOAT, FloatArray); MAKE_PRIMITIVE_ARRAY_CASE(DOUBLE, DoubleArray); + MAKE_PRIMITIVE_ARRAY_CASE(TIME, Int64Array); + MAKE_PRIMITIVE_ARRAY_CASE(TIMESTAMP, TimestampArray); MAKE_PRIMITIVE_ARRAY_CASE(TIMESTAMP_DOUBLE, DoubleArray); default: return Status::NotImplemented(type->ToString()); @@ -120,30 +121,4 @@ Status MakePrimitiveArray(const TypePtr& type, int32_t length, #endif } -Status MakeListArray(const TypePtr& type, int32_t length, - const std::shared_ptr& offsets, const ArrayPtr& values, int32_t null_count, - const std::shared_ptr& null_bitmap, ArrayPtr* out) { - switch (type->type) { - case Type::BINARY: - out->reset(new BinaryArray(type, length, offsets, values, null_count, null_bitmap)); - break; - - case Type::LIST: - out->reset(new ListArray(type, length, offsets, values, null_count, null_bitmap)); - break; - - case Type::DECIMAL_TEXT: - case Type::STRING: - out->reset(new StringArray(type, length, offsets, values, null_count, null_bitmap)); - break; - default: - return Status::NotImplemented(type->ToString()); - } -#ifdef NDEBUG - return Status::OK(); -#else - return (*out)->Validate(); -#endif -} - } // namespace arrow diff --git a/cpp/src/arrow/types/construct.h b/cpp/src/arrow/types/construct.h index afdadbe079013..e18e946d1a64c 100644 --- a/cpp/src/arrow/types/construct.h +++ b/cpp/src/arrow/types/construct.h @@ -42,14 +42,6 @@ Status ARROW_EXPORT MakePrimitiveArray(const std::shared_ptr& type, int32_t length, const std::shared_ptr& data, int32_t null_count, const std::shared_ptr& null_bitmap, std::shared_ptr* out); -// Create new list arrays for logical types that are backed by ListArrays (e.g. list of -// primitives and strings) -// TODO(emkornfield) split up string vs list? -Status ARROW_EXPORT MakeListArray(const std::shared_ptr& type, int32_t length, - const std::shared_ptr& offests, const std::shared_ptr& values, - int32_t null_count, const std::shared_ptr& null_bitmap, - std::shared_ptr* out); - } // namespace arrow #endif // ARROW_BUILDER_H_ diff --git a/cpp/src/arrow/types/json.cc b/cpp/src/arrow/types/json.cc deleted file mode 100644 index 89240fc22bb2c..0000000000000 --- a/cpp/src/arrow/types/json.cc +++ /dev/null @@ -1,37 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "arrow/types/json.h" - -#include - -#include "arrow/type.h" -#include "arrow/types/union.h" - -namespace arrow { - -static const TypePtr Null(new NullType()); -static const TypePtr Int32(new Int32Type()); -static const TypePtr String(new StringType()); -static const TypePtr Double(new DoubleType()); -static const TypePtr Bool(new BooleanType()); - -static const std::vector kJsonTypes = {Null, Int32, String, Double, Bool}; -TypePtr JSONScalar::dense_type = TypePtr(new DenseUnionType(kJsonTypes)); -TypePtr JSONScalar::sparse_type = TypePtr(new SparseUnionType(kJsonTypes)); - -} // namespace arrow diff --git a/cpp/src/arrow/types/json.h b/cpp/src/arrow/types/json.h deleted file mode 100644 index 9de961f79a60a..0000000000000 --- a/cpp/src/arrow/types/json.h +++ /dev/null @@ -1,36 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#ifndef ARROW_TYPES_JSON_H -#define ARROW_TYPES_JSON_H - -#include "arrow/type.h" - -namespace arrow { - -struct JSONScalar : public DataType { - bool dense; - - static TypePtr dense_type; - static TypePtr sparse_type; - - explicit JSONScalar(bool dense = true) : DataType(Type::JSON_SCALAR), dense(dense) {} -}; - -} // namespace arrow - -#endif // ARROW_TYPES_JSON_H diff --git a/cpp/src/arrow/types/list-test.cc b/cpp/src/arrow/types/list-test.cc index 2e41b4a61caf2..12c539495a28b 100644 --- a/cpp/src/arrow/types/list-test.cc +++ b/cpp/src/arrow/types/list-test.cc @@ -76,7 +76,11 @@ class TestListBuilder : public TestBuilder { builder_ = std::dynamic_pointer_cast(tmp); } - void Done() { result_ = std::dynamic_pointer_cast(builder_->Finish()); } + void Done() { + std::shared_ptr out; + EXPECT_OK(builder_->Finish(&out)); + result_ = std::dynamic_pointer_cast(out); + } protected: TypePtr value_type_; @@ -98,14 +102,17 @@ TEST_F(TestListBuilder, Equality) { // setup two equal arrays ASSERT_OK(builder_->Append(equal_offsets.data(), equal_offsets.size())); ASSERT_OK(vb->Append(equal_values.data(), equal_values.size())); - array = builder_->Finish(); + + ASSERT_OK(builder_->Finish(&array)); ASSERT_OK(builder_->Append(equal_offsets.data(), equal_offsets.size())); ASSERT_OK(vb->Append(equal_values.data(), equal_values.size())); - equal_array = builder_->Finish(); + + ASSERT_OK(builder_->Finish(&equal_array)); // now an unequal one ASSERT_OK(builder_->Append(unequal_offsets.data(), unequal_offsets.size())); ASSERT_OK(vb->Append(unequal_values.data(), unequal_values.size())); - unequal_array = builder_->Finish(); + + ASSERT_OK(builder_->Finish(&unequal_array)); // Test array equality EXPECT_TRUE(array->Equals(array)); diff --git a/cpp/src/arrow/types/list.cc b/cpp/src/arrow/types/list.cc index 6334054caf84a..ef2ec22cb5336 100644 --- a/cpp/src/arrow/types/list.cc +++ b/cpp/src/arrow/types/list.cc @@ -25,7 +25,7 @@ bool ListArray::EqualsExact(const ListArray& other) const { if (null_count_ != other.null_count_) { return false; } bool equal_offsets = - offset_buf_->Equals(*other.offset_buf_, (length_ + 1) * sizeof(int32_t)); + offset_buffer_->Equals(*other.offset_buffer_, (length_ + 1) * sizeof(int32_t)); if (!equal_offsets) { return false; } bool equal_null_bitmap = true; if (null_count_ > 0) { @@ -72,10 +72,10 @@ bool ListArray::RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_st Status ListArray::Validate() const { if (length_ < 0) { return Status::Invalid("Length was negative"); } - if (!offset_buf_) { return Status::Invalid("offset_buf_ was null"); } - if (offset_buf_->size() / static_cast(sizeof(int32_t)) < length_) { + if (!offset_buffer_) { return Status::Invalid("offset_buffer_ was null"); } + if (offset_buffer_->size() / static_cast(sizeof(int32_t)) < length_) { std::stringstream ss; - ss << "offset buffer size (bytes): " << offset_buf_->size() + ss << "offset buffer size (bytes): " << offset_buffer_->size() << " isn't large enough for length: " << length_; return Status::Invalid(ss.str()); } @@ -121,4 +121,38 @@ Status ListArray::Validate() const { return Status::OK(); } +Status ListBuilder::Init(int32_t elements) { + DCHECK_LT(elements, std::numeric_limits::max()); + RETURN_NOT_OK(ArrayBuilder::Init(elements)); + // one more then requested for offsets + return offset_builder_.Resize((elements + 1) * sizeof(int32_t)); +} + +Status ListBuilder::Resize(int32_t capacity) { + DCHECK_LT(capacity, std::numeric_limits::max()); + // one more then requested for offsets + RETURN_NOT_OK(offset_builder_.Resize((capacity + 1) * sizeof(int32_t))); + return ArrayBuilder::Resize(capacity); +} + +Status ListBuilder::Finish(std::shared_ptr* out) { + std::shared_ptr items = values_; + if (!items) { RETURN_NOT_OK(value_builder_->Finish(&items)); } + + RETURN_NOT_OK(offset_builder_.Append(items->length())); + std::shared_ptr offsets = offset_builder_.Finish(); + + *out = std::make_shared( + type_, length_, offsets, items, null_count_, null_bitmap_); + + Reset(); + + return Status::OK(); +} + +void ListBuilder::Reset() { + capacity_ = length_ = null_count_ = 0; + null_bitmap_ = nullptr; +} + } // namespace arrow diff --git a/cpp/src/arrow/types/list.h b/cpp/src/arrow/types/list.h index f3894510d091a..9440ffed4bf8a 100644 --- a/cpp/src/arrow/types/list.h +++ b/cpp/src/arrow/types/list.h @@ -43,9 +43,9 @@ class ARROW_EXPORT ListArray : public Array { const ArrayPtr& values, int32_t null_count = 0, std::shared_ptr null_bitmap = nullptr) : Array(type, length, null_count, null_bitmap) { - offset_buf_ = offsets; - offsets_ = offsets == nullptr ? nullptr - : reinterpret_cast(offset_buf_->data()); + offset_buffer_ = offsets; + offsets_ = offsets == nullptr ? nullptr : reinterpret_cast( + offset_buffer_->data()); values_ = values; } @@ -57,7 +57,7 @@ class ARROW_EXPORT ListArray : public Array { // with this array. const std::shared_ptr& values() const { return values_; } const std::shared_ptr offset_buffer() const { - return std::static_pointer_cast(offset_buf_); + return std::static_pointer_cast(offset_buffer_); } const std::shared_ptr& value_type() const { return values_->type(); } @@ -77,7 +77,7 @@ class ARROW_EXPORT ListArray : public Array { const ArrayPtr& arr) const override; protected: - std::shared_ptr offset_buf_; + std::shared_ptr offset_buffer_; const int32_t* offsets_; ArrayPtr values_; }; @@ -119,19 +119,9 @@ class ARROW_EXPORT ListBuilder : public ArrayBuilder { virtual ~ListBuilder() {} - Status Init(int32_t elements) override { - DCHECK_LT(elements, std::numeric_limits::max()); - RETURN_NOT_OK(ArrayBuilder::Init(elements)); - // one more then requested for offsets - return offset_builder_.Resize((elements + 1) * sizeof(int32_t)); - } - - Status Resize(int32_t capacity) override { - DCHECK_LT(capacity, std::numeric_limits::max()); - // one more then requested for offsets - RETURN_NOT_OK(offset_builder_.Resize((capacity + 1) * sizeof(int32_t))); - return ArrayBuilder::Resize(capacity); - } + Status Init(int32_t elements) override; + Status Resize(int32_t capacity) override; + Status Finish(std::shared_ptr* out) override; // Vector append // @@ -145,27 +135,6 @@ class ARROW_EXPORT ListBuilder : public ArrayBuilder { return Status::OK(); } - // The same as Finalize but allows for overridding the c++ type - template - std::shared_ptr Transfer() { - std::shared_ptr items = values_; - if (!items) { items = value_builder_->Finish(); } - - offset_builder_.Append(items->length()); - - const auto offsets_buffer = offset_builder_.Finish(); - auto result = std::make_shared( - type_, length_, offsets_buffer, items, null_count_, null_bitmap_); - - // TODO(emkornfield) make a reset method - capacity_ = length_ = null_count_ = 0; - null_bitmap_ = nullptr; - - return result; - } - - std::shared_ptr Finish() override { return Transfer(); } - // Start a new variable-length list slot // // This function should be called before beginning to append elements to the @@ -188,6 +157,8 @@ class ARROW_EXPORT ListBuilder : public ArrayBuilder { BufferBuilder offset_builder_; std::shared_ptr value_builder_; std::shared_ptr values_; + + void Reset(); }; } // namespace arrow diff --git a/cpp/src/arrow/types/primitive-test.cc b/cpp/src/arrow/types/primitive-test.cc index 5ac2867932df7..121bd4794f291 100644 --- a/cpp/src/arrow/types/primitive-test.cc +++ b/cpp/src/arrow/types/primitive-test.cc @@ -123,8 +123,11 @@ class TestPrimitiveBuilder : public TestBuilder { auto expected = std::make_shared(size, ex_data, ex_null_count, ex_null_bitmap); - std::shared_ptr result = - std::dynamic_pointer_cast(builder->Finish()); + + std::shared_ptr out; + ASSERT_OK(builder->Finish(&out)); + + std::shared_ptr result = std::dynamic_pointer_cast(out); // Builder is now reset ASSERT_EQ(0, builder->length()); @@ -216,8 +219,10 @@ void TestPrimitiveBuilder::Check( auto expected = std::make_shared(size, ex_data, ex_null_count, ex_null_bitmap); - std::shared_ptr result = - std::dynamic_pointer_cast(builder->Finish()); + + std::shared_ptr out; + ASSERT_OK(builder->Finish(&out)); + std::shared_ptr result = std::dynamic_pointer_cast(out); // Builder is now reset ASSERT_EQ(0, builder->length()); @@ -254,7 +259,7 @@ TYPED_TEST(TestPrimitiveBuilder, TestInit) { int n = 1000; ASSERT_OK(this->builder_->Reserve(n)); ASSERT_EQ(util::next_power2(n), this->builder_->capacity()); - ASSERT_EQ(util::next_power2(type_traits::bytes_required(n)), + ASSERT_EQ(util::next_power2(TypeTraits::bytes_required(n)), this->builder_->data()->size()); // unsure if this should go in all builder classes @@ -267,7 +272,8 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendNull) { ASSERT_OK(this->builder_->AppendNull()); } - auto result = this->builder_->Finish(); + std::shared_ptr result; + ASSERT_OK(this->builder_->Finish(&result)); for (int i = 0; i < size; ++i) { ASSERT_TRUE(result->IsNull(i)) << i; @@ -298,7 +304,8 @@ TYPED_TEST(TestPrimitiveBuilder, TestArrayDtorDealloc) { } do { - std::shared_ptr result = this->builder_->Finish(); + std::shared_ptr result; + ASSERT_OK(this->builder_->Finish(&result)); } while (false); ASSERT_EQ(memory_before, this->pool_->bytes_allocated()); @@ -315,8 +322,7 @@ Status MakeArray(const vector& valid_bytes, const vector& draws, int RETURN_NOT_OK(builder->AppendNull()); } } - *out = builder->Finish(); - return Status::OK(); + return builder->Finish(out); } TYPED_TEST(TestPrimitiveBuilder, Equality) { @@ -465,7 +471,7 @@ TYPED_TEST(TestPrimitiveBuilder, TestResize) { ASSERT_OK(this->builder_->Reserve(cap)); ASSERT_EQ(cap, this->builder_->capacity()); - ASSERT_EQ(type_traits::bytes_required(cap), this->builder_->data()->size()); + ASSERT_EQ(TypeTraits::bytes_required(cap), this->builder_->data()->size()); ASSERT_EQ(util::bytes_for_bits(cap), this->builder_->null_bitmap()->size()); } diff --git a/cpp/src/arrow/types/primitive.cc b/cpp/src/arrow/types/primitive.cc index 9ba2ebdcc2d5b..3a05ccfdf1861 100644 --- a/cpp/src/arrow/types/primitive.cc +++ b/cpp/src/arrow/types/primitive.cc @@ -69,12 +69,25 @@ bool PrimitiveArray::Equals(const std::shared_ptr& arr) const { return EqualsExact(*static_cast(arr.get())); } +template class NumericArray; +template class NumericArray; +template class NumericArray; +template class NumericArray; +template class NumericArray; +template class NumericArray; +template class NumericArray; +template class NumericArray; +template class NumericArray; +template class NumericArray; +template class NumericArray; +template class NumericArray; + template Status PrimitiveBuilder::Init(int32_t capacity) { RETURN_NOT_OK(ArrayBuilder::Init(capacity)); data_ = std::make_shared(pool_); - int64_t nbytes = type_traits::bytes_required(capacity); + int64_t nbytes = TypeTraits::bytes_required(capacity); RETURN_NOT_OK(data_->Resize(nbytes)); // TODO(emkornfield) valgrind complains without this memset(data_->mutable_data(), 0, nbytes); @@ -93,10 +106,9 @@ Status PrimitiveBuilder::Resize(int32_t capacity) { } else { RETURN_NOT_OK(ArrayBuilder::Resize(capacity)); const int64_t old_bytes = data_->size(); - const int64_t new_bytes = type_traits::bytes_required(capacity); + const int64_t new_bytes = TypeTraits::bytes_required(capacity); RETURN_NOT_OK(data_->Resize(new_bytes)); raw_data_ = reinterpret_cast(data_->mutable_data()); - memset(data_->mutable_data() + old_bytes, 0, new_bytes - old_bytes); } return Status::OK(); @@ -108,7 +120,7 @@ Status PrimitiveBuilder::Append( RETURN_NOT_OK(Reserve(length)); if (length > 0) { - memcpy(raw_data_ + length_, values, type_traits::bytes_required(length)); + memcpy(raw_data_ + length_, values, TypeTraits::bytes_required(length)); } // length_ is update by these @@ -118,13 +130,18 @@ Status PrimitiveBuilder::Append( } template -std::shared_ptr PrimitiveBuilder::Finish() { - std::shared_ptr result = std::make_shared::ArrayType>( +Status PrimitiveBuilder::Finish(std::shared_ptr* out) { + const int64_t bytes_required = TypeTraits::bytes_required(length_); + if (bytes_required > 0 && bytes_required < data_->size()) { + // Trim buffers + RETURN_NOT_OK(data_->Resize(bytes_required)); + } + *out = std::make_shared::ArrayType>( type_, length_, data_, null_count_, null_bitmap_); data_ = null_bitmap_ = nullptr; capacity_ = length_ = null_count_ = 0; - return result; + return Status::OK(); } template <> diff --git a/cpp/src/arrow/types/primitive.h b/cpp/src/arrow/types/primitive.h index c643783f681bd..f21470d96e45b 100644 --- a/cpp/src/arrow/types/primitive.h +++ b/cpp/src/arrow/types/primitive.h @@ -91,7 +91,9 @@ class ARROW_EXPORT NumericArray : public PrimitiveArray { value_type Value(int i) const { return raw_data()[i]; } }; -#define NUMERIC_ARRAY_DECL(NAME, TypeClass) using NAME = NumericArray; +#define NUMERIC_ARRAY_DECL(NAME, TypeClass) \ + using NAME = NumericArray; \ + extern template class ARROW_EXPORT NumericArray; NUMERIC_ARRAY_DECL(UInt8Array, UInt8Type); NUMERIC_ARRAY_DECL(Int8Array, Int8Type); @@ -139,8 +141,7 @@ class ARROW_EXPORT PrimitiveBuilder : public ArrayBuilder { Status Append( const value_type* values, int32_t length, const uint8_t* valid_bytes = nullptr); - std::shared_ptr Finish() override; - + Status Finish(std::shared_ptr* out) override; Status Init(int32_t capacity) override; // Increase the capacity of the builder to accommodate at least the indicated @@ -183,77 +184,77 @@ class ARROW_EXPORT NumericBuilder : public PrimitiveBuilder { }; template <> -struct type_traits { +struct TypeTraits { typedef UInt8Array ArrayType; static inline int bytes_required(int elements) { return elements; } }; template <> -struct type_traits { +struct TypeTraits { typedef Int8Array ArrayType; static inline int bytes_required(int elements) { return elements; } }; template <> -struct type_traits { +struct TypeTraits { typedef UInt16Array ArrayType; static inline int bytes_required(int elements) { return elements * sizeof(uint16_t); } }; template <> -struct type_traits { +struct TypeTraits { typedef Int16Array ArrayType; static inline int bytes_required(int elements) { return elements * sizeof(int16_t); } }; template <> -struct type_traits { +struct TypeTraits { typedef UInt32Array ArrayType; static inline int bytes_required(int elements) { return elements * sizeof(uint32_t); } }; template <> -struct type_traits { +struct TypeTraits { typedef Int32Array ArrayType; static inline int bytes_required(int elements) { return elements * sizeof(int32_t); } }; template <> -struct type_traits { +struct TypeTraits { typedef UInt64Array ArrayType; static inline int bytes_required(int elements) { return elements * sizeof(uint64_t); } }; template <> -struct type_traits { +struct TypeTraits { typedef Int64Array ArrayType; static inline int bytes_required(int elements) { return elements * sizeof(int64_t); } }; template <> -struct type_traits { +struct TypeTraits { typedef TimestampArray ArrayType; static inline int bytes_required(int elements) { return elements * sizeof(int64_t); } }; template <> -struct type_traits { +struct TypeTraits { typedef FloatArray ArrayType; static inline int bytes_required(int elements) { return elements * sizeof(float); } }; template <> -struct type_traits { +struct TypeTraits { typedef DoubleArray ArrayType; static inline int bytes_required(int elements) { return elements * sizeof(double); } @@ -293,7 +294,7 @@ class ARROW_EXPORT BooleanArray : public PrimitiveArray { }; template <> -struct type_traits { +struct TypeTraits { typedef BooleanArray ArrayType; static inline int bytes_required(int elements) { diff --git a/cpp/src/arrow/types/string-test.cc b/cpp/src/arrow/types/string-test.cc index 6807b00e8ca99..d897e30a3c6a2 100644 --- a/cpp/src/arrow/types/string-test.cc +++ b/cpp/src/arrow/types/string-test.cc @@ -66,18 +66,13 @@ class TestStringContainer : public ::testing::Test { void MakeArray() { length_ = offsets_.size() - 1; - int nchars = chars_.size(); - value_buf_ = test::to_buffer(chars_); - values_ = ArrayPtr(new UInt8Array(nchars, value_buf_)); - offsets_buf_ = test::to_buffer(offsets_); - null_bitmap_ = test::bytes_to_null_buffer(valid_bytes_); null_count_ = test::null_count(valid_bytes_); strings_ = std::make_shared( - length_, offsets_buf_, values_, null_count_, null_bitmap_); + length_, offsets_buf_, value_buf_, null_count_, null_bitmap_); } protected: @@ -94,7 +89,6 @@ class TestStringContainer : public ::testing::Test { int null_count_; int length_; - ArrayPtr values_; std::shared_ptr strings_; }; @@ -122,7 +116,7 @@ TEST_F(TestStringContainer, TestListFunctions) { TEST_F(TestStringContainer, TestDestructor) { auto arr = std::make_shared( - length_, offsets_buf_, values_, null_count_, null_bitmap_); + length_, offsets_buf_, value_buf_, null_count_, null_bitmap_); } TEST_F(TestStringContainer, TestGetString) { @@ -147,7 +141,10 @@ class TestStringBuilder : public TestBuilder { } void Done() { - result_ = std::dynamic_pointer_cast(builder_->Finish()); + std::shared_ptr out; + EXPECT_OK(builder_->Finish(&out)); + + result_ = std::dynamic_pointer_cast(out); result_->Validate(); } @@ -178,7 +175,7 @@ TEST_F(TestStringBuilder, TestScalarAppend) { ASSERT_EQ(reps * N, result_->length()); ASSERT_EQ(reps, result_->null_count()); - ASSERT_EQ(reps * 6, result_->values()->length()); + ASSERT_EQ(reps * 6, result_->data()->size()); int32_t length; int32_t pos = 0; @@ -218,18 +215,14 @@ class TestBinaryContainer : public ::testing::Test { void MakeArray() { length_ = offsets_.size() - 1; - int nchars = chars_.size(); - value_buf_ = test::to_buffer(chars_); - values_ = ArrayPtr(new UInt8Array(nchars, value_buf_)); - offsets_buf_ = test::to_buffer(offsets_); null_bitmap_ = test::bytes_to_null_buffer(valid_bytes_); null_count_ = test::null_count(valid_bytes_); strings_ = std::make_shared( - length_, offsets_buf_, values_, null_count_, null_bitmap_); + length_, offsets_buf_, value_buf_, null_count_, null_bitmap_); } protected: @@ -246,7 +239,6 @@ class TestBinaryContainer : public ::testing::Test { int null_count_; int length_; - ArrayPtr values_; std::shared_ptr strings_; }; @@ -274,7 +266,7 @@ TEST_F(TestBinaryContainer, TestListFunctions) { TEST_F(TestBinaryContainer, TestDestructor) { auto arr = std::make_shared( - length_, offsets_buf_, values_, null_count_, null_bitmap_); + length_, offsets_buf_, value_buf_, null_count_, null_bitmap_); } TEST_F(TestBinaryContainer, TestGetValue) { @@ -298,7 +290,10 @@ class TestBinaryBuilder : public TestBuilder { } void Done() { - result_ = std::dynamic_pointer_cast(builder_->Finish()); + std::shared_ptr out; + EXPECT_OK(builder_->Finish(&out)); + + result_ = std::dynamic_pointer_cast(out); result_->Validate(); } @@ -330,7 +325,7 @@ TEST_F(TestBinaryBuilder, TestScalarAppend) { ASSERT_OK(result_->Validate()); ASSERT_EQ(reps * N, result_->length()); ASSERT_EQ(reps, result_->null_count()); - ASSERT_EQ(reps * 6, result_->values()->length()); + ASSERT_EQ(reps * 6, result_->data()->size()); int32_t length; for (int i = 0; i < N * reps; ++i) { diff --git a/cpp/src/arrow/types/string.cc b/cpp/src/arrow/types/string.cc index 745ed8f7edb99..d692e13773f56 100644 --- a/cpp/src/arrow/types/string.cc +++ b/cpp/src/arrow/types/string.cc @@ -17,6 +17,7 @@ #include "arrow/types/string.h" +#include #include #include @@ -24,37 +25,77 @@ namespace arrow { -const std::shared_ptr BINARY(new BinaryType()); -const std::shared_ptr STRING(new StringType()); +static std::shared_ptr kBinary = std::make_shared(); +static std::shared_ptr kString = std::make_shared(); BinaryArray::BinaryArray(int32_t length, const std::shared_ptr& offsets, - const ArrayPtr& values, int32_t null_count, + const std::shared_ptr& data, int32_t null_count, const std::shared_ptr& null_bitmap) - : BinaryArray(BINARY, length, offsets, values, null_count, null_bitmap) {} + : BinaryArray(kBinary, length, offsets, data, null_count, null_bitmap) {} BinaryArray::BinaryArray(const TypePtr& type, int32_t length, - const std::shared_ptr& offsets, const ArrayPtr& values, int32_t null_count, - const std::shared_ptr& null_bitmap) - : ListArray(type, length, offsets, values, null_count, null_bitmap), - bytes_(std::dynamic_pointer_cast(values).get()), - raw_bytes_(bytes_->raw_data()) { - // Check in case the dynamic cast fails. - DCHECK(bytes_); + const std::shared_ptr& offsets, const std::shared_ptr& data, + int32_t null_count, const std::shared_ptr& null_bitmap) + : Array(type, length, null_count, null_bitmap), + offset_buffer_(offsets), + offsets_(reinterpret_cast(offset_buffer_->data())), + data_buffer_(data), + data_(nullptr) { + if (data_buffer_ != nullptr) { data_ = data_buffer_->data(); } } Status BinaryArray::Validate() const { - if (values()->null_count() > 0) { - std::stringstream ss; - ss << type()->ToString() << " can have null values in the value array"; - Status::Invalid(ss.str()); + // TODO(wesm): what to do here? + return Status::OK(); +} + +bool BinaryArray::EqualsExact(const BinaryArray& other) const { + if (!Array::EqualsExact(other)) { return false; } + + bool equal_offsets = + offset_buffer_->Equals(*other.offset_buffer_, (length_ + 1) * sizeof(int32_t)); + if (!equal_offsets) { return false; } + + return data_buffer_->Equals(*other.data_buffer_, data_buffer_->size()); +} + +bool BinaryArray::Equals(const std::shared_ptr& arr) const { + if (this == arr.get()) { return true; } + if (this->type_enum() != arr->type_enum()) { return false; } + return EqualsExact(*static_cast(arr.get())); +} + +bool BinaryArray::RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx, + const std::shared_ptr& arr) const { + if (this == arr.get()) { return true; } + if (!arr) { return false; } + if (this->type_enum() != arr->type_enum()) { return false; } + const auto other = static_cast(arr.get()); + for (int32_t i = start_idx, o_i = other_start_idx; i < end_idx; ++i, ++o_i) { + const bool is_null = IsNull(i); + if (is_null != arr->IsNull(o_i)) { return false; } + if (is_null) continue; + const int32_t begin_offset = offset(i); + const int32_t end_offset = offset(i + 1); + const int32_t other_begin_offset = other->offset(o_i); + const int32_t other_end_offset = other->offset(o_i + 1); + // Underlying can't be equal if the size isn't equal + if (end_offset - begin_offset != other_end_offset - other_begin_offset) { + return false; + } + + if (std::memcmp(data_ + begin_offset, other->data_ + other_begin_offset, + end_offset - begin_offset)) { + return false; + } } - return ListArray::Validate(); + return true; } StringArray::StringArray(int32_t length, const std::shared_ptr& offsets, - const ArrayPtr& values, int32_t null_count, + const std::shared_ptr& data, int32_t null_count, const std::shared_ptr& null_bitmap) - : StringArray(STRING, length, offsets, values, null_count, null_bitmap) {} + : BinaryArray(kString, length, offsets, data, null_count, null_bitmap) {} Status StringArray::Validate() const { // TODO(emkornfield) Validate proper UTF8 code points? @@ -72,4 +113,28 @@ BinaryBuilder::BinaryBuilder(MemoryPool* pool, const TypePtr& type) byte_builder_ = static_cast(value_builder_.get()); } +Status BinaryBuilder::Finish(std::shared_ptr* out) { + std::shared_ptr result; + RETURN_NOT_OK(ListBuilder::Finish(&result)); + + const auto list = std::dynamic_pointer_cast(result); + auto values = std::dynamic_pointer_cast(list->values()); + + *out = std::make_shared(list->length(), list->offset_buffer(), + values->data(), list->null_count(), list->null_bitmap()); + return Status::OK(); +} + +Status StringBuilder::Finish(std::shared_ptr* out) { + std::shared_ptr result; + RETURN_NOT_OK(ListBuilder::Finish(&result)); + + const auto list = std::dynamic_pointer_cast(result); + auto values = std::dynamic_pointer_cast(list->values()); + + *out = std::make_shared(list->length(), list->offset_buffer(), + values->data(), list->null_count(), list->null_bitmap()); + return Status::OK(); +} + } // namespace arrow diff --git a/cpp/src/arrow/types/string.h b/cpp/src/arrow/types/string.h index bab0c58f617b2..aaba49c60237d 100644 --- a/cpp/src/arrow/types/string.h +++ b/cpp/src/arrow/types/string.h @@ -35,15 +35,16 @@ namespace arrow { class Buffer; class MemoryPool; -class ARROW_EXPORT BinaryArray : public ListArray { +class ARROW_EXPORT BinaryArray : public Array { public: BinaryArray(int32_t length, const std::shared_ptr& offsets, - const ArrayPtr& values, int32_t null_count = 0, + const std::shared_ptr& data, int32_t null_count = 0, const std::shared_ptr& null_bitmap = nullptr); + // Constructor that allows sub-classes/builders to propagate there logical type up the // class hierarchy. BinaryArray(const TypePtr& type, int32_t length, const std::shared_ptr& offsets, - const ArrayPtr& values, int32_t null_count = 0, + const std::shared_ptr& data, int32_t null_count = 0, const std::shared_ptr& null_bitmap = nullptr); // Return the pointer to the given elements bytes @@ -53,28 +54,38 @@ class ARROW_EXPORT BinaryArray : public ListArray { DCHECK(out_length); const int32_t pos = offsets_[i]; *out_length = offsets_[i + 1] - pos; - return raw_bytes_ + pos; + return data_ + pos; } + std::shared_ptr data() const { return data_buffer_; } + std::shared_ptr offsets() const { return offset_buffer_; } + + int32_t offset(int i) const { return offsets_[i]; } + + // Neither of these functions will perform boundschecking + int32_t value_offset(int i) const { return offsets_[i]; } + int32_t value_length(int i) const { return offsets_[i + 1] - offsets_[i]; } + + bool EqualsExact(const BinaryArray& other) const; + bool Equals(const std::shared_ptr& arr) const override; + bool RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx, + const ArrayPtr& arr) const override; + Status Validate() const override; private: - UInt8Array* bytes_; - const uint8_t* raw_bytes_; + std::shared_ptr offset_buffer_; + const int32_t* offsets_; + + std::shared_ptr data_buffer_; + const uint8_t* data_; }; class ARROW_EXPORT StringArray : public BinaryArray { public: StringArray(int32_t length, const std::shared_ptr& offsets, - const ArrayPtr& values, int32_t null_count = 0, + const std::shared_ptr& data, int32_t null_count = 0, const std::shared_ptr& null_bitmap = nullptr); - // Constructor that allows overriding the logical type, so subclasses can propagate - // there - // up the class hierarchy. - StringArray(const TypePtr& type, int32_t length, const std::shared_ptr& offsets, - const ArrayPtr& values, int32_t null_count = 0, - const std::shared_ptr& null_bitmap = nullptr) - : BinaryArray(type, length, offsets, values, null_count, null_bitmap) {} // Construct a std::string // TODO: std::bad_alloc possibility @@ -98,9 +109,7 @@ class ARROW_EXPORT BinaryBuilder : public ListBuilder { return byte_builder_->Append(value, length); } - std::shared_ptr Finish() override { - return ListBuilder::Transfer(); - } + Status Finish(std::shared_ptr* out) override; protected: UInt8Builder* byte_builder_; @@ -112,6 +121,8 @@ class ARROW_EXPORT StringBuilder : public BinaryBuilder { explicit StringBuilder(MemoryPool* pool, const TypePtr& type) : BinaryBuilder(pool, type) {} + Status Finish(std::shared_ptr* out) override; + Status Append(const std::string& value) { return Append(value.c_str(), value.size()); } Status Append(const char* value, int32_t length) { @@ -119,10 +130,6 @@ class ARROW_EXPORT StringBuilder : public BinaryBuilder { } Status Append(const std::vector& values, uint8_t* null_bytes); - - std::shared_ptr Finish() override { - return ListBuilder::Transfer(); - } }; } // namespace arrow diff --git a/cpp/src/arrow/types/struct-test.cc b/cpp/src/arrow/types/struct-test.cc index ccf5a52dc831c..8e82c389a9423 100644 --- a/cpp/src/arrow/types/struct-test.cc +++ b/cpp/src/arrow/types/struct-test.cc @@ -119,7 +119,11 @@ class TestStructBuilder : public TestBuilder { ASSERT_EQ(2, static_cast(builder_->field_builders().size())); } - void Done() { result_ = std::dynamic_pointer_cast(builder_->Finish()); } + void Done() { + std::shared_ptr out; + ASSERT_OK(builder_->Finish(&out)); + result_ = std::dynamic_pointer_cast(out); + } protected: std::vector value_fields_; @@ -294,7 +298,8 @@ TEST_F(TestStructBuilder, TestEquality) { for (int32_t value : int_values) { int_vb->UnsafeAppend(value); } - array = builder_->Finish(); + + ASSERT_OK(builder_->Finish(&array)); ASSERT_OK(builder_->Resize(list_lengths.size())); ASSERT_OK(char_vb->Resize(list_values.size())); @@ -308,7 +313,8 @@ TEST_F(TestStructBuilder, TestEquality) { for (int32_t value : int_values) { int_vb->UnsafeAppend(value); } - equal_array = builder_->Finish(); + + ASSERT_OK(builder_->Finish(&equal_array)); ASSERT_OK(builder_->Resize(list_lengths.size())); ASSERT_OK(char_vb->Resize(list_values.size())); @@ -323,7 +329,8 @@ TEST_F(TestStructBuilder, TestEquality) { for (int32_t value : int_values) { int_vb->UnsafeAppend(value); } - unequal_bitmap_array = builder_->Finish(); + + ASSERT_OK(builder_->Finish(&unequal_bitmap_array)); ASSERT_OK(builder_->Resize(list_lengths.size())); ASSERT_OK(char_vb->Resize(list_values.size())); @@ -339,7 +346,8 @@ TEST_F(TestStructBuilder, TestEquality) { for (int32_t value : int_values) { int_vb->UnsafeAppend(value); } - unequal_offsets_array = builder_->Finish(); + + ASSERT_OK(builder_->Finish(&unequal_offsets_array)); ASSERT_OK(builder_->Resize(list_lengths.size())); ASSERT_OK(char_vb->Resize(list_values.size())); @@ -354,7 +362,8 @@ TEST_F(TestStructBuilder, TestEquality) { for (int32_t value : unequal_int_values) { int_vb->UnsafeAppend(value); } - unequal_values_array = builder_->Finish(); + + ASSERT_OK(builder_->Finish(&unequal_values_array)); // Test array equality EXPECT_TRUE(array->Equals(array)); diff --git a/cpp/src/arrow/types/struct.cc b/cpp/src/arrow/types/struct.cc index e8176f08268b4..369c29d15ef96 100644 --- a/cpp/src/arrow/types/struct.cc +++ b/cpp/src/arrow/types/struct.cc @@ -87,4 +87,18 @@ Status StructArray::Validate() const { return Status::OK(); } +Status StructBuilder::Finish(std::shared_ptr* out) { + std::vector> fields(field_builders_.size()); + for (size_t i = 0; i < field_builders_.size(); ++i) { + RETURN_NOT_OK(field_builders_[i]->Finish(&fields[i])); + } + + *out = std::make_shared(type_, length_, fields, null_count_, null_bitmap_); + + null_bitmap_ = nullptr; + capacity_ = length_ = null_count_ = 0; + + return Status::OK(); +} + } // namespace arrow diff --git a/cpp/src/arrow/types/struct.h b/cpp/src/arrow/types/struct.h index 63955eb31bb7d..65b8daf214a69 100644 --- a/cpp/src/arrow/types/struct.h +++ b/cpp/src/arrow/types/struct.h @@ -73,6 +73,8 @@ class ARROW_EXPORT StructBuilder : public ArrayBuilder { field_builders_ = field_builders; } + Status Finish(std::shared_ptr* out) override; + // Null bitmap is of equal length to every child field, and any zero byte // will be considered as a null for that field, but users must using app- // end methods or advance methods of the child builders' independently to @@ -83,21 +85,6 @@ class ARROW_EXPORT StructBuilder : public ArrayBuilder { return Status::OK(); } - std::shared_ptr Finish() override { - std::vector fields; - for (auto it : field_builders_) { - fields.push_back(it->Finish()); - } - - auto result = - std::make_shared(type_, length_, fields, null_count_, null_bitmap_); - - null_bitmap_ = nullptr; - capacity_ = length_ = null_count_ = 0; - - return result; - } - // Append an element to the Struct. All child-builders' Append method must // be called independently to maintain data-structure consistency. Status Append(bool is_valid = true) { diff --git a/cpp/src/arrow/util/status.cc b/cpp/src/arrow/util/status.cc index 8dd07d0d064e7..08e9ae3946e51 100644 --- a/cpp/src/arrow/util/status.cc +++ b/cpp/src/arrow/util/status.cc @@ -49,12 +49,18 @@ std::string Status::CodeAsString() const { case StatusCode::KeyError: type = "Key error"; break; + case StatusCode::TypeError: + type = "Type error"; + break; case StatusCode::Invalid: type = "Invalid"; break; case StatusCode::IOError: type = "IOError"; break; + case StatusCode::UnknownError: + type = "Unknown error"; + break; case StatusCode::NotImplemented: type = "NotImplemented"; break; diff --git a/cpp/src/arrow/util/status.h b/cpp/src/arrow/util/status.h index d5585313c728b..05f5b749b60cb 100644 --- a/cpp/src/arrow/util/status.h +++ b/cpp/src/arrow/util/status.h @@ -78,9 +78,10 @@ enum class StatusCode : char { OK = 0, OutOfMemory = 1, KeyError = 2, - Invalid = 3, - IOError = 4, - + TypeError = 3, + Invalid = 4, + IOError = 5, + UnknownError = 9, NotImplemented = 10, }; @@ -106,6 +107,14 @@ class ARROW_EXPORT Status { return Status(StatusCode::KeyError, msg, -1); } + static Status TypeError(const std::string& msg) { + return Status(StatusCode::TypeError, msg, -1); + } + + static Status UnknownError(const std::string& msg) { + return Status(StatusCode::UnknownError, msg, -1); + } + static Status NotImplemented(const std::string& msg) { return Status(StatusCode::NotImplemented, msg, -1); } @@ -125,6 +134,8 @@ class ARROW_EXPORT Status { bool IsKeyError() const { return code() == StatusCode::KeyError; } bool IsInvalid() const { return code() == StatusCode::Invalid; } bool IsIOError() const { return code() == StatusCode::IOError; } + + bool IsUnknownError() const { return code() == StatusCode::UnknownError; } bool IsNotImplemented() const { return code() == StatusCode::NotImplemented; } // Return a string representation of this status suitable for printing. diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 55f6d0543a108..4357fa05ff864 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -438,8 +438,6 @@ set(PYARROW_SRCS src/pyarrow/config.cc src/pyarrow/helpers.cc src/pyarrow/io.cc - src/pyarrow/status.cc - src/pyarrow/adapters/builtin.cc src/pyarrow/adapters/pandas.cc ) diff --git a/python/doc/index.rst b/python/doc/index.rst index 550e544eef9e8..88725badc1e24 100644 --- a/python/doc/index.rst +++ b/python/doc/index.rst @@ -1,3 +1,20 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + Apache Arrow (Python) ===================== @@ -25,4 +42,3 @@ Indices and tables * :ref:`genindex` * :ref:`modindex` * :ref:`search` - diff --git a/python/pyarrow/error.pxd b/python/pyarrow/error.pxd index 891d1ac1c7ea0..4fb46c25fafe4 100644 --- a/python/pyarrow/error.pxd +++ b/python/pyarrow/error.pxd @@ -16,7 +16,5 @@ # under the License. from pyarrow.includes.libarrow cimport CStatus -from pyarrow.includes.pyarrow cimport PyStatus -cdef int check_cstatus(const CStatus& status) nogil except -1 -cdef int check_status(const PyStatus& status) nogil except -1 +cdef int check_status(const CStatus& status) nogil except -1 diff --git a/python/pyarrow/error.pyx b/python/pyarrow/error.pyx index a2c53fed8c6a0..b8a82b3754c1b 100644 --- a/python/pyarrow/error.pyx +++ b/python/pyarrow/error.pyx @@ -22,15 +22,7 @@ from pyarrow.compat import frombytes class ArrowException(Exception): pass -cdef int check_cstatus(const CStatus& status) nogil except -1: - if status.ok(): - return 0 - - cdef c_string c_message = status.ToString() - with gil: - raise ArrowException(frombytes(c_message)) - -cdef int check_status(const PyStatus& status) nogil except -1: +cdef int check_status(const CStatus& status) nogil except -1: if status.ok(): return 0 diff --git a/python/pyarrow/includes/pyarrow.pxd b/python/pyarrow/includes/pyarrow.pxd index 7c47f21854e33..e1da1914c5743 100644 --- a/python/pyarrow/includes/pyarrow.pxd +++ b/python/pyarrow/includes/pyarrow.pxd @@ -25,36 +25,19 @@ cimport pyarrow.includes.libarrow_io as arrow_io cdef extern from "pyarrow/api.h" namespace "pyarrow" nogil: - # We can later add more of the common status factory methods as needed - cdef PyStatus PyStatus_OK "Status::OK"() - - cdef cppclass PyStatus "pyarrow::Status": - PyStatus() - - c_string ToString() - - c_bool ok() - c_bool IsOutOfMemory() - c_bool IsKeyError() - c_bool IsTypeError() - c_bool IsIOError() - c_bool IsValueError() - c_bool IsNotImplemented() - c_bool IsArrowError() - shared_ptr[CDataType] GetPrimitiveType(Type type) - PyStatus ConvertPySequence(object obj, shared_ptr[CArray]* out) + CStatus ConvertPySequence(object obj, shared_ptr[CArray]* out) - PyStatus PandasToArrow(MemoryPool* pool, object ao, - shared_ptr[CArray]* out) - PyStatus PandasMaskedToArrow(MemoryPool* pool, object ao, object mo, - shared_ptr[CArray]* out) + CStatus PandasToArrow(MemoryPool* pool, object ao, + shared_ptr[CArray]* out) + CStatus PandasMaskedToArrow(MemoryPool* pool, object ao, object mo, + shared_ptr[CArray]* out) - PyStatus ConvertArrayToPandas(const shared_ptr[CArray]& arr, - object py_ref, PyObject** out) + CStatus ConvertArrayToPandas(const shared_ptr[CArray]& arr, + object py_ref, PyObject** out) - PyStatus ConvertColumnToPandas(const shared_ptr[CColumn]& arr, - object py_ref, PyObject** out) + CStatus ConvertColumnToPandas(const shared_ptr[CColumn]& arr, + object py_ref, PyObject** out) MemoryPool* get_memory_pool() diff --git a/python/pyarrow/io.pyx b/python/pyarrow/io.pyx index 8970e06effdd0..16ebfa1138e46 100644 --- a/python/pyarrow/io.pyx +++ b/python/pyarrow/io.pyx @@ -28,7 +28,7 @@ cimport pyarrow.includes.pyarrow as pyarrow from pyarrow.includes.libarrow_io cimport * from pyarrow.compat import frombytes, tobytes -from pyarrow.error cimport check_cstatus +from pyarrow.error cimport check_status cimport cpython as cp @@ -57,9 +57,9 @@ cdef class NativeFile: if self.is_open: with nogil: if self.is_readonly: - check_cstatus(self.rd_file.get().Close()) + check_status(self.rd_file.get().Close()) else: - check_cstatus(self.wr_file.get().Close()) + check_status(self.wr_file.get().Close()) self.is_open = False cdef read_handle(self, shared_ptr[ReadableFileInterface]* file): @@ -88,22 +88,22 @@ cdef class NativeFile: cdef int64_t size self._assert_readable() with nogil: - check_cstatus(self.rd_file.get().GetSize(&size)) + check_status(self.rd_file.get().GetSize(&size)) return size def tell(self): cdef int64_t position with nogil: if self.is_readonly: - check_cstatus(self.rd_file.get().Tell(&position)) + check_status(self.rd_file.get().Tell(&position)) else: - check_cstatus(self.wr_file.get().Tell(&position)) + check_status(self.wr_file.get().Tell(&position)) return position def seek(self, int64_t position): self._assert_readable() with nogil: - check_cstatus(self.rd_file.get().Seek(position)) + check_status(self.rd_file.get().Seek(position)) def write(self, data): """ @@ -116,7 +116,7 @@ cdef class NativeFile: cdef const uint8_t* buf = cp.PyBytes_AS_STRING(data) cdef int64_t bufsize = len(data) with nogil: - check_cstatus(self.wr_file.get().Write(buf, bufsize)) + check_status(self.wr_file.get().Write(buf, bufsize)) def read(self, int nbytes): cdef: @@ -127,8 +127,7 @@ cdef class NativeFile: self._assert_readable() with nogil: - check_cstatus(self.rd_file.get() - .ReadB(nbytes, &out)) + check_status(self.rd_file.get().ReadB(nbytes, &out)) result = cp.PyBytes_FromStringAndSize( out.get().data(), out.get().size()) @@ -223,7 +222,7 @@ cdef class InMemoryOutputStream(NativeFile): def get_result(self): cdef Buffer result = Buffer() - check_cstatus(self.wr_file.get().Close()) + check_status(self.wr_file.get().Close()) result.init( self.buffer) self.is_open = False @@ -270,7 +269,7 @@ except ImportError: def have_libhdfs(): try: - check_cstatus(ConnectLibHdfs()) + check_status(ConnectLibHdfs()) return True except: return False @@ -304,7 +303,7 @@ cdef class HdfsClient: def close(self): self._ensure_client() with nogil: - check_cstatus(self.client.get().Disconnect()) + check_status(self.client.get().Disconnect()) self.is_open = False cdef _ensure_client(self): @@ -341,8 +340,7 @@ cdef class HdfsClient: conf.user = tobytes(user) with nogil: - check_cstatus( - CHdfsClient.Connect(&conf, &out.client)) + check_status(CHdfsClient.Connect(&conf, &out.client)) out.is_open = True return out @@ -383,8 +381,8 @@ cdef class HdfsClient: self._ensure_client() with nogil: - check_cstatus(self.client.get() - .ListDirectory(c_path, &listing)) + check_status(self.client.get() + .ListDirectory(c_path, &listing)) cdef const HdfsPathInfo* info for i in range( listing.size()): @@ -422,8 +420,8 @@ cdef class HdfsClient: cdef c_string c_path = tobytes(path) with nogil: - check_cstatus(self.client.get() - .CreateDirectory(c_path)) + check_status(self.client.get() + .CreateDirectory(c_path)) def delete(self, path, bint recursive=False): """ @@ -439,8 +437,8 @@ cdef class HdfsClient: cdef c_string c_path = tobytes(path) with nogil: - check_cstatus(self.client.get() - .Delete(c_path, recursive)) + check_status(self.client.get() + .Delete(c_path, recursive)) def open(self, path, mode='rb', buffer_size=None, replication=None, default_block_size=None): @@ -473,7 +471,7 @@ cdef class HdfsClient: append = True with nogil: - check_cstatus( + check_status( self.client.get() .OpenWriteable(c_path, append, c_buffer_size, c_replication, c_default_block_size, @@ -484,8 +482,8 @@ cdef class HdfsClient: out.is_readonly = False else: with nogil: - check_cstatus(self.client.get() - .OpenReadable(c_path, &rd_handle)) + check_status(self.client.get() + .OpenReadable(c_path, &rd_handle)) out.rd_file = rd_handle out.is_readonly = True @@ -579,9 +577,9 @@ cdef class HdfsFile(NativeFile): try: with nogil: while total_bytes < nbytes: - check_cstatus(self.rd_file.get() - .Read(rpc_chunksize, &bytes_read, - buf + total_bytes)) + check_status(self.rd_file.get() + .Read(rpc_chunksize, &bytes_read, + buf + total_bytes)) total_bytes += bytes_read @@ -647,8 +645,8 @@ cdef class HdfsFile(NativeFile): try: while True: with nogil: - check_cstatus(self.rd_file.get() - .Read(self.buffer_size, &bytes_read, buf)) + check_status(self.rd_file.get() + .Read(self.buffer_size, &bytes_read, buf)) total_bytes += bytes_read diff --git a/python/pyarrow/ipc.pyx b/python/pyarrow/ipc.pyx index f8da3a70da819..46deb5ad0c35d 100644 --- a/python/pyarrow/ipc.pyx +++ b/python/pyarrow/ipc.pyx @@ -26,7 +26,7 @@ from pyarrow.includes.libarrow_io cimport * from pyarrow.includes.libarrow_ipc cimport * cimport pyarrow.includes.pyarrow as pyarrow -from pyarrow.error cimport check_cstatus +from pyarrow.error cimport check_status from pyarrow.io cimport NativeFile from pyarrow.schema cimport Schema from pyarrow.table cimport RecordBatch @@ -89,8 +89,8 @@ cdef class ArrowFileWriter: get_writer(sink, &self.sink) with nogil: - check_cstatus(CFileWriter.Open(self.sink.get(), schema.sp_schema, - &self.writer)) + check_status(CFileWriter.Open(self.sink.get(), schema.sp_schema, + &self.writer)) self.closed = False @@ -101,12 +101,12 @@ cdef class ArrowFileWriter: def write_record_batch(self, RecordBatch batch): cdef CRecordBatch* bptr = batch.batch with nogil: - check_cstatus(self.writer.get() - .WriteRecordBatch(bptr.columns(), bptr.num_rows())) + check_status(self.writer.get() + .WriteRecordBatch(bptr.columns(), bptr.num_rows())) def close(self): with nogil: - check_cstatus(self.writer.get().Close()) + check_status(self.writer.get().Close()) self.closed = True @@ -124,9 +124,9 @@ cdef class ArrowFileReader: with nogil: if offset != 0: - check_cstatus(CFileReader.Open2(reader, offset, &self.reader)) + check_status(CFileReader.Open2(reader, offset, &self.reader)) else: - check_cstatus(CFileReader.Open(reader, &self.reader)) + check_status(CFileReader.Open(reader, &self.reader)) property num_dictionaries: @@ -147,7 +147,7 @@ cdef class ArrowFileReader: raise ValueError('Batch number {0} out of range'.format(i)) with nogil: - check_cstatus(self.reader.get().GetRecordBatch(i, &batch)) + check_status(self.reader.get().GetRecordBatch(i, &batch)) result = RecordBatch() result.init(batch) diff --git a/python/pyarrow/parquet.pyx b/python/pyarrow/parquet.pyx index 2abe57b33ed48..019dd2c1de489 100644 --- a/python/pyarrow/parquet.pyx +++ b/python/pyarrow/parquet.pyx @@ -26,7 +26,7 @@ cimport pyarrow.includes.pyarrow as pyarrow from pyarrow.compat import tobytes from pyarrow.error import ArrowException -from pyarrow.error cimport check_cstatus +from pyarrow.error cimport check_status from pyarrow.io import NativeFile from pyarrow.table cimport Table @@ -62,7 +62,7 @@ cdef class ParquetReader: cdef shared_ptr[ReadableFileInterface] cpp_handle file.read_handle(&cpp_handle) - check_cstatus(OpenFile(cpp_handle, &self.allocator, &self.reader)) + check_status(OpenFile(cpp_handle, &self.allocator, &self.reader)) def read_all(self): cdef: @@ -70,8 +70,8 @@ cdef class ParquetReader: shared_ptr[CTable] ctable with nogil: - check_cstatus(self.reader.get() - .ReadFlatTable(&ctable)) + check_status(self.reader.get() + .ReadFlatTable(&ctable)) table.init(ctable) return table @@ -80,7 +80,7 @@ cdef class ParquetReader: def read_table(source, columns=None): """ Read a Table from Parquet format - + Returns ------- pyarrow.table.Table @@ -176,5 +176,5 @@ def write_table(table, filename, chunk_size=None, version=None, sink.reset(new LocalFileOutputStream(tobytes(filename))) with nogil: - check_cstatus(WriteFlatTable(ctable_, default_memory_pool(), sink, - chunk_size_, properties_builder.build())) + check_status(WriteFlatTable(ctable_, default_memory_pool(), sink, + chunk_size_, properties_builder.build())) diff --git a/python/src/pyarrow/adapters/builtin.cc b/python/src/pyarrow/adapters/builtin.cc index 680f3a539b5fa..c034fbd977747 100644 --- a/python/src/pyarrow/adapters/builtin.cc +++ b/python/src/pyarrow/adapters/builtin.cc @@ -20,13 +20,14 @@ #include "pyarrow/adapters/builtin.h" -#include +#include "arrow/api.h" +#include "arrow/util/status.h" #include "pyarrow/helpers.h" -#include "pyarrow/status.h" using arrow::ArrayBuilder; using arrow::DataType; +using arrow::Status; using arrow::Type; namespace pyarrow { @@ -129,7 +130,7 @@ class SeqVisitor { PyObject* item = item_ref.obj(); if (PyList_Check(item)) { - PY_RETURN_NOT_OK(Visit(item, level + 1)); + RETURN_NOT_OK(Visit(item, level + 1)); } else if (PyDict_Check(item)) { return Status::NotImplemented("No type inference for dicts"); } else { @@ -164,9 +165,9 @@ class SeqVisitor { Status Validate() const { if (scalars_.total_count() > 0) { if (num_nesting_levels() > 1) { - return Status::ValueError("Mixed nesting levels not supported"); + return Status::Invalid("Mixed nesting levels not supported"); } else if (max_observed_level() < max_nesting_level_) { - return Status::ValueError("Mixed nesting levels not supported"); + return Status::Invalid("Mixed nesting levels not supported"); } } return Status::OK(); @@ -216,8 +217,8 @@ static Status InferArrowType(PyObject* obj, int64_t* size, } SeqVisitor seq_visitor; - PY_RETURN_NOT_OK(seq_visitor.Visit(obj)); - PY_RETURN_NOT_OK(seq_visitor.Validate()); + RETURN_NOT_OK(seq_visitor.Visit(obj)); + RETURN_NOT_OK(seq_visitor.Validate()); *out_type = seq_visitor.GetType(); @@ -259,7 +260,7 @@ class BoolConverter : public TypedConverter { public: Status AppendData(PyObject* seq) override { Py_ssize_t size = PySequence_Size(seq); - RETURN_ARROW_NOT_OK(typed_builder_->Reserve(size)); + RETURN_NOT_OK(typed_builder_->Reserve(size)); for (int64_t i = 0; i < size; ++i) { OwnedRef item(PySequence_GetItem(seq, i)); if (item.obj() == Py_None) { @@ -281,7 +282,7 @@ class Int64Converter : public TypedConverter { Status AppendData(PyObject* seq) override { int64_t val; Py_ssize_t size = PySequence_Size(seq); - RETURN_ARROW_NOT_OK(typed_builder_->Reserve(size)); + RETURN_NOT_OK(typed_builder_->Reserve(size)); for (int64_t i = 0; i < size; ++i) { OwnedRef item(PySequence_GetItem(seq, i)); if (item.obj() == Py_None) { @@ -301,7 +302,7 @@ class DoubleConverter : public TypedConverter { Status AppendData(PyObject* seq) override { double val; Py_ssize_t size = PySequence_Size(seq); - RETURN_ARROW_NOT_OK(typed_builder_->Reserve(size)); + RETURN_NOT_OK(typed_builder_->Reserve(size)); for (int64_t i = 0; i < size; ++i) { OwnedRef item(PySequence_GetItem(seq, i)); if (item.obj() == Py_None) { @@ -330,7 +331,7 @@ class StringConverter : public TypedConverter { OwnedRef holder(item); if (item == Py_None) { - RETURN_ARROW_NOT_OK(typed_builder_->AppendNull()); + RETURN_NOT_OK(typed_builder_->AppendNull()); continue; } else if (PyUnicode_Check(item)) { tmp.reset(PyUnicode_AsUTF8String(item)); @@ -344,7 +345,7 @@ class StringConverter : public TypedConverter { // No error checking length = PyBytes_GET_SIZE(bytes_obj); bytes = PyBytes_AS_STRING(bytes_obj); - RETURN_ARROW_NOT_OK(typed_builder_->Append(bytes, length)); + RETURN_NOT_OK(typed_builder_->Append(bytes, length)); } return Status::OK(); } @@ -359,10 +360,10 @@ class ListConverter : public TypedConverter { for (int64_t i = 0; i < size; ++i) { OwnedRef item(PySequence_GetItem(seq, i)); if (item.obj() == Py_None) { - RETURN_ARROW_NOT_OK(typed_builder_->AppendNull()); + RETURN_NOT_OK(typed_builder_->AppendNull()); } else { typed_builder_->Append(); - PY_RETURN_NOT_OK(value_converter_->AppendData(item.obj())); + RETURN_NOT_OK(value_converter_->AppendData(item.obj())); } } return Status::OK(); @@ -408,7 +409,7 @@ Status ListConverter::Init(const std::shared_ptr& builder) { Status ConvertPySequence(PyObject* obj, std::shared_ptr* out) { std::shared_ptr type; int64_t size; - PY_RETURN_NOT_OK(InferArrowType(obj, &size, &type)); + RETURN_NOT_OK(InferArrowType(obj, &size, &type)); // Handle NA / NullType case if (type->type == Type::NA) { @@ -426,14 +427,12 @@ Status ConvertPySequence(PyObject* obj, std::shared_ptr* out) { // Give the sequence converter an array builder std::shared_ptr builder; - RETURN_ARROW_NOT_OK(arrow::MakeBuilder(get_memory_pool(), type, &builder)); + RETURN_NOT_OK(arrow::MakeBuilder(get_memory_pool(), type, &builder)); converter->Init(builder); - PY_RETURN_NOT_OK(converter->AppendData(obj)); + RETURN_NOT_OK(converter->AppendData(obj)); - *out = builder->Finish(); - - return Status::OK(); + return builder->Finish(out); } } // namespace pyarrow diff --git a/python/src/pyarrow/adapters/builtin.h b/python/src/pyarrow/adapters/builtin.h index 4e997e31dd690..2ddfdaaf44134 100644 --- a/python/src/pyarrow/adapters/builtin.h +++ b/python/src/pyarrow/adapters/builtin.h @@ -30,14 +30,15 @@ #include "pyarrow/common.h" #include "pyarrow/visibility.h" -namespace arrow { class Array; } +namespace arrow { +class Array; +class Status; +} namespace pyarrow { -class Status; - PYARROW_EXPORT -Status ConvertPySequence(PyObject* obj, std::shared_ptr* out); +arrow::Status ConvertPySequence(PyObject* obj, std::shared_ptr* out); } // namespace pyarrow diff --git a/python/src/pyarrow/adapters/pandas.cc b/python/src/pyarrow/adapters/pandas.cc index b2fcd37aec944..5902b8341696d 100644 --- a/python/src/pyarrow/adapters/pandas.cc +++ b/python/src/pyarrow/adapters/pandas.cc @@ -31,10 +31,10 @@ #include "arrow/api.h" #include "arrow/util/bit-util.h" +#include "arrow/util/status.h" #include "pyarrow/common.h" #include "pyarrow/config.h" -#include "pyarrow/status.h" namespace pyarrow { @@ -42,6 +42,8 @@ using arrow::Array; using arrow::Column; using arrow::Field; using arrow::DataType; +using arrow::Status; + namespace util = arrow::util; // ---------------------------------------------------------------------- @@ -149,7 +151,7 @@ class ArrowSerializer { int null_bytes = util::bytes_for_bits(length_); null_bitmap_ = std::make_shared(pool_); - RETURN_ARROW_NOT_OK(null_bitmap_->Resize(null_bytes)); + RETURN_NOT_OK(null_bitmap_->Resize(null_bytes)); null_bitmap_data_ = null_bitmap_->mutable_data(); memset(null_bitmap_data_, 0, null_bytes); @@ -171,9 +173,9 @@ class ArrowSerializer { PyObject** objects = reinterpret_cast(PyArray_DATA(arr_)); arrow::TypePtr string_type(new arrow::StringType()); arrow::StringBuilder string_builder(pool_, string_type); - RETURN_ARROW_NOT_OK(string_builder.Resize(length_)); + RETURN_NOT_OK(string_builder.Resize(length_)); - arrow::Status s; + Status s; PyObject* obj; for (int64_t i = 0; i < length_; ++i) { obj = objects[i]; @@ -187,18 +189,16 @@ class ArrowSerializer { s = string_builder.Append(PyBytes_AS_STRING(obj), length); Py_DECREF(obj); if (!s.ok()) { - return Status::ArrowError(s.ToString()); + return s; } } else if (PyBytes_Check(obj)) { const int32_t length = PyBytes_GET_SIZE(obj); - RETURN_ARROW_NOT_OK(string_builder.Append(PyBytes_AS_STRING(obj), length)); + RETURN_NOT_OK(string_builder.Append(PyBytes_AS_STRING(obj), length)); } else { string_builder.AppendNull(); } } - *out = std::shared_ptr(string_builder.Finish()); - - return Status::OK(); + return string_builder.Finish(out); } Status ConvertBooleans(std::shared_ptr* out) { @@ -208,7 +208,7 @@ class ArrowSerializer { int nbytes = util::bytes_for_bits(length_); auto data = std::make_shared(pool_); - RETURN_ARROW_NOT_OK(data->Resize(nbytes)); + RETURN_NOT_OK(data->Resize(nbytes)); uint8_t* bitmap = data->mutable_data(); memset(bitmap, 0, nbytes); @@ -305,7 +305,7 @@ inline Status ArrowSerializer::MakeDataType(std::shared_ptrreset(new arrow::TimestampType(unit)); @@ -330,7 +330,7 @@ inline Status ArrowSerializer::Convert(std::shared_ptr* out) { RETURN_NOT_OK(ConvertData()); std::shared_ptr type; RETURN_NOT_OK(MakeDataType(&type)); - RETURN_ARROW_NOT_OK(MakePrimitiveArray(type, length_, data_, null_count, null_bitmap_, out)); + RETURN_NOT_OK(MakePrimitiveArray(type, length_, data_, null_count, null_bitmap_, out)); return Status::OK(); } @@ -389,7 +389,7 @@ template inline Status ArrowSerializer::ConvertData() { // TODO(wesm): strided arrays if (is_strided()) { - return Status::ValueError("no support for strided data yet"); + return Status::Invalid("no support for strided data yet"); } data_ = std::make_shared(arr_); @@ -399,12 +399,12 @@ inline Status ArrowSerializer::ConvertData() { template <> inline Status ArrowSerializer::ConvertData() { if (is_strided()) { - return Status::ValueError("no support for strided data yet"); + return Status::Invalid("no support for strided data yet"); } int nbytes = util::bytes_for_bits(length_); auto buffer = std::make_shared(pool_); - RETURN_ARROW_NOT_OK(buffer->Resize(nbytes)); + RETURN_NOT_OK(buffer->Resize(nbytes)); const uint8_t* values = reinterpret_cast(PyArray_DATA(arr_)); @@ -446,7 +446,7 @@ Status PandasMaskedToArrow(arrow::MemoryPool* pool, PyObject* ao, PyObject* mo, } if (PyArray_NDIM(arr) != 1) { - return Status::ValueError("only handle 1-dimensional arrays"); + return Status::Invalid("only handle 1-dimensional arrays"); } switch(PyArray_DESCR(arr)->type_num) { diff --git a/python/src/pyarrow/adapters/pandas.h b/python/src/pyarrow/adapters/pandas.h index 141d1219e64db..532495dd792db 100644 --- a/python/src/pyarrow/adapters/pandas.h +++ b/python/src/pyarrow/adapters/pandas.h @@ -32,27 +32,26 @@ namespace arrow { class Array; class Column; class MemoryPool; +class Status; } // namespace arrow namespace pyarrow { -class Status; - PYARROW_EXPORT -Status ConvertArrayToPandas(const std::shared_ptr& arr, PyObject* py_ref, - PyObject** out); +arrow::Status ConvertArrayToPandas(const std::shared_ptr& arr, + PyObject* py_ref, PyObject** out); PYARROW_EXPORT -Status ConvertColumnToPandas(const std::shared_ptr& col, PyObject* py_ref, - PyObject** out); +arrow::Status ConvertColumnToPandas(const std::shared_ptr& col, + PyObject* py_ref, PyObject** out); PYARROW_EXPORT -Status PandasMaskedToArrow(arrow::MemoryPool* pool, PyObject* ao, PyObject* mo, +arrow::Status PandasMaskedToArrow(arrow::MemoryPool* pool, PyObject* ao, PyObject* mo, std::shared_ptr* out); PYARROW_EXPORT -Status PandasToArrow(arrow::MemoryPool* pool, PyObject* ao, +arrow::Status PandasToArrow(arrow::MemoryPool* pool, PyObject* ao, std::shared_ptr* out); } // namespace pyarrow diff --git a/python/src/pyarrow/api.h b/python/src/pyarrow/api.h index 72be6afe02c76..6dbbc45d40ccc 100644 --- a/python/src/pyarrow/api.h +++ b/python/src/pyarrow/api.h @@ -18,8 +18,6 @@ #ifndef PYARROW_API_H #define PYARROW_API_H -#include "pyarrow/status.h" - #include "pyarrow/helpers.h" #include "pyarrow/adapters/builtin.h" diff --git a/python/src/pyarrow/common.cc b/python/src/pyarrow/common.cc index 09f3efb5a03bc..fa875f2b9aba1 100644 --- a/python/src/pyarrow/common.cc +++ b/python/src/pyarrow/common.cc @@ -21,10 +21,10 @@ #include #include -#include -#include +#include "arrow/util/memory-pool.h" +#include "arrow/util/status.h" -#include "pyarrow/status.h" +using arrow::Status; namespace pyarrow { @@ -33,18 +33,18 @@ class PyArrowMemoryPool : public arrow::MemoryPool { PyArrowMemoryPool() : bytes_allocated_(0) {} virtual ~PyArrowMemoryPool() {} - arrow::Status Allocate(int64_t size, uint8_t** out) override { + Status Allocate(int64_t size, uint8_t** out) override { std::lock_guard guard(pool_lock_); *out = static_cast(std::malloc(size)); if (*out == nullptr) { std::stringstream ss; ss << "malloc of size " << size << " failed"; - return arrow::Status::OutOfMemory(ss.str()); + return Status::OutOfMemory(ss.str()); } bytes_allocated_ += size; - return arrow::Status::OK(); + return Status::OK(); } int64_t bytes_allocated() const override { diff --git a/python/src/pyarrow/common.h b/python/src/pyarrow/common.h index 50c2577b93c9b..7f3131ef03dd8 100644 --- a/python/src/pyarrow/common.h +++ b/python/src/pyarrow/common.h @@ -29,13 +29,6 @@ namespace pyarrow { #define PYARROW_IS_PY2 PY_MAJOR_VERSION <= 2 -#define RETURN_ARROW_NOT_OK(s) do { \ - arrow::Status _s = (s); \ - if (!_s.ok()) { \ - return Status::ArrowError(s.ToString()); \ - } \ - } while (0); - class OwnedRef { public: OwnedRef() : obj_(nullptr) {} diff --git a/python/src/pyarrow/io.cc b/python/src/pyarrow/io.cc index 7bf32ffa8d22b..e6dbc12d429b0 100644 --- a/python/src/pyarrow/io.cc +++ b/python/src/pyarrow/io.cc @@ -20,12 +20,13 @@ #include #include -#include -#include -#include +#include "arrow/io/memory.h" +#include "arrow/util/memory-pool.h" +#include "arrow/util/status.h" #include "pyarrow/common.h" -#include "pyarrow/status.h" + +using arrow::Status; namespace pyarrow { @@ -41,7 +42,7 @@ PythonFile::~PythonFile() { Py_DECREF(file_); } -static arrow::Status CheckPyError() { +static Status CheckPyError() { if (PyErr_Occurred()) { PyObject *exc_type, *exc_value, *traceback; PyErr_Fetch(&exc_type, &exc_value, &traceback); @@ -51,35 +52,35 @@ static arrow::Status CheckPyError() { Py_XDECREF(exc_value); Py_XDECREF(traceback); PyErr_Clear(); - return arrow::Status::IOError(message); + return Status::IOError(message); } - return arrow::Status::OK(); + return Status::OK(); } -arrow::Status PythonFile::Close() { +Status PythonFile::Close() { // whence: 0 for relative to start of file, 2 for end of file PyObject* result = PyObject_CallMethod(file_, "close", "()"); Py_XDECREF(result); ARROW_RETURN_NOT_OK(CheckPyError()); - return arrow::Status::OK(); + return Status::OK(); } -arrow::Status PythonFile::Seek(int64_t position, int whence) { +Status PythonFile::Seek(int64_t position, int whence) { // whence: 0 for relative to start of file, 2 for end of file PyObject* result = PyObject_CallMethod(file_, "seek", "(ii)", position, whence); Py_XDECREF(result); ARROW_RETURN_NOT_OK(CheckPyError()); - return arrow::Status::OK(); + return Status::OK(); } -arrow::Status PythonFile::Read(int64_t nbytes, PyObject** out) { +Status PythonFile::Read(int64_t nbytes, PyObject** out) { PyObject* result = PyObject_CallMethod(file_, "read", "(i)", nbytes); ARROW_RETURN_NOT_OK(CheckPyError()); *out = result; - return arrow::Status::OK(); + return Status::OK(); } -arrow::Status PythonFile::Write(const uint8_t* data, int64_t nbytes) { +Status PythonFile::Write(const uint8_t* data, int64_t nbytes) { PyObject* py_data = PyBytes_FromStringAndSize( reinterpret_cast(data), nbytes); ARROW_RETURN_NOT_OK(CheckPyError()); @@ -88,10 +89,10 @@ arrow::Status PythonFile::Write(const uint8_t* data, int64_t nbytes) { Py_XDECREF(py_data); Py_XDECREF(result); ARROW_RETURN_NOT_OK(CheckPyError()); - return arrow::Status::OK(); + return Status::OK(); } -arrow::Status PythonFile::Tell(int64_t* position) { +Status PythonFile::Tell(int64_t* position) { PyObject* result = PyObject_CallMethod(file_, "tell", "()"); ARROW_RETURN_NOT_OK(CheckPyError()); @@ -101,7 +102,7 @@ arrow::Status PythonFile::Tell(int64_t* position) { // PyLong_AsLongLong can raise OverflowError ARROW_RETURN_NOT_OK(CheckPyError()); - return arrow::Status::OK(); + return Status::OK(); } // ---------------------------------------------------------------------- @@ -113,22 +114,22 @@ PyReadableFile::PyReadableFile(PyObject* file) { PyReadableFile::~PyReadableFile() {} -arrow::Status PyReadableFile::Close() { +Status PyReadableFile::Close() { PyGILGuard lock; return file_->Close(); } -arrow::Status PyReadableFile::Seek(int64_t position) { +Status PyReadableFile::Seek(int64_t position) { PyGILGuard lock; return file_->Seek(position, 0); } -arrow::Status PyReadableFile::Tell(int64_t* position) { +Status PyReadableFile::Tell(int64_t* position) { PyGILGuard lock; return file_->Tell(position); } -arrow::Status PyReadableFile::Read(int64_t nbytes, int64_t* bytes_read, uint8_t* out) { +Status PyReadableFile::Read(int64_t nbytes, int64_t* bytes_read, uint8_t* out) { PyGILGuard lock; PyObject* bytes_obj; ARROW_RETURN_NOT_OK(file_->Read(nbytes, &bytes_obj)); @@ -137,10 +138,10 @@ arrow::Status PyReadableFile::Read(int64_t nbytes, int64_t* bytes_read, uint8_t* std::memcpy(out, PyBytes_AS_STRING(bytes_obj), *bytes_read); Py_DECREF(bytes_obj); - return arrow::Status::OK(); + return Status::OK(); } -arrow::Status PyReadableFile::Read(int64_t nbytes, std::shared_ptr* out) { +Status PyReadableFile::Read(int64_t nbytes, std::shared_ptr* out) { PyGILGuard lock; PyObject* bytes_obj; @@ -149,10 +150,10 @@ arrow::Status PyReadableFile::Read(int64_t nbytes, std::shared_ptr(bytes_obj); Py_DECREF(bytes_obj); - return arrow::Status::OK(); + return Status::OK(); } -arrow::Status PyReadableFile::GetSize(int64_t* size) { +Status PyReadableFile::GetSize(int64_t* size) { PyGILGuard lock; int64_t current_position;; @@ -167,7 +168,7 @@ arrow::Status PyReadableFile::GetSize(int64_t* size) { ARROW_RETURN_NOT_OK(file_->Seek(current_position, 0)); *size = file_size; - return arrow::Status::OK(); + return Status::OK(); } bool PyReadableFile::supports_zero_copy() const { @@ -183,17 +184,17 @@ PyOutputStream::PyOutputStream(PyObject* file) { PyOutputStream::~PyOutputStream() {} -arrow::Status PyOutputStream::Close() { +Status PyOutputStream::Close() { PyGILGuard lock; return file_->Close(); } -arrow::Status PyOutputStream::Tell(int64_t* position) { +Status PyOutputStream::Tell(int64_t* position) { PyGILGuard lock; return file_->Tell(position); } -arrow::Status PyOutputStream::Write(const uint8_t* data, int64_t nbytes) { +Status PyOutputStream::Write(const uint8_t* data, int64_t nbytes) { PyGILGuard lock; return file_->Write(data, nbytes); } diff --git a/python/src/pyarrow/status.cc b/python/src/pyarrow/status.cc deleted file mode 100644 index 1cd54f6a78560..0000000000000 --- a/python/src/pyarrow/status.cc +++ /dev/null @@ -1,92 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. -// -// A Status encapsulates the result of an operation. It may indicate success, -// or it may indicate an error with an associated error message. -// -// Multiple threads can invoke const methods on a Status without -// external synchronization, but if any of the threads may call a -// non-const method, all threads accessing the same Status must use -// external synchronization. - -#include "pyarrow/status.h" - -#include -#include -#include - -namespace pyarrow { - -Status::Status(StatusCode code, const std::string& msg, int16_t posix_code) { - assert(code != StatusCode::OK); - const uint32_t size = msg.size(); - char* result = new char[size + 7]; - memcpy(result, &size, sizeof(size)); - result[4] = static_cast(code); - memcpy(result + 5, &posix_code, sizeof(posix_code)); - memcpy(result + 7, msg.c_str(), msg.size()); - state_ = result; -} - -const char* Status::CopyState(const char* state) { - uint32_t size; - memcpy(&size, state, sizeof(size)); - char* result = new char[size + 7]; - memcpy(result, state, size + 7); - return result; -} - -std::string Status::CodeAsString() const { - if (state_ == NULL) { - return "OK"; - } - - const char* type; - switch (code()) { - case StatusCode::OK: - type = "OK"; - break; - case StatusCode::OutOfMemory: - type = "Out of memory"; - break; - case StatusCode::KeyError: - type = "Key error"; - break; - case StatusCode::TypeError: - type = "Value error"; - break; - case StatusCode::ValueError: - type = "Value error"; - break; - case StatusCode::IOError: - type = "IO error"; - break; - case StatusCode::NotImplemented: - type = "Not implemented"; - break; - case StatusCode::ArrowError: - type = "Arrow C++ error"; - break; - case StatusCode::UnknownError: - type = "Unknown error"; - break; - } - return std::string(type); -} - -std::string Status::ToString() const { - std::string result(CodeAsString()); - if (state_ == NULL) { - return result; - } - - result.append(": "); - - uint32_t length; - memcpy(&length, state_, sizeof(length)); - result.append(reinterpret_cast(state_ + 7), length); - return result; -} - -} // namespace pyarrow diff --git a/python/src/pyarrow/status.h b/python/src/pyarrow/status.h deleted file mode 100644 index 67cd66c58eeb3..0000000000000 --- a/python/src/pyarrow/status.h +++ /dev/null @@ -1,146 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. -// -// A Status encapsulates the result of an operation. It may indicate success, -// or it may indicate an error with an associated error message. -// -// Multiple threads can invoke const methods on a Status without -// external synchronization, but if any of the threads may call a -// non-const method, all threads accessing the same Status must use -// external synchronization. - -#ifndef PYARROW_STATUS_H_ -#define PYARROW_STATUS_H_ - -#include -#include -#include - -#include "pyarrow/visibility.h" - -namespace pyarrow { - -#define PY_RETURN_NOT_OK(s) do { \ - Status _s = (s); \ - if (!_s.ok()) return _s; \ - } while (0); - -enum class StatusCode: char { - OK = 0, - OutOfMemory = 1, - KeyError = 2, - TypeError = 3, - ValueError = 4, - IOError = 5, - NotImplemented = 6, - - ArrowError = 7, - - UnknownError = 10 -}; - -class PYARROW_EXPORT Status { - public: - // Create a success status. - Status() : state_(NULL) { } - ~Status() { delete[] state_; } - - // Copy the specified status. - Status(const Status& s); - void operator=(const Status& s); - - // Return a success status. - static Status OK() { return Status(); } - - // Return error status of an appropriate type. - static Status OutOfMemory(const std::string& msg, int16_t posix_code = -1) { - return Status(StatusCode::OutOfMemory, msg, posix_code); - } - - static Status KeyError(const std::string& msg) { - return Status(StatusCode::KeyError, msg, -1); - } - - static Status TypeError(const std::string& msg) { - return Status(StatusCode::TypeError, msg, -1); - } - - static Status IOError(const std::string& msg) { - return Status(StatusCode::IOError, msg, -1); - } - - static Status ValueError(const std::string& msg) { - return Status(StatusCode::ValueError, msg, -1); - } - - static Status NotImplemented(const std::string& msg) { - return Status(StatusCode::NotImplemented, msg, -1); - } - - static Status UnknownError(const std::string& msg) { - return Status(StatusCode::UnknownError, msg, -1); - } - - static Status ArrowError(const std::string& msg) { - return Status(StatusCode::ArrowError, msg, -1); - } - - // Returns true iff the status indicates success. - bool ok() const { return (state_ == NULL); } - - bool IsOutOfMemory() const { return code() == StatusCode::OutOfMemory; } - bool IsKeyError() const { return code() == StatusCode::KeyError; } - bool IsIOError() const { return code() == StatusCode::IOError; } - bool IsTypeError() const { return code() == StatusCode::TypeError; } - bool IsValueError() const { return code() == StatusCode::ValueError; } - - bool IsUnknownError() const { return code() == StatusCode::UnknownError; } - - bool IsArrowError() const { return code() == StatusCode::ArrowError; } - - // Return a string representation of this status suitable for printing. - // Returns the string "OK" for success. - std::string ToString() const; - - // Return a string representation of the status code, without the message - // text or posix code information. - std::string CodeAsString() const; - - // Get the POSIX code associated with this Status, or -1 if there is none. - int16_t posix_code() const; - - private: - // OK status has a NULL state_. Otherwise, state_ is a new[] array - // of the following form: - // state_[0..3] == length of message - // state_[4] == code - // state_[5..6] == posix_code - // state_[7..] == message - const char* state_; - - StatusCode code() const { - return ((state_ == NULL) ? - StatusCode::OK : static_cast(state_[4])); - } - - Status(StatusCode code, const std::string& msg, int16_t posix_code); - static const char* CopyState(const char* s); -}; - -inline Status::Status(const Status& s) { - state_ = (s.state_ == NULL) ? NULL : CopyState(s.state_); -} - -inline void Status::operator=(const Status& s) { - // The following condition catches both aliasing (when this == &s), - // and the common case where both s and *this are ok. - if (state_ != s.state_) { - delete[] state_; - state_ = (s.state_ == NULL) ? NULL : CopyState(s.state_); - } -} - -} // namespace pyarrow - -#endif // PYARROW_STATUS_H_