From 1e65b01386e5d60851dfc8088e18a489aa4093b2 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Mon, 17 Oct 2016 09:33:26 -0400 Subject: [PATCH] Deprecate pyarrow::Status in favor of just arrow::Status. Conform pyarrow use of ArrayBuilder::Finish Change-Id: I4f5992b52eeb7421f68f199bcdd549b61b8e4f70 --- cpp/src/arrow/util/status.cc | 6 + cpp/src/arrow/util/status.h | 17 ++- python/CMakeLists.txt | 2 - python/pyarrow/error.pxd | 4 +- python/pyarrow/error.pyx | 10 +- python/pyarrow/includes/pyarrow.pxd | 35 ++---- python/pyarrow/io.pyx | 56 +++++----- python/pyarrow/ipc.pyx | 18 +-- python/pyarrow/parquet.pyx | 14 +-- python/src/pyarrow/adapters/builtin.cc | 39 ++++--- python/src/pyarrow/adapters/builtin.h | 9 +- python/src/pyarrow/adapters/pandas.cc | 32 +++--- python/src/pyarrow/adapters/pandas.h | 15 ++- python/src/pyarrow/api.h | 2 - python/src/pyarrow/common.cc | 12 +- python/src/pyarrow/common.h | 7 -- python/src/pyarrow/io.cc | 59 +++++----- python/src/pyarrow/status.cc | 92 ---------------- python/src/pyarrow/status.h | 146 ------------------------- 19 files changed, 157 insertions(+), 418 deletions(-) delete mode 100644 python/src/pyarrow/status.cc delete mode 100644 python/src/pyarrow/status.h diff --git a/cpp/src/arrow/util/status.cc b/cpp/src/arrow/util/status.cc index 8dd07d0d064e7..08e9ae3946e51 100644 --- a/cpp/src/arrow/util/status.cc +++ b/cpp/src/arrow/util/status.cc @@ -49,12 +49,18 @@ std::string Status::CodeAsString() const { case StatusCode::KeyError: type = "Key error"; break; + case StatusCode::TypeError: + type = "Type error"; + break; case StatusCode::Invalid: type = "Invalid"; break; case StatusCode::IOError: type = "IOError"; break; + case StatusCode::UnknownError: + type = "Unknown error"; + break; case StatusCode::NotImplemented: type = "NotImplemented"; break; diff --git a/cpp/src/arrow/util/status.h b/cpp/src/arrow/util/status.h index d5585313c728b..05f5b749b60cb 100644 --- a/cpp/src/arrow/util/status.h +++ b/cpp/src/arrow/util/status.h @@ -78,9 +78,10 @@ enum class StatusCode : char { OK = 0, OutOfMemory = 1, KeyError = 2, - Invalid = 3, - IOError = 4, - + TypeError = 3, + Invalid = 4, + IOError = 5, + UnknownError = 9, NotImplemented = 10, }; @@ -106,6 +107,14 @@ class ARROW_EXPORT Status { return Status(StatusCode::KeyError, msg, -1); } + static Status TypeError(const std::string& msg) { + return Status(StatusCode::TypeError, msg, -1); + } + + static Status UnknownError(const std::string& msg) { + return Status(StatusCode::UnknownError, msg, -1); + } + static Status NotImplemented(const std::string& msg) { return Status(StatusCode::NotImplemented, msg, -1); } @@ -125,6 +134,8 @@ class ARROW_EXPORT Status { bool IsKeyError() const { return code() == StatusCode::KeyError; } bool IsInvalid() const { return code() == StatusCode::Invalid; } bool IsIOError() const { return code() == StatusCode::IOError; } + + bool IsUnknownError() const { return code() == StatusCode::UnknownError; } bool IsNotImplemented() const { return code() == StatusCode::NotImplemented; } // Return a string representation of this status suitable for printing. diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 55f6d0543a108..4357fa05ff864 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -438,8 +438,6 @@ set(PYARROW_SRCS src/pyarrow/config.cc src/pyarrow/helpers.cc src/pyarrow/io.cc - src/pyarrow/status.cc - src/pyarrow/adapters/builtin.cc src/pyarrow/adapters/pandas.cc ) diff --git a/python/pyarrow/error.pxd b/python/pyarrow/error.pxd index 891d1ac1c7ea0..4fb46c25fafe4 100644 --- a/python/pyarrow/error.pxd +++ b/python/pyarrow/error.pxd @@ -16,7 +16,5 @@ # under the License. from pyarrow.includes.libarrow cimport CStatus -from pyarrow.includes.pyarrow cimport PyStatus -cdef int check_cstatus(const CStatus& status) nogil except -1 -cdef int check_status(const PyStatus& status) nogil except -1 +cdef int check_status(const CStatus& status) nogil except -1 diff --git a/python/pyarrow/error.pyx b/python/pyarrow/error.pyx index a2c53fed8c6a0..b8a82b3754c1b 100644 --- a/python/pyarrow/error.pyx +++ b/python/pyarrow/error.pyx @@ -22,15 +22,7 @@ from pyarrow.compat import frombytes class ArrowException(Exception): pass -cdef int check_cstatus(const CStatus& status) nogil except -1: - if status.ok(): - return 0 - - cdef c_string c_message = status.ToString() - with gil: - raise ArrowException(frombytes(c_message)) - -cdef int check_status(const PyStatus& status) nogil except -1: +cdef int check_status(const CStatus& status) nogil except -1: if status.ok(): return 0 diff --git a/python/pyarrow/includes/pyarrow.pxd b/python/pyarrow/includes/pyarrow.pxd index 7c47f21854e33..e1da1914c5743 100644 --- a/python/pyarrow/includes/pyarrow.pxd +++ b/python/pyarrow/includes/pyarrow.pxd @@ -25,36 +25,19 @@ cimport pyarrow.includes.libarrow_io as arrow_io cdef extern from "pyarrow/api.h" namespace "pyarrow" nogil: - # We can later add more of the common status factory methods as needed - cdef PyStatus PyStatus_OK "Status::OK"() - - cdef cppclass PyStatus "pyarrow::Status": - PyStatus() - - c_string ToString() - - c_bool ok() - c_bool IsOutOfMemory() - c_bool IsKeyError() - c_bool IsTypeError() - c_bool IsIOError() - c_bool IsValueError() - c_bool IsNotImplemented() - c_bool IsArrowError() - shared_ptr[CDataType] GetPrimitiveType(Type type) - PyStatus ConvertPySequence(object obj, shared_ptr[CArray]* out) + CStatus ConvertPySequence(object obj, shared_ptr[CArray]* out) - PyStatus PandasToArrow(MemoryPool* pool, object ao, - shared_ptr[CArray]* out) - PyStatus PandasMaskedToArrow(MemoryPool* pool, object ao, object mo, - shared_ptr[CArray]* out) + CStatus PandasToArrow(MemoryPool* pool, object ao, + shared_ptr[CArray]* out) + CStatus PandasMaskedToArrow(MemoryPool* pool, object ao, object mo, + shared_ptr[CArray]* out) - PyStatus ConvertArrayToPandas(const shared_ptr[CArray]& arr, - object py_ref, PyObject** out) + CStatus ConvertArrayToPandas(const shared_ptr[CArray]& arr, + object py_ref, PyObject** out) - PyStatus ConvertColumnToPandas(const shared_ptr[CColumn]& arr, - object py_ref, PyObject** out) + CStatus ConvertColumnToPandas(const shared_ptr[CColumn]& arr, + object py_ref, PyObject** out) MemoryPool* get_memory_pool() diff --git a/python/pyarrow/io.pyx b/python/pyarrow/io.pyx index 8970e06effdd0..16ebfa1138e46 100644 --- a/python/pyarrow/io.pyx +++ b/python/pyarrow/io.pyx @@ -28,7 +28,7 @@ cimport pyarrow.includes.pyarrow as pyarrow from pyarrow.includes.libarrow_io cimport * from pyarrow.compat import frombytes, tobytes -from pyarrow.error cimport check_cstatus +from pyarrow.error cimport check_status cimport cpython as cp @@ -57,9 +57,9 @@ cdef class NativeFile: if self.is_open: with nogil: if self.is_readonly: - check_cstatus(self.rd_file.get().Close()) + check_status(self.rd_file.get().Close()) else: - check_cstatus(self.wr_file.get().Close()) + check_status(self.wr_file.get().Close()) self.is_open = False cdef read_handle(self, shared_ptr[ReadableFileInterface]* file): @@ -88,22 +88,22 @@ cdef class NativeFile: cdef int64_t size self._assert_readable() with nogil: - check_cstatus(self.rd_file.get().GetSize(&size)) + check_status(self.rd_file.get().GetSize(&size)) return size def tell(self): cdef int64_t position with nogil: if self.is_readonly: - check_cstatus(self.rd_file.get().Tell(&position)) + check_status(self.rd_file.get().Tell(&position)) else: - check_cstatus(self.wr_file.get().Tell(&position)) + check_status(self.wr_file.get().Tell(&position)) return position def seek(self, int64_t position): self._assert_readable() with nogil: - check_cstatus(self.rd_file.get().Seek(position)) + check_status(self.rd_file.get().Seek(position)) def write(self, data): """ @@ -116,7 +116,7 @@ cdef class NativeFile: cdef const uint8_t* buf = cp.PyBytes_AS_STRING(data) cdef int64_t bufsize = len(data) with nogil: - check_cstatus(self.wr_file.get().Write(buf, bufsize)) + check_status(self.wr_file.get().Write(buf, bufsize)) def read(self, int nbytes): cdef: @@ -127,8 +127,7 @@ cdef class NativeFile: self._assert_readable() with nogil: - check_cstatus(self.rd_file.get() - .ReadB(nbytes, &out)) + check_status(self.rd_file.get().ReadB(nbytes, &out)) result = cp.PyBytes_FromStringAndSize( out.get().data(), out.get().size()) @@ -223,7 +222,7 @@ cdef class InMemoryOutputStream(NativeFile): def get_result(self): cdef Buffer result = Buffer() - check_cstatus(self.wr_file.get().Close()) + check_status(self.wr_file.get().Close()) result.init( self.buffer) self.is_open = False @@ -270,7 +269,7 @@ except ImportError: def have_libhdfs(): try: - check_cstatus(ConnectLibHdfs()) + check_status(ConnectLibHdfs()) return True except: return False @@ -304,7 +303,7 @@ cdef class HdfsClient: def close(self): self._ensure_client() with nogil: - check_cstatus(self.client.get().Disconnect()) + check_status(self.client.get().Disconnect()) self.is_open = False cdef _ensure_client(self): @@ -341,8 +340,7 @@ cdef class HdfsClient: conf.user = tobytes(user) with nogil: - check_cstatus( - CHdfsClient.Connect(&conf, &out.client)) + check_status(CHdfsClient.Connect(&conf, &out.client)) out.is_open = True return out @@ -383,8 +381,8 @@ cdef class HdfsClient: self._ensure_client() with nogil: - check_cstatus(self.client.get() - .ListDirectory(c_path, &listing)) + check_status(self.client.get() + .ListDirectory(c_path, &listing)) cdef const HdfsPathInfo* info for i in range( listing.size()): @@ -422,8 +420,8 @@ cdef class HdfsClient: cdef c_string c_path = tobytes(path) with nogil: - check_cstatus(self.client.get() - .CreateDirectory(c_path)) + check_status(self.client.get() + .CreateDirectory(c_path)) def delete(self, path, bint recursive=False): """ @@ -439,8 +437,8 @@ cdef class HdfsClient: cdef c_string c_path = tobytes(path) with nogil: - check_cstatus(self.client.get() - .Delete(c_path, recursive)) + check_status(self.client.get() + .Delete(c_path, recursive)) def open(self, path, mode='rb', buffer_size=None, replication=None, default_block_size=None): @@ -473,7 +471,7 @@ cdef class HdfsClient: append = True with nogil: - check_cstatus( + check_status( self.client.get() .OpenWriteable(c_path, append, c_buffer_size, c_replication, c_default_block_size, @@ -484,8 +482,8 @@ cdef class HdfsClient: out.is_readonly = False else: with nogil: - check_cstatus(self.client.get() - .OpenReadable(c_path, &rd_handle)) + check_status(self.client.get() + .OpenReadable(c_path, &rd_handle)) out.rd_file = rd_handle out.is_readonly = True @@ -579,9 +577,9 @@ cdef class HdfsFile(NativeFile): try: with nogil: while total_bytes < nbytes: - check_cstatus(self.rd_file.get() - .Read(rpc_chunksize, &bytes_read, - buf + total_bytes)) + check_status(self.rd_file.get() + .Read(rpc_chunksize, &bytes_read, + buf + total_bytes)) total_bytes += bytes_read @@ -647,8 +645,8 @@ cdef class HdfsFile(NativeFile): try: while True: with nogil: - check_cstatus(self.rd_file.get() - .Read(self.buffer_size, &bytes_read, buf)) + check_status(self.rd_file.get() + .Read(self.buffer_size, &bytes_read, buf)) total_bytes += bytes_read diff --git a/python/pyarrow/ipc.pyx b/python/pyarrow/ipc.pyx index f8da3a70da819..46deb5ad0c35d 100644 --- a/python/pyarrow/ipc.pyx +++ b/python/pyarrow/ipc.pyx @@ -26,7 +26,7 @@ from pyarrow.includes.libarrow_io cimport * from pyarrow.includes.libarrow_ipc cimport * cimport pyarrow.includes.pyarrow as pyarrow -from pyarrow.error cimport check_cstatus +from pyarrow.error cimport check_status from pyarrow.io cimport NativeFile from pyarrow.schema cimport Schema from pyarrow.table cimport RecordBatch @@ -89,8 +89,8 @@ cdef class ArrowFileWriter: get_writer(sink, &self.sink) with nogil: - check_cstatus(CFileWriter.Open(self.sink.get(), schema.sp_schema, - &self.writer)) + check_status(CFileWriter.Open(self.sink.get(), schema.sp_schema, + &self.writer)) self.closed = False @@ -101,12 +101,12 @@ cdef class ArrowFileWriter: def write_record_batch(self, RecordBatch batch): cdef CRecordBatch* bptr = batch.batch with nogil: - check_cstatus(self.writer.get() - .WriteRecordBatch(bptr.columns(), bptr.num_rows())) + check_status(self.writer.get() + .WriteRecordBatch(bptr.columns(), bptr.num_rows())) def close(self): with nogil: - check_cstatus(self.writer.get().Close()) + check_status(self.writer.get().Close()) self.closed = True @@ -124,9 +124,9 @@ cdef class ArrowFileReader: with nogil: if offset != 0: - check_cstatus(CFileReader.Open2(reader, offset, &self.reader)) + check_status(CFileReader.Open2(reader, offset, &self.reader)) else: - check_cstatus(CFileReader.Open(reader, &self.reader)) + check_status(CFileReader.Open(reader, &self.reader)) property num_dictionaries: @@ -147,7 +147,7 @@ cdef class ArrowFileReader: raise ValueError('Batch number {0} out of range'.format(i)) with nogil: - check_cstatus(self.reader.get().GetRecordBatch(i, &batch)) + check_status(self.reader.get().GetRecordBatch(i, &batch)) result = RecordBatch() result.init(batch) diff --git a/python/pyarrow/parquet.pyx b/python/pyarrow/parquet.pyx index 2abe57b33ed48..019dd2c1de489 100644 --- a/python/pyarrow/parquet.pyx +++ b/python/pyarrow/parquet.pyx @@ -26,7 +26,7 @@ cimport pyarrow.includes.pyarrow as pyarrow from pyarrow.compat import tobytes from pyarrow.error import ArrowException -from pyarrow.error cimport check_cstatus +from pyarrow.error cimport check_status from pyarrow.io import NativeFile from pyarrow.table cimport Table @@ -62,7 +62,7 @@ cdef class ParquetReader: cdef shared_ptr[ReadableFileInterface] cpp_handle file.read_handle(&cpp_handle) - check_cstatus(OpenFile(cpp_handle, &self.allocator, &self.reader)) + check_status(OpenFile(cpp_handle, &self.allocator, &self.reader)) def read_all(self): cdef: @@ -70,8 +70,8 @@ cdef class ParquetReader: shared_ptr[CTable] ctable with nogil: - check_cstatus(self.reader.get() - .ReadFlatTable(&ctable)) + check_status(self.reader.get() + .ReadFlatTable(&ctable)) table.init(ctable) return table @@ -80,7 +80,7 @@ cdef class ParquetReader: def read_table(source, columns=None): """ Read a Table from Parquet format - + Returns ------- pyarrow.table.Table @@ -176,5 +176,5 @@ def write_table(table, filename, chunk_size=None, version=None, sink.reset(new LocalFileOutputStream(tobytes(filename))) with nogil: - check_cstatus(WriteFlatTable(ctable_, default_memory_pool(), sink, - chunk_size_, properties_builder.build())) + check_status(WriteFlatTable(ctable_, default_memory_pool(), sink, + chunk_size_, properties_builder.build())) diff --git a/python/src/pyarrow/adapters/builtin.cc b/python/src/pyarrow/adapters/builtin.cc index 680f3a539b5fa..c034fbd977747 100644 --- a/python/src/pyarrow/adapters/builtin.cc +++ b/python/src/pyarrow/adapters/builtin.cc @@ -20,13 +20,14 @@ #include "pyarrow/adapters/builtin.h" -#include +#include "arrow/api.h" +#include "arrow/util/status.h" #include "pyarrow/helpers.h" -#include "pyarrow/status.h" using arrow::ArrayBuilder; using arrow::DataType; +using arrow::Status; using arrow::Type; namespace pyarrow { @@ -129,7 +130,7 @@ class SeqVisitor { PyObject* item = item_ref.obj(); if (PyList_Check(item)) { - PY_RETURN_NOT_OK(Visit(item, level + 1)); + RETURN_NOT_OK(Visit(item, level + 1)); } else if (PyDict_Check(item)) { return Status::NotImplemented("No type inference for dicts"); } else { @@ -164,9 +165,9 @@ class SeqVisitor { Status Validate() const { if (scalars_.total_count() > 0) { if (num_nesting_levels() > 1) { - return Status::ValueError("Mixed nesting levels not supported"); + return Status::Invalid("Mixed nesting levels not supported"); } else if (max_observed_level() < max_nesting_level_) { - return Status::ValueError("Mixed nesting levels not supported"); + return Status::Invalid("Mixed nesting levels not supported"); } } return Status::OK(); @@ -216,8 +217,8 @@ static Status InferArrowType(PyObject* obj, int64_t* size, } SeqVisitor seq_visitor; - PY_RETURN_NOT_OK(seq_visitor.Visit(obj)); - PY_RETURN_NOT_OK(seq_visitor.Validate()); + RETURN_NOT_OK(seq_visitor.Visit(obj)); + RETURN_NOT_OK(seq_visitor.Validate()); *out_type = seq_visitor.GetType(); @@ -259,7 +260,7 @@ class BoolConverter : public TypedConverter { public: Status AppendData(PyObject* seq) override { Py_ssize_t size = PySequence_Size(seq); - RETURN_ARROW_NOT_OK(typed_builder_->Reserve(size)); + RETURN_NOT_OK(typed_builder_->Reserve(size)); for (int64_t i = 0; i < size; ++i) { OwnedRef item(PySequence_GetItem(seq, i)); if (item.obj() == Py_None) { @@ -281,7 +282,7 @@ class Int64Converter : public TypedConverter { Status AppendData(PyObject* seq) override { int64_t val; Py_ssize_t size = PySequence_Size(seq); - RETURN_ARROW_NOT_OK(typed_builder_->Reserve(size)); + RETURN_NOT_OK(typed_builder_->Reserve(size)); for (int64_t i = 0; i < size; ++i) { OwnedRef item(PySequence_GetItem(seq, i)); if (item.obj() == Py_None) { @@ -301,7 +302,7 @@ class DoubleConverter : public TypedConverter { Status AppendData(PyObject* seq) override { double val; Py_ssize_t size = PySequence_Size(seq); - RETURN_ARROW_NOT_OK(typed_builder_->Reserve(size)); + RETURN_NOT_OK(typed_builder_->Reserve(size)); for (int64_t i = 0; i < size; ++i) { OwnedRef item(PySequence_GetItem(seq, i)); if (item.obj() == Py_None) { @@ -330,7 +331,7 @@ class StringConverter : public TypedConverter { OwnedRef holder(item); if (item == Py_None) { - RETURN_ARROW_NOT_OK(typed_builder_->AppendNull()); + RETURN_NOT_OK(typed_builder_->AppendNull()); continue; } else if (PyUnicode_Check(item)) { tmp.reset(PyUnicode_AsUTF8String(item)); @@ -344,7 +345,7 @@ class StringConverter : public TypedConverter { // No error checking length = PyBytes_GET_SIZE(bytes_obj); bytes = PyBytes_AS_STRING(bytes_obj); - RETURN_ARROW_NOT_OK(typed_builder_->Append(bytes, length)); + RETURN_NOT_OK(typed_builder_->Append(bytes, length)); } return Status::OK(); } @@ -359,10 +360,10 @@ class ListConverter : public TypedConverter { for (int64_t i = 0; i < size; ++i) { OwnedRef item(PySequence_GetItem(seq, i)); if (item.obj() == Py_None) { - RETURN_ARROW_NOT_OK(typed_builder_->AppendNull()); + RETURN_NOT_OK(typed_builder_->AppendNull()); } else { typed_builder_->Append(); - PY_RETURN_NOT_OK(value_converter_->AppendData(item.obj())); + RETURN_NOT_OK(value_converter_->AppendData(item.obj())); } } return Status::OK(); @@ -408,7 +409,7 @@ Status ListConverter::Init(const std::shared_ptr& builder) { Status ConvertPySequence(PyObject* obj, std::shared_ptr* out) { std::shared_ptr type; int64_t size; - PY_RETURN_NOT_OK(InferArrowType(obj, &size, &type)); + RETURN_NOT_OK(InferArrowType(obj, &size, &type)); // Handle NA / NullType case if (type->type == Type::NA) { @@ -426,14 +427,12 @@ Status ConvertPySequence(PyObject* obj, std::shared_ptr* out) { // Give the sequence converter an array builder std::shared_ptr builder; - RETURN_ARROW_NOT_OK(arrow::MakeBuilder(get_memory_pool(), type, &builder)); + RETURN_NOT_OK(arrow::MakeBuilder(get_memory_pool(), type, &builder)); converter->Init(builder); - PY_RETURN_NOT_OK(converter->AppendData(obj)); + RETURN_NOT_OK(converter->AppendData(obj)); - *out = builder->Finish(); - - return Status::OK(); + return builder->Finish(out); } } // namespace pyarrow diff --git a/python/src/pyarrow/adapters/builtin.h b/python/src/pyarrow/adapters/builtin.h index 4e997e31dd690..2ddfdaaf44134 100644 --- a/python/src/pyarrow/adapters/builtin.h +++ b/python/src/pyarrow/adapters/builtin.h @@ -30,14 +30,15 @@ #include "pyarrow/common.h" #include "pyarrow/visibility.h" -namespace arrow { class Array; } +namespace arrow { +class Array; +class Status; +} namespace pyarrow { -class Status; - PYARROW_EXPORT -Status ConvertPySequence(PyObject* obj, std::shared_ptr* out); +arrow::Status ConvertPySequence(PyObject* obj, std::shared_ptr* out); } // namespace pyarrow diff --git a/python/src/pyarrow/adapters/pandas.cc b/python/src/pyarrow/adapters/pandas.cc index b2fcd37aec944..5902b8341696d 100644 --- a/python/src/pyarrow/adapters/pandas.cc +++ b/python/src/pyarrow/adapters/pandas.cc @@ -31,10 +31,10 @@ #include "arrow/api.h" #include "arrow/util/bit-util.h" +#include "arrow/util/status.h" #include "pyarrow/common.h" #include "pyarrow/config.h" -#include "pyarrow/status.h" namespace pyarrow { @@ -42,6 +42,8 @@ using arrow::Array; using arrow::Column; using arrow::Field; using arrow::DataType; +using arrow::Status; + namespace util = arrow::util; // ---------------------------------------------------------------------- @@ -149,7 +151,7 @@ class ArrowSerializer { int null_bytes = util::bytes_for_bits(length_); null_bitmap_ = std::make_shared(pool_); - RETURN_ARROW_NOT_OK(null_bitmap_->Resize(null_bytes)); + RETURN_NOT_OK(null_bitmap_->Resize(null_bytes)); null_bitmap_data_ = null_bitmap_->mutable_data(); memset(null_bitmap_data_, 0, null_bytes); @@ -171,9 +173,9 @@ class ArrowSerializer { PyObject** objects = reinterpret_cast(PyArray_DATA(arr_)); arrow::TypePtr string_type(new arrow::StringType()); arrow::StringBuilder string_builder(pool_, string_type); - RETURN_ARROW_NOT_OK(string_builder.Resize(length_)); + RETURN_NOT_OK(string_builder.Resize(length_)); - arrow::Status s; + Status s; PyObject* obj; for (int64_t i = 0; i < length_; ++i) { obj = objects[i]; @@ -187,18 +189,16 @@ class ArrowSerializer { s = string_builder.Append(PyBytes_AS_STRING(obj), length); Py_DECREF(obj); if (!s.ok()) { - return Status::ArrowError(s.ToString()); + return s; } } else if (PyBytes_Check(obj)) { const int32_t length = PyBytes_GET_SIZE(obj); - RETURN_ARROW_NOT_OK(string_builder.Append(PyBytes_AS_STRING(obj), length)); + RETURN_NOT_OK(string_builder.Append(PyBytes_AS_STRING(obj), length)); } else { string_builder.AppendNull(); } } - *out = std::shared_ptr(string_builder.Finish()); - - return Status::OK(); + return string_builder.Finish(out); } Status ConvertBooleans(std::shared_ptr* out) { @@ -208,7 +208,7 @@ class ArrowSerializer { int nbytes = util::bytes_for_bits(length_); auto data = std::make_shared(pool_); - RETURN_ARROW_NOT_OK(data->Resize(nbytes)); + RETURN_NOT_OK(data->Resize(nbytes)); uint8_t* bitmap = data->mutable_data(); memset(bitmap, 0, nbytes); @@ -305,7 +305,7 @@ inline Status ArrowSerializer::MakeDataType(std::shared_ptrreset(new arrow::TimestampType(unit)); @@ -330,7 +330,7 @@ inline Status ArrowSerializer::Convert(std::shared_ptr* out) { RETURN_NOT_OK(ConvertData()); std::shared_ptr type; RETURN_NOT_OK(MakeDataType(&type)); - RETURN_ARROW_NOT_OK(MakePrimitiveArray(type, length_, data_, null_count, null_bitmap_, out)); + RETURN_NOT_OK(MakePrimitiveArray(type, length_, data_, null_count, null_bitmap_, out)); return Status::OK(); } @@ -389,7 +389,7 @@ template inline Status ArrowSerializer::ConvertData() { // TODO(wesm): strided arrays if (is_strided()) { - return Status::ValueError("no support for strided data yet"); + return Status::Invalid("no support for strided data yet"); } data_ = std::make_shared(arr_); @@ -399,12 +399,12 @@ inline Status ArrowSerializer::ConvertData() { template <> inline Status ArrowSerializer::ConvertData() { if (is_strided()) { - return Status::ValueError("no support for strided data yet"); + return Status::Invalid("no support for strided data yet"); } int nbytes = util::bytes_for_bits(length_); auto buffer = std::make_shared(pool_); - RETURN_ARROW_NOT_OK(buffer->Resize(nbytes)); + RETURN_NOT_OK(buffer->Resize(nbytes)); const uint8_t* values = reinterpret_cast(PyArray_DATA(arr_)); @@ -446,7 +446,7 @@ Status PandasMaskedToArrow(arrow::MemoryPool* pool, PyObject* ao, PyObject* mo, } if (PyArray_NDIM(arr) != 1) { - return Status::ValueError("only handle 1-dimensional arrays"); + return Status::Invalid("only handle 1-dimensional arrays"); } switch(PyArray_DESCR(arr)->type_num) { diff --git a/python/src/pyarrow/adapters/pandas.h b/python/src/pyarrow/adapters/pandas.h index 141d1219e64db..532495dd792db 100644 --- a/python/src/pyarrow/adapters/pandas.h +++ b/python/src/pyarrow/adapters/pandas.h @@ -32,27 +32,26 @@ namespace arrow { class Array; class Column; class MemoryPool; +class Status; } // namespace arrow namespace pyarrow { -class Status; - PYARROW_EXPORT -Status ConvertArrayToPandas(const std::shared_ptr& arr, PyObject* py_ref, - PyObject** out); +arrow::Status ConvertArrayToPandas(const std::shared_ptr& arr, + PyObject* py_ref, PyObject** out); PYARROW_EXPORT -Status ConvertColumnToPandas(const std::shared_ptr& col, PyObject* py_ref, - PyObject** out); +arrow::Status ConvertColumnToPandas(const std::shared_ptr& col, + PyObject* py_ref, PyObject** out); PYARROW_EXPORT -Status PandasMaskedToArrow(arrow::MemoryPool* pool, PyObject* ao, PyObject* mo, +arrow::Status PandasMaskedToArrow(arrow::MemoryPool* pool, PyObject* ao, PyObject* mo, std::shared_ptr* out); PYARROW_EXPORT -Status PandasToArrow(arrow::MemoryPool* pool, PyObject* ao, +arrow::Status PandasToArrow(arrow::MemoryPool* pool, PyObject* ao, std::shared_ptr* out); } // namespace pyarrow diff --git a/python/src/pyarrow/api.h b/python/src/pyarrow/api.h index 72be6afe02c76..6dbbc45d40ccc 100644 --- a/python/src/pyarrow/api.h +++ b/python/src/pyarrow/api.h @@ -18,8 +18,6 @@ #ifndef PYARROW_API_H #define PYARROW_API_H -#include "pyarrow/status.h" - #include "pyarrow/helpers.h" #include "pyarrow/adapters/builtin.h" diff --git a/python/src/pyarrow/common.cc b/python/src/pyarrow/common.cc index 09f3efb5a03bc..fa875f2b9aba1 100644 --- a/python/src/pyarrow/common.cc +++ b/python/src/pyarrow/common.cc @@ -21,10 +21,10 @@ #include #include -#include -#include +#include "arrow/util/memory-pool.h" +#include "arrow/util/status.h" -#include "pyarrow/status.h" +using arrow::Status; namespace pyarrow { @@ -33,18 +33,18 @@ class PyArrowMemoryPool : public arrow::MemoryPool { PyArrowMemoryPool() : bytes_allocated_(0) {} virtual ~PyArrowMemoryPool() {} - arrow::Status Allocate(int64_t size, uint8_t** out) override { + Status Allocate(int64_t size, uint8_t** out) override { std::lock_guard guard(pool_lock_); *out = static_cast(std::malloc(size)); if (*out == nullptr) { std::stringstream ss; ss << "malloc of size " << size << " failed"; - return arrow::Status::OutOfMemory(ss.str()); + return Status::OutOfMemory(ss.str()); } bytes_allocated_ += size; - return arrow::Status::OK(); + return Status::OK(); } int64_t bytes_allocated() const override { diff --git a/python/src/pyarrow/common.h b/python/src/pyarrow/common.h index 50c2577b93c9b..7f3131ef03dd8 100644 --- a/python/src/pyarrow/common.h +++ b/python/src/pyarrow/common.h @@ -29,13 +29,6 @@ namespace pyarrow { #define PYARROW_IS_PY2 PY_MAJOR_VERSION <= 2 -#define RETURN_ARROW_NOT_OK(s) do { \ - arrow::Status _s = (s); \ - if (!_s.ok()) { \ - return Status::ArrowError(s.ToString()); \ - } \ - } while (0); - class OwnedRef { public: OwnedRef() : obj_(nullptr) {} diff --git a/python/src/pyarrow/io.cc b/python/src/pyarrow/io.cc index 7bf32ffa8d22b..e6dbc12d429b0 100644 --- a/python/src/pyarrow/io.cc +++ b/python/src/pyarrow/io.cc @@ -20,12 +20,13 @@ #include #include -#include -#include -#include +#include "arrow/io/memory.h" +#include "arrow/util/memory-pool.h" +#include "arrow/util/status.h" #include "pyarrow/common.h" -#include "pyarrow/status.h" + +using arrow::Status; namespace pyarrow { @@ -41,7 +42,7 @@ PythonFile::~PythonFile() { Py_DECREF(file_); } -static arrow::Status CheckPyError() { +static Status CheckPyError() { if (PyErr_Occurred()) { PyObject *exc_type, *exc_value, *traceback; PyErr_Fetch(&exc_type, &exc_value, &traceback); @@ -51,35 +52,35 @@ static arrow::Status CheckPyError() { Py_XDECREF(exc_value); Py_XDECREF(traceback); PyErr_Clear(); - return arrow::Status::IOError(message); + return Status::IOError(message); } - return arrow::Status::OK(); + return Status::OK(); } -arrow::Status PythonFile::Close() { +Status PythonFile::Close() { // whence: 0 for relative to start of file, 2 for end of file PyObject* result = PyObject_CallMethod(file_, "close", "()"); Py_XDECREF(result); ARROW_RETURN_NOT_OK(CheckPyError()); - return arrow::Status::OK(); + return Status::OK(); } -arrow::Status PythonFile::Seek(int64_t position, int whence) { +Status PythonFile::Seek(int64_t position, int whence) { // whence: 0 for relative to start of file, 2 for end of file PyObject* result = PyObject_CallMethod(file_, "seek", "(ii)", position, whence); Py_XDECREF(result); ARROW_RETURN_NOT_OK(CheckPyError()); - return arrow::Status::OK(); + return Status::OK(); } -arrow::Status PythonFile::Read(int64_t nbytes, PyObject** out) { +Status PythonFile::Read(int64_t nbytes, PyObject** out) { PyObject* result = PyObject_CallMethod(file_, "read", "(i)", nbytes); ARROW_RETURN_NOT_OK(CheckPyError()); *out = result; - return arrow::Status::OK(); + return Status::OK(); } -arrow::Status PythonFile::Write(const uint8_t* data, int64_t nbytes) { +Status PythonFile::Write(const uint8_t* data, int64_t nbytes) { PyObject* py_data = PyBytes_FromStringAndSize( reinterpret_cast(data), nbytes); ARROW_RETURN_NOT_OK(CheckPyError()); @@ -88,10 +89,10 @@ arrow::Status PythonFile::Write(const uint8_t* data, int64_t nbytes) { Py_XDECREF(py_data); Py_XDECREF(result); ARROW_RETURN_NOT_OK(CheckPyError()); - return arrow::Status::OK(); + return Status::OK(); } -arrow::Status PythonFile::Tell(int64_t* position) { +Status PythonFile::Tell(int64_t* position) { PyObject* result = PyObject_CallMethod(file_, "tell", "()"); ARROW_RETURN_NOT_OK(CheckPyError()); @@ -101,7 +102,7 @@ arrow::Status PythonFile::Tell(int64_t* position) { // PyLong_AsLongLong can raise OverflowError ARROW_RETURN_NOT_OK(CheckPyError()); - return arrow::Status::OK(); + return Status::OK(); } // ---------------------------------------------------------------------- @@ -113,22 +114,22 @@ PyReadableFile::PyReadableFile(PyObject* file) { PyReadableFile::~PyReadableFile() {} -arrow::Status PyReadableFile::Close() { +Status PyReadableFile::Close() { PyGILGuard lock; return file_->Close(); } -arrow::Status PyReadableFile::Seek(int64_t position) { +Status PyReadableFile::Seek(int64_t position) { PyGILGuard lock; return file_->Seek(position, 0); } -arrow::Status PyReadableFile::Tell(int64_t* position) { +Status PyReadableFile::Tell(int64_t* position) { PyGILGuard lock; return file_->Tell(position); } -arrow::Status PyReadableFile::Read(int64_t nbytes, int64_t* bytes_read, uint8_t* out) { +Status PyReadableFile::Read(int64_t nbytes, int64_t* bytes_read, uint8_t* out) { PyGILGuard lock; PyObject* bytes_obj; ARROW_RETURN_NOT_OK(file_->Read(nbytes, &bytes_obj)); @@ -137,10 +138,10 @@ arrow::Status PyReadableFile::Read(int64_t nbytes, int64_t* bytes_read, uint8_t* std::memcpy(out, PyBytes_AS_STRING(bytes_obj), *bytes_read); Py_DECREF(bytes_obj); - return arrow::Status::OK(); + return Status::OK(); } -arrow::Status PyReadableFile::Read(int64_t nbytes, std::shared_ptr* out) { +Status PyReadableFile::Read(int64_t nbytes, std::shared_ptr* out) { PyGILGuard lock; PyObject* bytes_obj; @@ -149,10 +150,10 @@ arrow::Status PyReadableFile::Read(int64_t nbytes, std::shared_ptr(bytes_obj); Py_DECREF(bytes_obj); - return arrow::Status::OK(); + return Status::OK(); } -arrow::Status PyReadableFile::GetSize(int64_t* size) { +Status PyReadableFile::GetSize(int64_t* size) { PyGILGuard lock; int64_t current_position;; @@ -167,7 +168,7 @@ arrow::Status PyReadableFile::GetSize(int64_t* size) { ARROW_RETURN_NOT_OK(file_->Seek(current_position, 0)); *size = file_size; - return arrow::Status::OK(); + return Status::OK(); } bool PyReadableFile::supports_zero_copy() const { @@ -183,17 +184,17 @@ PyOutputStream::PyOutputStream(PyObject* file) { PyOutputStream::~PyOutputStream() {} -arrow::Status PyOutputStream::Close() { +Status PyOutputStream::Close() { PyGILGuard lock; return file_->Close(); } -arrow::Status PyOutputStream::Tell(int64_t* position) { +Status PyOutputStream::Tell(int64_t* position) { PyGILGuard lock; return file_->Tell(position); } -arrow::Status PyOutputStream::Write(const uint8_t* data, int64_t nbytes) { +Status PyOutputStream::Write(const uint8_t* data, int64_t nbytes) { PyGILGuard lock; return file_->Write(data, nbytes); } diff --git a/python/src/pyarrow/status.cc b/python/src/pyarrow/status.cc deleted file mode 100644 index 1cd54f6a78560..0000000000000 --- a/python/src/pyarrow/status.cc +++ /dev/null @@ -1,92 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. -// -// A Status encapsulates the result of an operation. It may indicate success, -// or it may indicate an error with an associated error message. -// -// Multiple threads can invoke const methods on a Status without -// external synchronization, but if any of the threads may call a -// non-const method, all threads accessing the same Status must use -// external synchronization. - -#include "pyarrow/status.h" - -#include -#include -#include - -namespace pyarrow { - -Status::Status(StatusCode code, const std::string& msg, int16_t posix_code) { - assert(code != StatusCode::OK); - const uint32_t size = msg.size(); - char* result = new char[size + 7]; - memcpy(result, &size, sizeof(size)); - result[4] = static_cast(code); - memcpy(result + 5, &posix_code, sizeof(posix_code)); - memcpy(result + 7, msg.c_str(), msg.size()); - state_ = result; -} - -const char* Status::CopyState(const char* state) { - uint32_t size; - memcpy(&size, state, sizeof(size)); - char* result = new char[size + 7]; - memcpy(result, state, size + 7); - return result; -} - -std::string Status::CodeAsString() const { - if (state_ == NULL) { - return "OK"; - } - - const char* type; - switch (code()) { - case StatusCode::OK: - type = "OK"; - break; - case StatusCode::OutOfMemory: - type = "Out of memory"; - break; - case StatusCode::KeyError: - type = "Key error"; - break; - case StatusCode::TypeError: - type = "Value error"; - break; - case StatusCode::ValueError: - type = "Value error"; - break; - case StatusCode::IOError: - type = "IO error"; - break; - case StatusCode::NotImplemented: - type = "Not implemented"; - break; - case StatusCode::ArrowError: - type = "Arrow C++ error"; - break; - case StatusCode::UnknownError: - type = "Unknown error"; - break; - } - return std::string(type); -} - -std::string Status::ToString() const { - std::string result(CodeAsString()); - if (state_ == NULL) { - return result; - } - - result.append(": "); - - uint32_t length; - memcpy(&length, state_, sizeof(length)); - result.append(reinterpret_cast(state_ + 7), length); - return result; -} - -} // namespace pyarrow diff --git a/python/src/pyarrow/status.h b/python/src/pyarrow/status.h deleted file mode 100644 index 67cd66c58eeb3..0000000000000 --- a/python/src/pyarrow/status.h +++ /dev/null @@ -1,146 +0,0 @@ -// Copyright (c) 2011 The LevelDB Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. See the AUTHORS file for names of contributors. -// -// A Status encapsulates the result of an operation. It may indicate success, -// or it may indicate an error with an associated error message. -// -// Multiple threads can invoke const methods on a Status without -// external synchronization, but if any of the threads may call a -// non-const method, all threads accessing the same Status must use -// external synchronization. - -#ifndef PYARROW_STATUS_H_ -#define PYARROW_STATUS_H_ - -#include -#include -#include - -#include "pyarrow/visibility.h" - -namespace pyarrow { - -#define PY_RETURN_NOT_OK(s) do { \ - Status _s = (s); \ - if (!_s.ok()) return _s; \ - } while (0); - -enum class StatusCode: char { - OK = 0, - OutOfMemory = 1, - KeyError = 2, - TypeError = 3, - ValueError = 4, - IOError = 5, - NotImplemented = 6, - - ArrowError = 7, - - UnknownError = 10 -}; - -class PYARROW_EXPORT Status { - public: - // Create a success status. - Status() : state_(NULL) { } - ~Status() { delete[] state_; } - - // Copy the specified status. - Status(const Status& s); - void operator=(const Status& s); - - // Return a success status. - static Status OK() { return Status(); } - - // Return error status of an appropriate type. - static Status OutOfMemory(const std::string& msg, int16_t posix_code = -1) { - return Status(StatusCode::OutOfMemory, msg, posix_code); - } - - static Status KeyError(const std::string& msg) { - return Status(StatusCode::KeyError, msg, -1); - } - - static Status TypeError(const std::string& msg) { - return Status(StatusCode::TypeError, msg, -1); - } - - static Status IOError(const std::string& msg) { - return Status(StatusCode::IOError, msg, -1); - } - - static Status ValueError(const std::string& msg) { - return Status(StatusCode::ValueError, msg, -1); - } - - static Status NotImplemented(const std::string& msg) { - return Status(StatusCode::NotImplemented, msg, -1); - } - - static Status UnknownError(const std::string& msg) { - return Status(StatusCode::UnknownError, msg, -1); - } - - static Status ArrowError(const std::string& msg) { - return Status(StatusCode::ArrowError, msg, -1); - } - - // Returns true iff the status indicates success. - bool ok() const { return (state_ == NULL); } - - bool IsOutOfMemory() const { return code() == StatusCode::OutOfMemory; } - bool IsKeyError() const { return code() == StatusCode::KeyError; } - bool IsIOError() const { return code() == StatusCode::IOError; } - bool IsTypeError() const { return code() == StatusCode::TypeError; } - bool IsValueError() const { return code() == StatusCode::ValueError; } - - bool IsUnknownError() const { return code() == StatusCode::UnknownError; } - - bool IsArrowError() const { return code() == StatusCode::ArrowError; } - - // Return a string representation of this status suitable for printing. - // Returns the string "OK" for success. - std::string ToString() const; - - // Return a string representation of the status code, without the message - // text or posix code information. - std::string CodeAsString() const; - - // Get the POSIX code associated with this Status, or -1 if there is none. - int16_t posix_code() const; - - private: - // OK status has a NULL state_. Otherwise, state_ is a new[] array - // of the following form: - // state_[0..3] == length of message - // state_[4] == code - // state_[5..6] == posix_code - // state_[7..] == message - const char* state_; - - StatusCode code() const { - return ((state_ == NULL) ? - StatusCode::OK : static_cast(state_[4])); - } - - Status(StatusCode code, const std::string& msg, int16_t posix_code); - static const char* CopyState(const char* s); -}; - -inline Status::Status(const Status& s) { - state_ = (s.state_ == NULL) ? NULL : CopyState(s.state_); -} - -inline void Status::operator=(const Status& s) { - // The following condition catches both aliasing (when this == &s), - // and the common case where both s and *this are ok. - if (state_ != s.state_) { - delete[] state_; - state_ = (s.state_ == NULL) ? NULL : CopyState(s.state_); - } -} - -} // namespace pyarrow - -#endif // PYARROW_STATUS_H_