From 41bf5f8fa44006fdbe29ae5736dcfb8e61573fc9 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Mon, 12 Mar 2018 15:33:55 +0100 Subject: [PATCH] ARROW-2292: [Python] Rename frombuffer() to py_buffer() --- python/doc/source/api.rst | 2 +- python/doc/source/ipc.rst | 5 +-- python/doc/source/memory.rst | 2 +- python/pyarrow/__init__.py | 11 ++++--- python/pyarrow/io.pxi | 11 ++++--- python/pyarrow/serialization.py | 4 +-- python/pyarrow/tests/test_io.py | 38 +++++++++++----------- python/pyarrow/tests/test_serialization.py | 6 ++-- python/pyarrow/util.py | 8 ++--- 9 files changed, 46 insertions(+), 41 deletions(-) diff --git a/python/doc/source/api.rst b/python/doc/source/api.rst index 3db1a04b6d20..cb9993302a7a 100644 --- a/python/doc/source/api.rst +++ b/python/doc/source/api.rst @@ -213,7 +213,7 @@ Input / Output and Shared Memory allocate_buffer compress decompress - frombuffer + py_buffer foreign_buffer Buffer ResizableBuffer diff --git a/python/doc/source/ipc.rst b/python/doc/source/ipc.rst index bce8b1ed1e95..c77888ab906b 100644 --- a/python/doc/source/ipc.rst +++ b/python/doc/source/ipc.rst @@ -296,12 +296,13 @@ which are zero-copy convertible to Python ``memoryview`` objects: memoryview(components['data'][0]) -A memoryview can be converted back to a ``Buffer`` with ``pyarrow.frombuffer``: +A memoryview can be converted back to an Arrow ``Buffer`` with +``pyarrow.py_buffer``: .. ipython:: python mv = memoryview(components['data'][0]) - buf = pa.frombuffer(mv) + buf = pa.py_buffer(mv) An object can be reconstructed from its component-based representation using ``deserialize_components``: diff --git a/python/doc/source/memory.rst b/python/doc/source/memory.rst index 4806bbb85722..34664b898f9e 100644 --- a/python/doc/source/memory.rst +++ b/python/doc/source/memory.rst @@ -50,7 +50,7 @@ implements the buffer protocol. 
Let's consider a bytes object: import pyarrow as pa data = b'abcdefghijklmnopqrstuvwxyz' - buf = pa.frombuffer(data) + buf = pa.py_buffer(data) buf buf.size diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py index 965a37b4b9cf..bfd7d4db9865 100644 --- a/python/pyarrow/__init__.py +++ b/python/pyarrow/__init__.py @@ -28,7 +28,8 @@ def parse_version(root): from setuptools_scm import version_from_scm import setuptools_scm.git - describe = setuptools_scm.git.DEFAULT_DESCRIBE + " --match 'apache-arrow-[0-9]*'" + describe = (setuptools_scm.git.DEFAULT_DESCRIBE + + " --match 'apache-arrow-[0-9]*'") # Strip catchall from the commandline describe = describe.replace("--match *.*", "") version = setuptools_scm.git.parse(root, describe) @@ -86,8 +87,8 @@ def parse_version(root): from pyarrow.lib import TimestampType # Buffers, allocation -from pyarrow.lib import (Buffer, ResizableBuffer, foreign_buffer, compress, - decompress, allocate_buffer, frombuffer) +from pyarrow.lib import (Buffer, ResizableBuffer, foreign_buffer, py_buffer, + compress, decompress, allocate_buffer) from pyarrow.lib import (MemoryPool, total_allocated_bytes, set_memory_pool, default_memory_pool, @@ -163,7 +164,9 @@ def _plasma_store_entry_point(): # ---------------------------------------------------------------------- # Deprecations -from pyarrow.util import _deprecate_class # noqa +from pyarrow.util import _deprecate_api # noqa + +frombuffer = _deprecate_api('frombuffer', 'py_buffer', py_buffer, '0.9.0') # ---------------------------------------------------------------------- # Returning absolute path to the pyarrow include directory (if bundled, e.g. 
in diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi index 15ecd0164e43..3947323233f8 100644 --- a/python/pyarrow/io.pxi +++ b/python/pyarrow/io.pxi @@ -26,6 +26,7 @@ import six import sys import threading import time +import warnings # 64K @@ -211,7 +212,7 @@ cdef class NativeFile: if isinstance(data, six.string_types): data = tobytes(data) - cdef Buffer arrow_buffer = frombuffer(data) + cdef Buffer arrow_buffer = py_buffer(data) cdef const uint8_t* buf = arrow_buffer.buffer.get().data() cdef int64_t bufsize = len(arrow_buffer) @@ -833,14 +834,14 @@ cdef class BufferReader(NativeFile): if isinstance(obj, Buffer): self.buffer = obj else: - self.buffer = frombuffer(obj) + self.buffer = py_buffer(obj) self.rd_file.reset(new CBufferReader(self.buffer.buffer)) self.is_readable = True self.closed = False -def frombuffer(object obj): +def py_buffer(object obj): """ Construct an Arrow buffer from a Python bytes object """ @@ -966,7 +967,7 @@ def compress(object buf, codec='lz4', asbytes=False, memory_pool=None): check_status(CCodec.Create(c_codec, &compressor)) if not isinstance(buf, Buffer): - buf = frombuffer(buf) + buf = py_buffer(buf) c_buf = ( buf).buffer.get() @@ -1031,7 +1032,7 @@ def decompress(object buf, decompressed_size=None, codec='lz4', check_status(CCodec.Create(c_codec, &compressor)) if not isinstance(buf, Buffer): - buf = frombuffer(buf) + buf = py_buffer(buf) c_buf = ( buf).buffer.get() diff --git a/python/pyarrow/serialization.py b/python/pyarrow/serialization.py index bdf753579683..6c8df350bf46 100644 --- a/python/pyarrow/serialization.py +++ b/python/pyarrow/serialization.py @@ -23,7 +23,7 @@ from pyarrow.compat import builtin_pickle from pyarrow.lib import (SerializationContext, _default_serialization_context, - frombuffer) + py_buffer) try: import cloudpickle @@ -46,7 +46,7 @@ def _deserialize_numpy_array_list(data): def _pickle_to_buffer(x): pickled = builtin_pickle.dumps(x, protocol=builtin_pickle.HIGHEST_PROTOCOL) - return 
frombuffer(pickled) + return py_buffer(pickled) def _load_pickle_from_buffer(data): diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py index fe680133b4b3..591381085c2a 100644 --- a/python/pyarrow/tests/test_io.py +++ b/python/pyarrow/tests/test_io.py @@ -170,7 +170,7 @@ def test_python_file_closing(): def test_buffer_bytes(): val = b'some data' - buf = pa.frombuffer(val) + buf = pa.py_buffer(val) assert isinstance(buf, pa.Buffer) assert not buf.is_mutable @@ -182,7 +182,7 @@ def test_buffer_bytes(): def test_buffer_memoryview(): val = b'some data' - buf = pa.frombuffer(val) + buf = pa.py_buffer(val) assert isinstance(buf, pa.Buffer) assert not buf.is_mutable @@ -194,7 +194,7 @@ def test_buffer_memoryview(): def test_buffer_bytearray(): val = bytearray(b'some data') - buf = pa.frombuffer(val) + buf = pa.py_buffer(val) assert isinstance(buf, pa.Buffer) assert buf.is_mutable @@ -206,14 +206,14 @@ def test_buffer_bytearray(): def test_buffer_invalid(): with pytest.raises(TypeError, match="(bytes-like object|buffer interface)"): - pa.frombuffer(None) + pa.py_buffer(None) def test_buffer_to_numpy(): # Make sure creating a numpy array from an arrow buffer works byte_array = bytearray(20) byte_array[0] = 42 - buf = pa.frombuffer(byte_array) + buf = pa.py_buffer(byte_array) array = np.frombuffer(buf, dtype="uint8") assert array[0] == byte_array[0] byte_array[0] += 1 @@ -224,14 +224,14 @@ def test_buffer_to_numpy(): def test_buffer_from_numpy(): # C-contiguous arr = np.arange(12, dtype=np.int8).reshape((3, 4)) - buf = pa.frombuffer(arr) + buf = pa.py_buffer(arr) assert buf.to_pybytes() == arr.tobytes() # F-contiguous; note strides informations is lost - buf = pa.frombuffer(arr.T) + buf = pa.py_buffer(arr.T) assert buf.to_pybytes() == arr.tobytes() # Non-contiguous with pytest.raises(ValueError, match="not contiguous"): - buf = pa.frombuffer(arr.T[::2]) + buf = pa.py_buffer(arr.T[::2]) def test_buffer_equals(): @@ -250,11 +250,11 @@ def ne(a, b): 
b2 = bytearray(b1) b3 = bytearray(b1) b3[0] = 42 - buf1 = pa.frombuffer(b1) - buf2 = pa.frombuffer(b2) - buf3 = pa.frombuffer(b2) - buf4 = pa.frombuffer(b3) - buf5 = pa.frombuffer(np.frombuffer(b2, dtype=np.int16)) + buf1 = pa.py_buffer(b1) + buf2 = pa.py_buffer(b2) + buf3 = pa.py_buffer(b2) + buf4 = pa.py_buffer(b3) + buf5 = pa.py_buffer(np.frombuffer(b2, dtype=np.int16)) eq(buf1, buf1) eq(buf1, buf2) eq(buf2, buf3) @@ -266,7 +266,7 @@ def ne(a, b): def test_buffer_hashing(): # Buffers are unhashable with pytest.raises(TypeError, match="unhashable"): - hash(pa.frombuffer(b'123')) + hash(pa.py_buffer(b'123')) def test_foreign_buffer(): @@ -307,7 +307,7 @@ def test_compress_decompress(): test_data = (np.random.randint(0, 255, size=INPUT_SIZE) .astype(np.uint8) .tostring()) - test_buf = pa.frombuffer(test_data) + test_buf = pa.py_buffer(test_data) codecs = ['lz4', 'snappy', 'gzip', 'zstd', 'brotli'] for codec in codecs: @@ -333,7 +333,7 @@ def test_compress_decompress(): def test_buffer_memoryview_is_immutable(): val = b'some data' - buf = pa.frombuffer(val) + buf = pa.py_buffer(val) assert not buf.is_mutable assert isinstance(buf, pa.Buffer) @@ -368,9 +368,9 @@ def test_uninitialized_buffer(): with check_uninitialized(): memoryview(buf) with check_uninitialized(): - buf.equals(pa.frombuffer(b'')) + buf.equals(pa.py_buffer(b'')) with check_uninitialized(): - pa.frombuffer(b'').equals(buf) + pa.py_buffer(b'').equals(buf) def test_memory_output_stream(): @@ -400,7 +400,7 @@ def test_inmemory_write_after_closed(): def test_buffer_protocol_ref_counting(): def make_buffer(bytes_obj): - return bytearray(pa.frombuffer(bytes_obj)) + return bytearray(pa.py_buffer(bytes_obj)) buf = make_buffer(b'foo') gc.collect() diff --git a/python/pyarrow/tests/test_serialization.py b/python/pyarrow/tests/test_serialization.py index 64aab0671244..7ddf3958e5d8 100644 --- a/python/pyarrow/tests/test_serialization.py +++ b/python/pyarrow/tests/test_serialization.py @@ -445,7 +445,7 @@ class 
BufferClass(object): pass def serialize_buffer_class(obj): - return pa.frombuffer(b"hello") + return pa.py_buffer(b"hello") def deserialize_buffer_class(serialized_obj): return serialized_obj @@ -581,7 +581,7 @@ def test_serialize_subclasses(): def test_serialize_to_components_invalid_cases(): - buf = pa.frombuffer(b'hello') + buf = pa.py_buffer(b'hello') components = { 'num_tensors': 0, @@ -631,7 +631,7 @@ def test_deserialize_buffer_in_different_process(): import subprocess f = tempfile.NamedTemporaryFile(delete=False) - b = pa.serialize(pa.frombuffer(b'hello')).to_buffer() + b = pa.serialize(pa.py_buffer(b'hello')).to_buffer() f.write(b.to_pybytes()) f.close() diff --git a/python/pyarrow/util.py b/python/pyarrow/util.py index d984e19215b4..b8825658d88e 100644 --- a/python/pyarrow/util.py +++ b/python/pyarrow/util.py @@ -27,11 +27,11 @@ def decorator(g): return decorator -def _deprecate_class(old_name, new_name, klass, next_version='0.5.0'): +def _deprecate_api(old_name, new_name, api, next_version): msg = ('pyarrow.{0} is deprecated as of {1}, please use {2} instead' .format(old_name, next_version, new_name)) - def deprecated_factory(*args, **kwargs): + def wrapper(*args, **kwargs): warnings.warn(msg, FutureWarning) - return klass(*args) - return deprecated_factory + return api(*args) + return wrapper