Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion python/doc/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ Input / Output and Shared Memory
allocate_buffer
compress
decompress
frombuffer
py_buffer
foreign_buffer
Buffer
ResizableBuffer
Expand Down
5 changes: 3 additions & 2 deletions python/doc/source/ipc.rst
Original file line number Diff line number Diff line change
Expand Up @@ -296,12 +296,13 @@ which are zero-copy convertible to Python ``memoryview`` objects:

memoryview(components['data'][0])

A memoryview can be converted back to a ``Buffer`` with ``pyarrow.frombuffer``:
A memoryview can be converted back to an Arrow ``Buffer`` with
``pyarrow.py_buffer``:

.. ipython:: python

mv = memoryview(components['data'][0])
buf = pa.frombuffer(mv)
buf = pa.py_buffer(mv)

An object can be reconstructed from its component-based representation using
``deserialize_components``:
Expand Down
2 changes: 1 addition & 1 deletion python/doc/source/memory.rst
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ implements the buffer protocol. Let's consider a bytes object:
import pyarrow as pa

data = b'abcdefghijklmnopqrstuvwxyz'
buf = pa.frombuffer(data)
buf = pa.py_buffer(data)
buf
buf.size

Expand Down
11 changes: 7 additions & 4 deletions python/pyarrow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@
def parse_version(root):
from setuptools_scm import version_from_scm
import setuptools_scm.git
describe = setuptools_scm.git.DEFAULT_DESCRIBE + " --match 'apache-arrow-[0-9]*'"
describe = (setuptools_scm.git.DEFAULT_DESCRIBE +
" --match 'apache-arrow-[0-9]*'")
# Strip catchall from the commandline
describe = describe.replace("--match *.*", "")
version = setuptools_scm.git.parse(root, describe)
Expand Down Expand Up @@ -86,8 +87,8 @@ def parse_version(root):
from pyarrow.lib import TimestampType

# Buffers, allocation
from pyarrow.lib import (Buffer, ResizableBuffer, foreign_buffer, compress,
decompress, allocate_buffer, frombuffer)
from pyarrow.lib import (Buffer, ResizableBuffer, foreign_buffer, py_buffer,
compress, decompress, allocate_buffer)

from pyarrow.lib import (MemoryPool, total_allocated_bytes,
set_memory_pool, default_memory_pool,
Expand Down Expand Up @@ -163,7 +164,9 @@ def _plasma_store_entry_point():
# ----------------------------------------------------------------------
# Deprecations

from pyarrow.util import _deprecate_class # noqa
from pyarrow.util import _deprecate_api # noqa

frombuffer = _deprecate_api('frombuffer', 'py_buffer', py_buffer, '0.9.0')

# ----------------------------------------------------------------------
# Returning absolute path to the pyarrow include directory (if bundled, e.g. in
Expand Down
11 changes: 6 additions & 5 deletions python/pyarrow/io.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import six
import sys
import threading
import time
import warnings


# 64K
Expand Down Expand Up @@ -211,7 +212,7 @@ cdef class NativeFile:
if isinstance(data, six.string_types):
data = tobytes(data)

cdef Buffer arrow_buffer = frombuffer(data)
cdef Buffer arrow_buffer = py_buffer(data)

cdef const uint8_t* buf = arrow_buffer.buffer.get().data()
cdef int64_t bufsize = len(arrow_buffer)
Expand Down Expand Up @@ -833,14 +834,14 @@ cdef class BufferReader(NativeFile):
if isinstance(obj, Buffer):
self.buffer = obj
else:
self.buffer = frombuffer(obj)
self.buffer = py_buffer(obj)

self.rd_file.reset(new CBufferReader(self.buffer.buffer))
self.is_readable = True
self.closed = False


def frombuffer(object obj):
def py_buffer(object obj):
"""
Construct an Arrow buffer from a Python bytes-like object (any object
implementing the buffer protocol, e.g. bytes, bytearray or memoryview)
"""
Expand Down Expand Up @@ -966,7 +967,7 @@ def compress(object buf, codec='lz4', asbytes=False, memory_pool=None):
check_status(CCodec.Create(c_codec, &compressor))

if not isinstance(buf, Buffer):
buf = frombuffer(buf)
buf = py_buffer(buf)

c_buf = (<Buffer> buf).buffer.get()

Expand Down Expand Up @@ -1031,7 +1032,7 @@ def decompress(object buf, decompressed_size=None, codec='lz4',
check_status(CCodec.Create(c_codec, &compressor))

if not isinstance(buf, Buffer):
buf = frombuffer(buf)
buf = py_buffer(buf)

c_buf = (<Buffer> buf).buffer.get()

Expand Down
4 changes: 2 additions & 2 deletions python/pyarrow/serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

from pyarrow.compat import builtin_pickle
from pyarrow.lib import (SerializationContext, _default_serialization_context,
frombuffer)
py_buffer)

try:
import cloudpickle
Expand All @@ -46,7 +46,7 @@ def _deserialize_numpy_array_list(data):

def _pickle_to_buffer(x):
pickled = builtin_pickle.dumps(x, protocol=builtin_pickle.HIGHEST_PROTOCOL)
return frombuffer(pickled)
return py_buffer(pickled)


def _load_pickle_from_buffer(data):
Expand Down
38 changes: 19 additions & 19 deletions python/pyarrow/tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ def test_python_file_closing():
def test_buffer_bytes():
val = b'some data'

buf = pa.frombuffer(val)
buf = pa.py_buffer(val)
assert isinstance(buf, pa.Buffer)
assert not buf.is_mutable

Expand All @@ -182,7 +182,7 @@ def test_buffer_bytes():
def test_buffer_memoryview():
val = b'some data'

buf = pa.frombuffer(val)
buf = pa.py_buffer(val)
assert isinstance(buf, pa.Buffer)
assert not buf.is_mutable

Expand All @@ -194,7 +194,7 @@ def test_buffer_memoryview():
def test_buffer_bytearray():
val = bytearray(b'some data')

buf = pa.frombuffer(val)
buf = pa.py_buffer(val)
assert isinstance(buf, pa.Buffer)
assert buf.is_mutable

Expand All @@ -206,14 +206,14 @@ def test_buffer_bytearray():
def test_buffer_invalid():
with pytest.raises(TypeError,
match="(bytes-like object|buffer interface)"):
pa.frombuffer(None)
pa.py_buffer(None)


def test_buffer_to_numpy():
# Make sure creating a numpy array from an arrow buffer works
byte_array = bytearray(20)
byte_array[0] = 42
buf = pa.frombuffer(byte_array)
buf = pa.py_buffer(byte_array)
array = np.frombuffer(buf, dtype="uint8")
assert array[0] == byte_array[0]
byte_array[0] += 1
Expand All @@ -224,14 +224,14 @@ def test_buffer_to_numpy():
def test_buffer_from_numpy():
# C-contiguous
arr = np.arange(12, dtype=np.int8).reshape((3, 4))
buf = pa.frombuffer(arr)
buf = pa.py_buffer(arr)
assert buf.to_pybytes() == arr.tobytes()
# F-contiguous; note that stride information is lost
buf = pa.frombuffer(arr.T)
buf = pa.py_buffer(arr.T)
assert buf.to_pybytes() == arr.tobytes()
# Non-contiguous
with pytest.raises(ValueError, match="not contiguous"):
buf = pa.frombuffer(arr.T[::2])
buf = pa.py_buffer(arr.T[::2])


def test_buffer_equals():
Expand All @@ -250,11 +250,11 @@ def ne(a, b):
b2 = bytearray(b1)
b3 = bytearray(b1)
b3[0] = 42
buf1 = pa.frombuffer(b1)
buf2 = pa.frombuffer(b2)
buf3 = pa.frombuffer(b2)
buf4 = pa.frombuffer(b3)
buf5 = pa.frombuffer(np.frombuffer(b2, dtype=np.int16))
buf1 = pa.py_buffer(b1)
buf2 = pa.py_buffer(b2)
buf3 = pa.py_buffer(b2)
buf4 = pa.py_buffer(b3)
buf5 = pa.py_buffer(np.frombuffer(b2, dtype=np.int16))
eq(buf1, buf1)
eq(buf1, buf2)
eq(buf2, buf3)
Expand All @@ -266,7 +266,7 @@ def ne(a, b):
def test_buffer_hashing():
# Buffers are unhashable
with pytest.raises(TypeError, match="unhashable"):
hash(pa.frombuffer(b'123'))
hash(pa.py_buffer(b'123'))


def test_foreign_buffer():
Expand Down Expand Up @@ -307,7 +307,7 @@ def test_compress_decompress():
test_data = (np.random.randint(0, 255, size=INPUT_SIZE)
.astype(np.uint8)
.tostring())
test_buf = pa.frombuffer(test_data)
test_buf = pa.py_buffer(test_data)

codecs = ['lz4', 'snappy', 'gzip', 'zstd', 'brotli']
for codec in codecs:
Expand All @@ -333,7 +333,7 @@ def test_compress_decompress():
def test_buffer_memoryview_is_immutable():
val = b'some data'

buf = pa.frombuffer(val)
buf = pa.py_buffer(val)
assert not buf.is_mutable
assert isinstance(buf, pa.Buffer)

Expand Down Expand Up @@ -368,9 +368,9 @@ def test_uninitialized_buffer():
with check_uninitialized():
memoryview(buf)
with check_uninitialized():
buf.equals(pa.frombuffer(b''))
buf.equals(pa.py_buffer(b''))
with check_uninitialized():
pa.frombuffer(b'').equals(buf)
pa.py_buffer(b'').equals(buf)


def test_memory_output_stream():
Expand Down Expand Up @@ -400,7 +400,7 @@ def test_inmemory_write_after_closed():

def test_buffer_protocol_ref_counting():
def make_buffer(bytes_obj):
return bytearray(pa.frombuffer(bytes_obj))
return bytearray(pa.py_buffer(bytes_obj))

buf = make_buffer(b'foo')
gc.collect()
Expand Down
6 changes: 3 additions & 3 deletions python/pyarrow/tests/test_serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,7 @@ class BufferClass(object):
pass

def serialize_buffer_class(obj):
return pa.frombuffer(b"hello")
return pa.py_buffer(b"hello")

def deserialize_buffer_class(serialized_obj):
return serialized_obj
Expand Down Expand Up @@ -581,7 +581,7 @@ def test_serialize_subclasses():


def test_serialize_to_components_invalid_cases():
buf = pa.frombuffer(b'hello')
buf = pa.py_buffer(b'hello')

components = {
'num_tensors': 0,
Expand Down Expand Up @@ -631,7 +631,7 @@ def test_deserialize_buffer_in_different_process():
import subprocess

f = tempfile.NamedTemporaryFile(delete=False)
b = pa.serialize(pa.frombuffer(b'hello')).to_buffer()
b = pa.serialize(pa.py_buffer(b'hello')).to_buffer()
f.write(b.to_pybytes())
f.close()

Expand Down
8 changes: 4 additions & 4 deletions python/pyarrow/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,11 @@ def decorator(g):
return decorator


def _deprecate_api(old_name, new_name, api, next_version):
    """
    Build a stand-in for a renamed pyarrow API that warns on every call.

    Parameters
    ----------
    old_name : str
        Deprecated name, reported in the warning as ``pyarrow.<old_name>``.
    new_name : str
        Name that callers are told to use instead.
    api : callable
        Replacement implementation that every call is forwarded to.
    next_version : str
        Version string reported in the warning as the point of deprecation.

    Returns
    -------
    callable
        Wrapper that emits a ``FutureWarning`` and then returns
        ``api(*args, **kwargs)``.
    """
    # The message is built once here; it is identical for every call.
    msg = ('pyarrow.{0} is deprecated as of {1}, please use {2} instead'
           .format(old_name, next_version, new_name))

    def wrapper(*args, **kwargs):
        warnings.warn(msg, FutureWarning)
        # Forward keyword arguments as well as positional ones: the wrapper
        # accepts **kwargs, so dropping them would silently change the call.
        return api(*args, **kwargs)
    return wrapper