Skip to content

Commit

Permalink
ARROW-8026: [Python] Support memoryview as a value type for creating binary-like arrays
Browse files Browse the repository at this point in the history

This also handles non-contiguous memory views.

Closes #6574 from wesm/ARROW-8026

Authored-by: Wes McKinney <wesm+git@apache.org>
Signed-off-by: Wes McKinney <wesm+git@apache.org>
  • Loading branch information
wesm committed Mar 11, 2020
1 parent 2fd4892 commit 14426df
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 7 deletions.
16 changes: 12 additions & 4 deletions cpp/src/arrow/python/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,14 @@ struct PyBytesView {
this->size = PyByteArray_GET_SIZE(obj);
this->ref.reset();
return Status::OK();
} else if (PyMemoryView_Check(obj)) {
PyObject* contig_view = PyMemoryView_GetContiguous(obj, PyBUF_READ, 'C');
RETURN_IF_PYERROR();
this->ref.reset(contig_view);
Py_buffer* buf = PyMemoryView_GET_BUFFER(contig_view);
this->bytes = reinterpret_cast<const char*>(buf->buf);
this->size = buf->len;
return Status::OK();
} else {
return Status::TypeError("Expected ", expected_msg, ", got a '",
Py_TYPE(obj)->tp_name, "' object");
Expand All @@ -266,10 +274,6 @@ struct PyBytesView {
OwnedRef ref;
};

// Return the common PyArrow memory pool
ARROW_PYTHON_EXPORT void set_default_memory_pool(MemoryPool* pool);
ARROW_PYTHON_EXPORT MemoryPool* get_memory_pool();

class ARROW_PYTHON_EXPORT PyBuffer : public Buffer {
public:
/// While memoryview objects support multi-dimensional buffers, PyBuffer only supports
Expand All @@ -285,6 +289,10 @@ class ARROW_PYTHON_EXPORT PyBuffer : public Buffer {
Py_buffer py_buf_;
};

// Return the common PyArrow memory pool
ARROW_PYTHON_EXPORT void set_default_memory_pool(MemoryPool* pool);
ARROW_PYTHON_EXPORT MemoryPool* get_memory_pool();

// This is annoying: because C++11 does not allow implicit conversion of string
// literals to non-const char*, we need to go through some gymnastics to use
// PyObject_CallMethod without a lot of pain (its arguments are non-const
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/python/helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ ARROW_PYTHON_EXPORT
bool PyFloat_IsNaN(PyObject* obj);

// \brief Check whether a Python object is one of the binary-like value types
//
// Returns true when obj passes PyBytes_Check, PyByteArray_Check or
// PyMemoryView_Check, and false otherwise.
inline bool IsPyBinary(PyObject* obj) {
  // A single return covering all three checks; an earlier return that tested
  // only bytes/bytearray would make the memoryview check unreachable.
  return PyBytes_Check(obj) || PyByteArray_Check(obj) || PyMemoryView_Check(obj);
}

// \brief Convert a Python integer into a C integer
Expand Down
7 changes: 5 additions & 2 deletions python/pyarrow/tests/test_convert_builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -711,16 +711,19 @@ def test_large_binary_value(ty):

def test_sequence_bytes():
    """Build binary arrays from a mix of bytes-like values, str and None.

    The input holds bytes, contiguous and non-contiguous memoryviews, a str,
    a bytearray and a None. For each requested type (inferred, binary,
    large_binary) the resulting array must have six entries, exactly one
    null, and round-trip to the expected bytes values.
    """
    u1 = b'ma\xc3\xb1ana'

    data = [b'foo',
            memoryview(b'dada'),
            memoryview(b'd-a-t-a')[::2],  # non-contiguous is made contiguous
            u1.decode('utf-8'),  # unicode gets encoded,
            bytearray(b'bar'),
            None]
    for ty in [None, pa.binary(), pa.large_binary()]:
        arr = pa.array(data, type=ty)
        assert len(arr) == 6
        assert arr.null_count == 1
        # `arr.type == ty or pa.binary()` can never fail: when the comparison
        # is false the expression falls back to a truthy type object. Compare
        # against the type we actually expect; with ty=None the type is
        # inferred and is expected to be binary.
        expected_type = ty if ty is not None else pa.binary()
        assert arr.type == expected_type
        assert arr.to_pylist() == [b'foo', b'dada', b'data', u1, b'bar', None]


@pytest.mark.parametrize("ty", [pa.string(), pa.large_string()])
Expand Down

0 comments on commit 14426df

Please sign in to comment.