import ctypes
import pyarrow as pa
# C Data Interface layouts
class ArrowSchema(ctypes.Structure):
    """ctypes mirror of the Arrow C Data Interface ``struct ArrowSchema``.

    Instances are allocated by the caller and filled in by an exporter that
    is given the struct's address. Pointer members other than the two
    NUL-terminated strings are kept as opaque addresses.
    """

    _fields_ = [
        ("format", ctypes.c_char_p),
        ("name", ctypes.c_char_p),
        # Opaque address rather than c_char_p: the metadata payload is a
        # length-prefixed binary blob, not a NUL-terminated string, so the
        # automatic bytes conversion of c_char_p would cut it off at the
        # first zero byte. Read it with ctypes.string_at(addr, size).
        ("metadata", ctypes.c_void_p),
        ("flags", ctypes.c_int64),
        ("n_children", ctypes.c_int64),
        ("children", ctypes.POINTER(ctypes.c_void_p)),
        ("dictionary", ctypes.c_void_p),
        # Address of the release callback; None (NULL) when nothing is held.
        ("release", ctypes.c_void_p),
        ("private_data", ctypes.c_void_p),
    ]
class ArrowArray(ctypes.Structure):
    """ctypes mirror of the Arrow C Data Interface ``struct ArrowArray``.

    The struct starts with five 64-bit integers and ends with five
    pointer-sized members; ``buffers`` and ``children`` are pointers to
    arrays of addresses, the remaining pointers are opaque addresses.
    """

    _fields_ = [
        # Leading 64-bit integer members, in declaration order.
        *(
            (int_member, ctypes.c_int64)
            for int_member in (
                "length",
                "null_count",
                "offset",
                "n_buffers",
                "n_children",
            )
        ),
        # Pointer-to-pointer members.
        *(
            (table_member, ctypes.POINTER(ctypes.c_void_p))
            for table_member in ("buffers", "children")
        ),
        # Plain address members.
        *(
            (addr_member, ctypes.c_void_p)
            for addr_member in ("dictionary", "release", "private_data")
        ),
    ]
# Caller-allocated structs that receive every export. They are reused for all
# three arrays, so each export is released before the next one overwrites
# them; otherwise the previous export's resources would be leaked.
c_array = ArrowArray()
c_schema = ArrowSchema()

# Both release callbacks share the C signature: void (*release)(struct T*).
_RELEASE_CALLBACK = ctypes.CFUNCTYPE(None, ctypes.c_void_p)


def _release_exported(c_struct):
    """Call ``c_struct.release`` on the struct if it currently holds an export.

    ``release`` reads back as ``None`` when the pointer is NULL, i.e. the
    struct is empty or has already been released, in which case nothing
    happens.
    """
    if c_struct.release:
        _RELEASE_CALLBACK(c_struct.release)(ctypes.addressof(c_struct))


arrs = [
    # exporting this is fine
    pa.array([b'a', b'e', b'a', b'e'], type=pa.binary_view()),
    # this also works fine
    pa.chunked_array([
        pa.array([b'a', b'e'], type=pa.binary_view()),
        pa.array([b'a', b'e'], type=pa.binary_view()),
    ]).combine_chunks(),
    # chunked(binary...) -> binary -> binary_view segfaults
    pa.chunked_array([
        pa.array([b'a', b'e'], type=pa.binary()),
        pa.array([b'a', b'e'], type=pa.binary()),
    ]).combine_chunks().cast(pa.binary_view()),
]
print(arrs)
print([arr.validate(full=True) for arr in arrs])

# The first two exports succeed; the export of arrs[2] is the one that fails.
for arr in arrs:
    arr._export_to_c(ctypes.addressof(c_array), ctypes.addressof(c_schema))
    # Hand the exported resources back before the structs are reused.
    _release_exported(c_array)
    _release_exported(c_schema)
Segmentation fault (core dumped)
Describe the bug, including details regarding any error messages, version, and platform.
`_export_to_c` seems to cause a segmentation fault for a `binary_view` array constructed as follows: `ChunkedArray(binary...)` --combine--> `BinaryArray` --cast--> `BinaryViewArray`.
Python version: 3.11.13
PyArrow version: 23.0.1
Reproducer below.
Component(s)
Python