Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update py03 from 0.20 to 0.21 #5566

Merged
merged 10 commits into from
Apr 5, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion arrow-pyarrow-integration-testing/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,4 @@ crate-type = ["cdylib"]

[dependencies]
arrow = { path = "../arrow", features = ["pyarrow"] }
pyo3 = { version = "0.20", features = ["extension-module"] }
pyo3 = { version = "0.21", features = ["extension-module"] }
25 changes: 9 additions & 16 deletions arrow-pyarrow-integration-testing/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@ fn to_py_err(err: ArrowError) -> PyErr {

/// Returns `array + array` of an int64 array.
#[pyfunction]
fn double(array: &PyAny, py: Python) -> PyResult<PyObject> {
fn double(array: &Bound<PyAny>, py: Python) -> PyResult<PyObject> {
// import
let array = make_array(ArrayData::from_pyarrow(array)?);
let array = make_array(ArrayData::from_pyarrow_bound(&array)?);

// perform some operation
let array = array
Expand All @@ -60,15 +60,15 @@ fn double(array: &PyAny, py: Python) -> PyResult<PyObject> {
/// calls a lambda function that receives and returns an array
/// whose result must be the array multiplied by two
#[pyfunction]
fn double_py(lambda: &PyAny, py: Python) -> PyResult<bool> {
fn double_py(lambda: &Bound<PyAny>, py: Python) -> PyResult<bool> {
// create
let array = Arc::new(Int64Array::from(vec![Some(1), None, Some(3)]));
let expected = Arc::new(Int64Array::from(vec![Some(2), None, Some(6)])) as ArrayRef;

// to py
let pyarray = array.to_data().to_pyarrow(py)?;
let pyarray = lambda.call1((pyarray,))?;
let array = make_array(ArrayData::from_pyarrow(pyarray)?);
let array = make_array(ArrayData::from_pyarrow_bound(&pyarray)?);

Ok(array == expected)
}
Expand All @@ -82,16 +82,12 @@ fn make_empty_array(datatype: PyArrowType<DataType>, py: Python) -> PyResult<PyO

/// Returns the substring
#[pyfunction]
fn substring(
array: PyArrowType<ArrayData>,
start: i64,
) -> PyResult<PyArrowType<ArrayData>> {
fn substring(array: PyArrowType<ArrayData>, start: i64) -> PyResult<PyArrowType<ArrayData>> {
// import
let array = make_array(array.0);

// substring
let array =
kernels::substring::substring(array.as_ref(), start, None).map_err(to_py_err)?;
let array = kernels::substring::substring(array.as_ref(), start, None).map_err(to_py_err)?;

Ok(array.to_data().into())
}
Expand All @@ -102,8 +98,7 @@ fn concatenate(array: PyArrowType<ArrayData>, py: Python) -> PyResult<PyObject>
let array = make_array(array.0);

// concat
let array =
kernels::concat::concat(&[array.as_ref(), array.as_ref()]).map_err(to_py_err)?;
let array = kernels::concat::concat(&[array.as_ref(), array.as_ref()]).map_err(to_py_err)?;

array.to_data().to_pyarrow(py)
}
Expand All @@ -129,9 +124,7 @@ fn round_trip_array(obj: PyArrowType<ArrayData>) -> PyResult<PyArrowType<ArrayDa
}

#[pyfunction]
fn round_trip_record_batch(
obj: PyArrowType<RecordBatch>,
) -> PyResult<PyArrowType<RecordBatch>> {
fn round_trip_record_batch(obj: PyArrowType<RecordBatch>) -> PyResult<PyArrowType<RecordBatch>> {
Ok(obj)
}

Expand Down Expand Up @@ -168,7 +161,7 @@ fn boxed_reader_roundtrip(
}

#[pymodule]
fn arrow_pyarrow_integration_testing(_py: Python, m: &PyModule) -> PyResult<()> {
fn arrow_pyarrow_integration_testing(_py: Python, m: &Bound<PyModule>) -> PyResult<()> {
m.add_wrapped(wrap_pyfunction!(double))?;
m.add_wrapped(wrap_pyfunction!(double_py))?;
m.add_wrapped(wrap_pyfunction!(make_empty_array))?;
Expand Down
2 changes: 1 addition & 1 deletion arrow/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ arrow-select = { workspace = true }
arrow-string = { workspace = true }

rand = { version = "0.8", default-features = false, features = ["std", "std_rng"], optional = true }
pyo3 = { version = "0.20", default-features = false, optional = true }
pyo3 = { version = "0.21", default-features = false, optional = true }

[package.metadata.docs.rs]
features = ["prettyprint", "ipc_compression", "ffi", "pyarrow"]
Expand Down
96 changes: 55 additions & 41 deletions arrow/src/pyarrow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,12 @@ fn to_py_err(err: ArrowError) -> PyErr {
}

pub trait FromPyArrow: Sized {
fn from_pyarrow(value: &PyAny) -> PyResult<Self>;
#[deprecated(since = "52.0.0", note = "Use from_pyarrow_bound")]
fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
Self::from_pyarrow_bound(&value.as_borrowed())
}

fn from_pyarrow_bound(value: &Bound<PyAny>) -> PyResult<Self>;
}

/// Create a new PyArrow object from a arrow-rs type.
Expand All @@ -101,15 +106,19 @@ impl<T: ToPyArrow> IntoPyArrow for T {
}
}

fn validate_class(expected: &str, value: &PyAny) -> PyResult<()> {
let pyarrow = PyModule::import(value.py(), "pyarrow")?;
fn validate_class(expected: &str, value: &Bound<PyAny>) -> PyResult<()> {
let pyarrow = PyModule::import_bound(value.py(), "pyarrow")?;
let class = pyarrow.getattr(expected)?;
if !value.is_instance(class)? {
let expected_module = class.getattr("__module__")?.extract::<&str>()?;
let expected_name = class.getattr("__name__")?.extract::<&str>()?;
if !value.is_instance(&class)? {
let expected_module = class.getattr("__module__")?;
let expected_module = expected_module.extract::<&str>()?;
let expected_name = class.getattr("__name__")?;
let expected_name = expected_name.extract::<&str>()?;
let found_class = value.get_type();
let found_module = found_class.getattr("__module__")?.extract::<&str>()?;
let found_name = found_class.getattr("__name__")?.extract::<&str>()?;
let found_module = found_class.getattr("__module__")?;
let found_module = found_module.extract::<&str>()?;
let found_name = found_class.getattr("__name__")?;
let found_name = found_name.extract::<&str>()?;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Jefffrey these extracts should be changed to extract_bound. extract only exists with gil-refs enabled. I have no clue why the extract version is passing CI.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, a bit of digging, and a public extract actually does still exist, but it's been redirected to extract_bound. https://github.com/PyO3/pyo3/blob/c1f11fb4bddc836f820e0db3db514b4742b8a3e7/src/types/any.rs#L805

I have no clue what's causing my build to freak out

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This also breaks for me when gil-refs is not enabled. From this issue it appears that &str lost FromPyObject:

Finally, without the gil-refs feature we remove the existing implementation of FromPyObject for &str and instead implement this new trait. This works except for abi3 on old Python versions, where we have to settle for PyFunctionArgument only and allow users of .extract() to break.

This does implement some very targeted breakage for specific considerations, but I wonder if this is necessary as the correct way to users off the GIL Ref API in a case where otherwise it's near impossible for them to realise this uses it.

The reason it is not caught by CI is because CI does not have any abi... feature enabled.

return Err(PyTypeError::new_err(format!(
"Expected instance of {}.{}, got {}.{}",
expected_module, expected_name, found_module, found_name
Expand All @@ -118,7 +127,7 @@ fn validate_class(expected: &str, value: &PyAny) -> PyResult<()> {
Ok(())
}

fn validate_pycapsule(capsule: &PyCapsule, name: &str) -> PyResult<()> {
fn validate_pycapsule(capsule: &Bound<PyCapsule>, name: &str) -> PyResult<()> {
let capsule_name = capsule.name()?;
if capsule_name.is_none() {
return Err(PyValueError::new_err(
Expand All @@ -138,13 +147,13 @@ fn validate_pycapsule(capsule: &PyCapsule, name: &str) -> PyResult<()> {
}

impl FromPyArrow for DataType {
fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
fn from_pyarrow_bound(value: &Bound<PyAny>) -> PyResult<Self> {
// Newer versions of PyArrow as well as other libraries with Arrow data implement this
// method, so prefer it over _export_to_c.
// See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
if value.hasattr("__arrow_c_schema__")? {
let capsule: &PyCapsule =
PyTryInto::try_into(value.getattr("__arrow_c_schema__")?.call0()?)?;
let capsule = value.getattr("__arrow_c_schema__")?.call0()?;
let capsule = capsule.downcast::<PyCapsule>()?;
validate_pycapsule(capsule, "arrow_schema")?;

let schema_ptr = unsafe { capsule.reference::<FFI_ArrowSchema>() };
Expand All @@ -166,21 +175,21 @@ impl ToPyArrow for DataType {
fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
let c_schema = FFI_ArrowSchema::try_from(self).map_err(to_py_err)?;
let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
let module = py.import("pyarrow")?;
let module = py.import_bound("pyarrow")?;
let class = module.getattr("DataType")?;
let dtype = class.call_method1("_import_from_c", (c_schema_ptr as Py_uintptr_t,))?;
Ok(dtype.into())
}
}

impl FromPyArrow for Field {
fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
fn from_pyarrow_bound(value: &Bound<PyAny>) -> PyResult<Self> {
// Newer versions of PyArrow as well as other libraries with Arrow data implement this
// method, so prefer it over _export_to_c.
// See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
if value.hasattr("__arrow_c_schema__")? {
let capsule: &PyCapsule =
PyTryInto::try_into(value.getattr("__arrow_c_schema__")?.call0()?)?;
let capsule = value.getattr("__arrow_c_schema__")?.call0()?;
let capsule = capsule.downcast::<PyCapsule>()?;
validate_pycapsule(capsule, "arrow_schema")?;

let schema_ptr = unsafe { capsule.reference::<FFI_ArrowSchema>() };
Expand All @@ -202,21 +211,21 @@ impl ToPyArrow for Field {
fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
let c_schema = FFI_ArrowSchema::try_from(self).map_err(to_py_err)?;
let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
let module = py.import("pyarrow")?;
let module = py.import_bound("pyarrow")?;
let class = module.getattr("Field")?;
let dtype = class.call_method1("_import_from_c", (c_schema_ptr as Py_uintptr_t,))?;
Ok(dtype.into())
}
}

impl FromPyArrow for Schema {
fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
fn from_pyarrow_bound(value: &Bound<PyAny>) -> PyResult<Self> {
// Newer versions of PyArrow as well as other libraries with Arrow data implement this
// method, so prefer it over _export_to_c.
// See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
if value.hasattr("__arrow_c_schema__")? {
let capsule: &PyCapsule =
PyTryInto::try_into(value.getattr("__arrow_c_schema__")?.call0()?)?;
let capsule = value.getattr("__arrow_c_schema__")?.call0()?;
let capsule = capsule.downcast::<PyCapsule>()?;
validate_pycapsule(capsule, "arrow_schema")?;

let schema_ptr = unsafe { capsule.reference::<FFI_ArrowSchema>() };
Expand All @@ -238,15 +247,15 @@ impl ToPyArrow for Schema {
fn to_pyarrow(&self, py: Python) -> PyResult<PyObject> {
let c_schema = FFI_ArrowSchema::try_from(self).map_err(to_py_err)?;
let c_schema_ptr = &c_schema as *const FFI_ArrowSchema;
let module = py.import("pyarrow")?;
let module = py.import_bound("pyarrow")?;
let class = module.getattr("Schema")?;
let schema = class.call_method1("_import_from_c", (c_schema_ptr as Py_uintptr_t,))?;
Ok(schema.into())
}
}

impl FromPyArrow for ArrayData {
fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
fn from_pyarrow_bound(value: &Bound<PyAny>) -> PyResult<Self> {
// Newer versions of PyArrow as well as other libraries with Arrow data implement this
// method, so prefer it over _export_to_c.
// See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
Expand All @@ -259,8 +268,10 @@ impl FromPyArrow for ArrayData {
));
}

let schema_capsule: &PyCapsule = PyTryInto::try_into(tuple.get_item(0)?)?;
let array_capsule: &PyCapsule = PyTryInto::try_into(tuple.get_item(1)?)?;
let schema_capsule = tuple.get_item(0)?;
let schema_capsule = schema_capsule.downcast::<PyCapsule>()?;
let array_capsule = tuple.get_item(1)?;
let array_capsule = array_capsule.downcast::<PyCapsule>()?;

validate_pycapsule(schema_capsule, "arrow_schema")?;
validate_pycapsule(array_capsule, "arrow_array")?;
Expand Down Expand Up @@ -296,7 +307,7 @@ impl ToPyArrow for ArrayData {
let array = FFI_ArrowArray::new(self);
let schema = FFI_ArrowSchema::try_from(self.data_type()).map_err(to_py_err)?;

let module = py.import("pyarrow")?;
let module = py.import_bound("pyarrow")?;
let class = module.getattr("Array")?;
let array = class.call_method1(
"_import_from_c",
Expand All @@ -310,9 +321,9 @@ impl ToPyArrow for ArrayData {
}

impl<T: FromPyArrow> FromPyArrow for Vec<T> {
fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
fn from_pyarrow_bound(value: &Bound<PyAny>) -> PyResult<Self> {
let list = value.downcast::<PyList>()?;
list.iter().map(|x| T::from_pyarrow(x)).collect()
list.iter().map(|x| T::from_pyarrow_bound(&x)).collect()
}
}

Expand All @@ -327,7 +338,7 @@ impl<T: ToPyArrow> ToPyArrow for Vec<T> {
}

impl FromPyArrow for RecordBatch {
fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
fn from_pyarrow_bound(value: &Bound<PyAny>) -> PyResult<Self> {
// Newer versions of PyArrow as well as other libraries with Arrow data implement this
// method, so prefer it over _export_to_c.
// See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
Expand All @@ -340,8 +351,10 @@ impl FromPyArrow for RecordBatch {
));
}

let schema_capsule: &PyCapsule = PyTryInto::try_into(tuple.get_item(0)?)?;
let array_capsule: &PyCapsule = PyTryInto::try_into(tuple.get_item(1)?)?;
let schema_capsule = tuple.get_item(0)?;
let schema_capsule = schema_capsule.downcast::<PyCapsule>()?;
let array_capsule = tuple.get_item(1)?;
let array_capsule = array_capsule.downcast::<PyCapsule>()?;

validate_pycapsule(schema_capsule, "arrow_schema")?;
validate_pycapsule(array_capsule, "arrow_array")?;
Expand Down Expand Up @@ -370,12 +383,13 @@ impl FromPyArrow for RecordBatch {
validate_class("RecordBatch", value)?;
// TODO(kszucs): implement the FFI conversions in arrow-rs for RecordBatches
let schema = value.getattr("schema")?;
let schema = Arc::new(Schema::from_pyarrow(schema)?);
let schema = Arc::new(Schema::from_pyarrow_bound(&schema)?);

let arrays = value.getattr("columns")?.downcast::<PyList>()?;
let arrays = value.getattr("columns")?;
let arrays = arrays
.downcast::<PyList>()?
.iter()
.map(|a| Ok(make_array(ArrayData::from_pyarrow(a)?)))
.map(|a| Ok(make_array(ArrayData::from_pyarrow_bound(&a)?)))
.collect::<PyResult<_>>()?;

let batch = RecordBatch::try_new(schema, arrays).map_err(to_py_err)?;
Expand All @@ -395,13 +409,13 @@ impl ToPyArrow for RecordBatch {

/// Supports conversion from `pyarrow.RecordBatchReader` to [ArrowArrayStreamReader].
impl FromPyArrow for ArrowArrayStreamReader {
fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
fn from_pyarrow_bound(value: &Bound<PyAny>) -> PyResult<Self> {
// Newer versions of PyArrow as well as other libraries with Arrow data implement this
// method, so prefer it over _export_to_c.
// See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
if value.hasattr("__arrow_c_stream__")? {
let capsule: &PyCapsule =
PyTryInto::try_into(value.getattr("__arrow_c_stream__")?.call0()?)?;
let capsule = value.getattr("__arrow_c_stream__")?.call0()?;
let capsule = capsule.downcast::<PyCapsule>()?;
validate_pycapsule(capsule, "arrow_array_stream")?;

let stream = unsafe { FFI_ArrowArrayStream::from_raw(capsule.pointer() as _) };
Expand All @@ -421,7 +435,7 @@ impl FromPyArrow for ArrowArrayStreamReader {
// make the conversion through PyArrow's private API
// this changes the pointer's memory and is thus unsafe.
// In particular, `_export_to_c` can go out of bounds
let args = PyTuple::new(value.py(), [stream_ptr as Py_uintptr_t]);
let args = PyTuple::new_bound(value.py(), [stream_ptr as Py_uintptr_t]);
value.call_method1("_export_to_c", args)?;

let stream_reader = ArrowArrayStreamReader::try_new(stream)
Expand All @@ -439,9 +453,9 @@ impl IntoPyArrow for Box<dyn RecordBatchReader + Send> {
let mut stream = FFI_ArrowArrayStream::new(self);

let stream_ptr = (&mut stream) as *mut FFI_ArrowArrayStream;
let module = py.import("pyarrow")?;
let module = py.import_bound("pyarrow")?;
let class = module.getattr("RecordBatchReader")?;
let args = PyTuple::new(py, [stream_ptr as Py_uintptr_t]);
let args = PyTuple::new_bound(py, [stream_ptr as Py_uintptr_t]);
let reader = class.call_method1("_import_from_c", args)?;

Ok(PyObject::from(reader))
Expand All @@ -463,8 +477,8 @@ impl IntoPyArrow for ArrowArrayStreamReader {
pub struct PyArrowType<T>(pub T);

impl<'source, T: FromPyArrow> FromPyObject<'source> for PyArrowType<T> {
fn extract(value: &'source PyAny) -> PyResult<Self> {
Ok(Self(T::from_pyarrow(value)?))
fn extract_bound(value: &Bound<'source, PyAny>) -> PyResult<Self> {
Ok(Self(T::from_pyarrow_bound(value)?))
}
}

Expand Down
4 changes: 2 additions & 2 deletions arrow/tests/pyarrow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ fn test_to_pyarrow() {

let res = Python::with_gil(|py| {
let py_input = input.to_pyarrow(py)?;
let records = RecordBatch::from_pyarrow(py_input.as_ref(py))?;
let records = RecordBatch::from_pyarrow_bound(py_input.bind(py))?;
let py_records = records.to_pyarrow(py)?;
RecordBatch::from_pyarrow(py_records.as_ref(py))
RecordBatch::from_pyarrow_bound(py_records.bind(py))
})
.unwrap();

Expand Down
Loading