Skip to content

Commit

Permalink
Cleanup FFI interface (#3684) (#3683) (#3685)
Browse files Browse the repository at this point in the history
* Cleanup FFI interface (#3684) (#3683)

* Add import example

* Tweak doc example

* Use ManuallyDrop to model external memory
  • Loading branch information
tustvold committed Feb 9, 2023
1 parent 98ce68f commit bb4fc59
Show file tree
Hide file tree
Showing 4 changed files with 126 additions and 95 deletions.
20 changes: 7 additions & 13 deletions arrow/src/array/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ impl TryFrom<ArrayData> for ffi::ArrowArray {
/// # Safety
/// Assumes that these pointers represent valid C Data Interfaces, both in memory
/// representation and lifetime via the `release` mechanism.
#[deprecated(note = "Use ArrowArray::new")]
#[allow(deprecated)]
pub unsafe fn make_array_from_raw(
array: *const ffi::FFI_ArrowArray,
schema: *const ffi::FFI_ArrowSchema,
Expand Down Expand Up @@ -91,30 +93,22 @@ mod tests {
StructArray, UInt32Array, UInt64Array,
},
datatypes::{DataType, Field},
ffi::ArrowArray,
ffi::{ArrowArray, FFI_ArrowArray, FFI_ArrowSchema},
};
use std::convert::TryFrom;
use std::sync::Arc;

fn test_round_trip(expected: &ArrayData) -> Result<()> {
// create a `ArrowArray` from the data.
let d1 = ArrowArray::try_from(expected.clone())?;

// here we export the array as 2 pointers. We would have no control over ownership if it was not for
// the release mechanism.
let (array, schema) = ArrowArray::into_raw(d1);
// here we export the array
let array = FFI_ArrowArray::new(expected);
let schema = FFI_ArrowSchema::try_from(expected.data_type())?;

// simulate an external consumer by being the consumer
let d1 = unsafe { ArrowArray::try_from_raw(array, schema) }?;
let d1 = ArrowArray::new(array, schema);

let result = &ArrayData::try_from(d1)?;

assert_eq!(result, expected);

unsafe {
Arc::from_raw(array);
Arc::from_raw(schema);
}
Ok(())
}

Expand Down
1 change: 1 addition & 0 deletions arrow/src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ pub use arrow_data::{
pub use arrow_data::transform::{Capacities, MutableArrayData};

#[cfg(feature = "ffi")]
#[allow(deprecated)]
pub use self::ffi::{export_array_into_raw, make_array_from_raw};

// --------------------- Array's values comparison ---------------------
Expand Down
184 changes: 111 additions & 73 deletions arrow/src/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,68 +24,63 @@
//! The second interface maps native Rust types to the Rust-specific implementation of Arrow such as `format` to `Datatype`,
//! `Buffer`, etc. This is handled by `ArrowArray`.
//!
//!
//! Export to FFI
//!
//! ```rust
//! # use std::sync::Arc;
//! # use arrow::array::{Int32Array, Array, ArrayData, export_array_into_raw, make_array, make_array_from_raw};
//! # use arrow::error::{Result, ArrowError};
//! # use arrow::array::{Int32Array, Array, ArrayData, make_array};
//! # use arrow::error::Result;
//! # use arrow::compute::kernels::arithmetic;
//! # use arrow::ffi::{ArrowArray, FFI_ArrowArray, FFI_ArrowSchema};
//! # use std::convert::TryFrom;
//! # fn main() -> Result<()> {
//! // create an array natively
//! let array = Int32Array::from(vec![Some(1), None, Some(3)]);
//! let data = array.into_data();
//!
//! // export it
//!
//! let ffi_array = ArrowArray::try_new(array.data().clone())?;
//! let (array_ptr, schema_ptr) = ArrowArray::into_raw(ffi_array);
//!
//! // consumed and used by something else...
//! // Export it
//! let out_array = FFI_ArrowArray::new(&data);
//! let out_schema = FFI_ArrowSchema::try_from(data.data_type())?;
//!
//! // import it
//! let array = unsafe { make_array_from_raw(array_ptr, schema_ptr)? };
//! let array = ArrowArray::new(out_array, out_schema);
//! let array = Int32Array::from(ArrayData::try_from(array)?);
//!
//! // perform some operation
//! let array = array.as_any().downcast_ref::<Int32Array>().ok_or(
//! ArrowError::ParseError("Expects an int32".to_string()),
//! )?;
//! let array = arithmetic::add(&array, &array)?;
//!
//! // verify
//! assert_eq!(array, Int32Array::from(vec![Some(2), None, Some(6)]));
//! #
//! # Ok(())
//! # }
//! ```
//!
//! // Simulate if raw pointers are provided by consumer
//! let array = make_array(Int32Array::from(vec![Some(1), None, Some(3)]).into_data());
//!
//! let out_array = Box::new(FFI_ArrowArray::empty());
//! let out_schema = Box::new(FFI_ArrowSchema::empty());
//! let out_array_ptr = Box::into_raw(out_array);
//! let out_schema_ptr = Box::into_raw(out_schema);
//!
//! // export array into raw pointers from consumer
//! unsafe { export_array_into_raw(array, out_array_ptr, out_schema_ptr)?; };
//!
//! // import it
//! let array = unsafe { make_array_from_raw(out_array_ptr, out_schema_ptr)? };
//! Import from FFI
//!
//! // perform some operation
//! let array = array.as_any().downcast_ref::<Int32Array>().ok_or(
//! ArrowError::ParseError("Expects an int32".to_string()),
//! )?;
//! let array = arithmetic::add(&array, &array)?;
//!
//! // verify
//! assert_eq!(array, Int32Array::from(vec![Some(2), None, Some(6)]));
//! ```
//! # use std::ptr::addr_of_mut;
//! # use arrow::ffi::{ArrowArray, FFI_ArrowArray, FFI_ArrowSchema};
//! # use arrow_array::{ArrayRef, make_array};
//! # use arrow_schema::ArrowError;
//! #
//! /// A foreign data container that can export to C Data interface
//! struct ForeignArray {};
//!
//! // (drop/release)
//! unsafe {
//! Box::from_raw(out_array_ptr);
//! Box::from_raw(out_schema_ptr);
//! Arc::from_raw(array_ptr);
//! Arc::from_raw(schema_ptr);
//! impl ForeignArray {
//! /// Export from foreign array representation to C Data interface
//! /// e.g. <https://github.com/apache/arrow/blob/fc1f9ebbc4c3ae77d5cfc2f9322f4373d3d19b8a/python/pyarrow/array.pxi#L1552>
//! fn export_to_c(&self, array: *mut FFI_ArrowArray, schema: *mut FFI_ArrowSchema) {
//! // ...
//! }
//! }
//!
//! Ok(())
//! /// Import an [`ArrayRef`] from a [`ForeignArray`]
//! fn import_array(foreign: &ForeignArray) -> Result<ArrayRef, ArrowError> {
//! let mut schema = FFI_ArrowSchema::empty();
//! let mut array = FFI_ArrowArray::empty();
//! foreign.export_to_c(addr_of_mut!(array), addr_of_mut!(schema));
//! Ok(make_array(ArrowArray::new(array, schema).try_into()?))
//! }
//! ```

Expand Down Expand Up @@ -139,7 +134,15 @@ bitflags! {

/// ABI-compatible struct for `ArrowSchema` from C Data Interface
/// See <https://arrow.apache.org/docs/format/CDataInterface.html#structure-definitions>
/// This was created by bindgen
///
/// ```
/// # use arrow::ffi::FFI_ArrowSchema;
/// # use arrow_data::ArrayData;
/// fn array_schema(data: &ArrayData) -> FFI_ArrowSchema {
/// FFI_ArrowSchema::try_from(data.data_type()).unwrap()
/// }
/// ```
///
#[repr(C)]
#[derive(Debug)]
pub struct FFI_ArrowSchema {
Expand Down Expand Up @@ -394,7 +397,14 @@ fn bit_width(data_type: &DataType, i: usize) -> Result<usize> {

/// ABI-compatible struct for ArrowArray from C Data Interface
/// See <https://arrow.apache.org/docs/format/CDataInterface.html#structure-definitions>
/// This was created by bindgen
///
/// ```
/// # use arrow::ffi::FFI_ArrowArray;
/// # use arrow_array::Array;
/// fn export_array(array: &dyn Array) -> FFI_ArrowArray {
/// FFI_ArrowArray::new(array.data())
/// }
/// ```
#[repr(C)]
#[derive(Debug)]
pub struct FFI_ArrowArray {
Expand Down Expand Up @@ -859,6 +869,14 @@ impl<'a> ArrowArrayRef for ArrowArrayChild<'a> {
}

impl ArrowArray {
/// Creates a new [`ArrowArray`] from the provided array and schema
pub fn new(array: FFI_ArrowArray, schema: FFI_ArrowSchema) -> Self {
Self {
array: Arc::new(array),
schema: Arc::new(schema),
}
}

/// creates a new `ArrowArray`. This is used to export to the C Data Interface.
///
/// # Memory Leaks
Expand All @@ -878,6 +896,7 @@ impl ArrowArray {
/// on managing the allocation of the structs by themselves.
/// # Error
/// Errors if any of the pointers is null
#[deprecated(note = "Use ArrowArray::new")]
pub unsafe fn try_from_raw(
array: *const FFI_ArrowArray,
schema: *const FFI_ArrowSchema,
Expand Down Expand Up @@ -911,6 +930,7 @@ impl ArrowArray {
}

/// exports [ArrowArray] to the C Data Interface
#[deprecated(note = "Use FFI_ArrowArray and FFI_ArrowSchema directly")]
pub fn into_raw(this: ArrowArray) -> (*const FFI_ArrowArray, *const FFI_ArrowSchema) {
(Arc::into_raw(this.array), Arc::into_raw(this.schema))
}
Expand Down Expand Up @@ -946,28 +966,49 @@ mod tests {
use arrow_array::types::{Float64Type, Int32Type};
use arrow_array::{Float64Array, UnionArray};
use std::convert::TryFrom;
use std::mem::ManuallyDrop;
use std::ptr::addr_of_mut;

#[test]
fn test_round_trip() -> Result<()> {
fn test_round_trip() {
// create an array natively
let array = Int32Array::from(vec![1, 2, 3]);

// export it
let array = ArrowArray::try_from(array.into_data())?;
let array = ArrowArray::try_from(array.into_data()).unwrap();

// (simulate consumer) import it
let data = ArrayData::try_from(array)?;
let array = make_array(data);

// perform some operation
let array = array.as_any().downcast_ref::<Int32Array>().unwrap();
let array = kernels::arithmetic::add(array, array).unwrap();
let array = Int32Array::from(ArrayData::try_from(array).unwrap());
let array = kernels::arithmetic::add(&array, &array).unwrap();

// verify
assert_eq!(array, Int32Array::from(vec![2, 4, 6]));
}

// (drop/release)
Ok(())
#[test]
fn test_import() {
// Model receiving const pointers from an external system

// Create an array natively
let data = Int32Array::from(vec![1, 2, 3]).into_data();
let schema = FFI_ArrowSchema::try_from(data.data_type()).unwrap();
let array = FFI_ArrowArray::new(&data);

// Use ManuallyDrop to avoid Box:Drop recursing
let schema = Box::new(ManuallyDrop::new(schema));
let array = Box::new(ManuallyDrop::new(array));

let schema_ptr = &**schema as *const _;
let array_ptr = &**array as *const _;

// We can read them back to memory
// SAFETY:
// Pointers are aligned and valid
let array =
unsafe { ArrowArray::new(ptr::read(array_ptr), ptr::read(schema_ptr)) };

let array = Int32Array::from(ArrayData::try_from(array).unwrap());
assert_eq!(array, Int32Array::from(vec![1, 2, 3]));
}

#[test]
Expand Down Expand Up @@ -1424,31 +1465,28 @@ mod tests {
let array = make_array(Int32Array::from(vec![1, 2, 3]).into_data());

// Assume two raw pointers provided by the consumer
let out_array = Box::new(FFI_ArrowArray::empty());
let out_schema = Box::new(FFI_ArrowSchema::empty());
let out_array_ptr = Box::into_raw(out_array);
let out_schema_ptr = Box::into_raw(out_schema);

unsafe {
export_array_into_raw(array, out_array_ptr, out_schema_ptr)?;
let mut out_array = FFI_ArrowArray::empty();
let mut out_schema = FFI_ArrowSchema::empty();

{
let out_array_ptr = addr_of_mut!(out_array);
let out_schema_ptr = addr_of_mut!(out_schema);
unsafe {
export_array_into_raw(array, out_array_ptr, out_schema_ptr)?;
}
}

// (simulate consumer) import it
unsafe {
let array = ArrowArray::try_from_raw(out_array_ptr, out_schema_ptr).unwrap();
let data = ArrayData::try_from(array)?;
let array = make_array(data);

// perform some operation
let array = array.as_any().downcast_ref::<Int32Array>().unwrap();
let array = kernels::arithmetic::add(array, array).unwrap();
let array = ArrowArray::new(out_array, out_schema);
let data = ArrayData::try_from(array)?;
let array = make_array(data);

// verify
assert_eq!(array, Int32Array::from(vec![2, 4, 6]));
// perform some operation
let array = array.as_any().downcast_ref::<Int32Array>().unwrap();
let array = kernels::arithmetic::add(array, array).unwrap();

drop(Box::from_raw(out_array_ptr));
drop(Box::from_raw(out_schema_ptr));
}
// verify
assert_eq!(array, Int32Array::from(vec![2, 4, 6]));
Ok(())
}

Expand Down
16 changes: 7 additions & 9 deletions arrow/src/pyarrow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
//! arrays from and to Python.

use std::convert::{From, TryFrom};
use std::ptr::addr_of_mut;
use std::sync::Arc;

use pyo3::ffi::Py_uintptr_t;
Expand All @@ -30,7 +31,7 @@ use crate::array::{make_array, Array, ArrayData};
use crate::datatypes::{DataType, Field, Schema};
use crate::error::ArrowError;
use crate::ffi;
use crate::ffi::FFI_ArrowSchema;
use crate::ffi::{FFI_ArrowArray, FFI_ArrowSchema};
use crate::ffi_stream::{
export_reader_into_raw, ArrowArrayStreamReader, FFI_ArrowArrayStream,
};
Expand Down Expand Up @@ -111,24 +112,21 @@ impl PyArrowConvert for Schema {
impl PyArrowConvert for ArrayData {
fn from_pyarrow(value: &PyAny) -> PyResult<Self> {
// prepare a pointer to receive the Array struct
let (array_pointer, schema_pointer) =
ffi::ArrowArray::into_raw(unsafe { ffi::ArrowArray::empty() });
let mut array = FFI_ArrowArray::empty();
let mut schema = FFI_ArrowSchema::empty();

// make the conversion through PyArrow's private API
// this changes the pointer's memory and is thus unsafe.
// In particular, `_export_to_c` can go out of bounds
value.call_method1(
"_export_to_c",
(
array_pointer as Py_uintptr_t,
schema_pointer as Py_uintptr_t,
addr_of_mut!(array) as Py_uintptr_t,
addr_of_mut!(schema) as Py_uintptr_t,
),
)?;

let ffi_array = unsafe {
ffi::ArrowArray::try_from_raw(array_pointer, schema_pointer)
.map_err(to_py_err)?
};
let ffi_array = ffi::ArrowArray::new(array, schema);
let data = ArrayData::try_from(ffi_array).map_err(to_py_err)?;

Ok(data)
Expand Down

0 comments on commit bb4fc59

Please sign in to comment.