Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Track the size of custom allocations for use via Array::get_buffer_memory_size #5347

Merged
merged 1 commit into from
Jan 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions arrow-array/src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,8 @@ pub trait Array: std::fmt::Debug + Send + Sync {

/// Returns the total number of bytes of memory pointed to by this array.
/// The buffers store bytes in the Arrow memory format, and include the data as well as the validity map.
/// Note that this does not always correspond to the exact memory usage of an array,
/// since multiple arrays can share the same buffers or slices thereof.
fn get_buffer_memory_size(&self) -> usize;

/// Returns the total number of bytes of memory occupied physically by this array.
Expand Down Expand Up @@ -934,6 +936,17 @@ mod tests {
);
}

#[test]
fn test_memory_size_primitive_sliced() {
let arr = PrimitiveArray::<Int64Type>::from_iter_values(0..128);
let slice1 = arr.slice(0, 64);
let slice2 = arr.slice(64, 64);

// both slices report the full buffer memory usage, even though the buffers are shared
assert_eq!(slice1.get_array_memory_size(), arr.get_array_memory_size());
assert_eq!(slice2.get_array_memory_size(), arr.get_array_memory_size());
}

#[test]
fn test_memory_size_primitive_nullable() {
let arr: PrimitiveArray<Int64Type> = (0..128)
Expand Down
21 changes: 18 additions & 3 deletions arrow-buffer/src/alloc/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,9 @@ pub(crate) enum Deallocation {
Standard(Layout),
/// An allocation from an external source like the FFI interface
/// Deallocation will happen on `Allocation::drop`
Custom(Arc<dyn Allocation>),
/// The size of the allocation is tracked here separately only
/// for memory usage reporting via `Array::get_buffer_memory_size`
Custom(Arc<dyn Allocation>, usize),
}

impl Debug for Deallocation {
Expand All @@ -47,9 +49,22 @@ impl Debug for Deallocation {
Deallocation::Standard(layout) => {
write!(f, "Deallocation::Standard {layout:?}")
}
Deallocation::Custom(_) => {
write!(f, "Deallocation::Custom {{ capacity: unknown }}")
Deallocation::Custom(_, size) => {
write!(f, "Deallocation::Custom {{ capacity: {size} }}")
}
}
}
}

#[cfg(test)]
mod tests {
use crate::alloc::Deallocation;

#[test]
fn test_size_of_deallocation() {
assert_eq!(
std::mem::size_of::<Deallocation>(),
3 * std::mem::size_of::<usize>()
);
}
}
2 changes: 1 addition & 1 deletion arrow-buffer/src/buffer/immutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ impl Buffer {
len: usize,
owner: Arc<dyn Allocation>,
) -> Self {
Buffer::build_with_arguments(ptr, len, Deallocation::Custom(owner))
Buffer::build_with_arguments(ptr, len, Deallocation::Custom(owner, len))
}

/// Auxiliary method to create a new Buffer
Expand Down
2 changes: 1 addition & 1 deletion arrow-buffer/src/buffer/scalar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ impl<T: ArrowNativeType> From<Buffer> for ScalarBuffer<T> {
is_aligned,
"Memory pointer is not aligned with the specified scalar type"
),
Deallocation::Custom(_) =>
Deallocation::Custom(_, _) =>
assert!(is_aligned, "Memory pointer from external source (e.g, FFI) is not aligned with the specified scalar type. Before importing buffer through FFI, please make sure the allocation is aligned."),
}

Expand Down
13 changes: 7 additions & 6 deletions arrow-buffer/src/bytes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,9 @@ impl Bytes {
pub fn capacity(&self) -> usize {
match self.deallocation {
Deallocation::Standard(layout) => layout.size(),
// we cannot determine this in general,
// and thus we state that this is externally-owned memory
Deallocation::Custom(_) => 0,
// we only know the size of the custom allocation
// its underlying capacity might be larger
Deallocation::Custom(_, size) => size,
}
}

Expand All @@ -116,7 +116,7 @@ impl Drop for Bytes {
_ => unsafe { std::alloc::dealloc(self.ptr.as_ptr(), *layout) },
},
// The automatic drop implementation will free the memory once the reference count reaches zero
Deallocation::Custom(_allocation) => (),
Deallocation::Custom(_allocation, _size) => (),
}
}
}
Expand Down Expand Up @@ -147,10 +147,11 @@ impl Debug for Bytes {

impl From<bytes::Bytes> for Bytes {
fn from(value: bytes::Bytes) -> Self {
let len = value.len();
Self {
len: value.len(),
len,
ptr: NonNull::new(value.as_ptr() as _).unwrap(),
deallocation: Deallocation::Custom(std::sync::Arc::new(value)),
deallocation: Deallocation::Custom(std::sync::Arc::new(value), len),
}
}
}
Expand Down
Loading