Skip to content

Commit

Permalink
fix: remove offsetbuffer
Browse files Browse the repository at this point in the history
  • Loading branch information
Kikkon committed Apr 6, 2024
1 parent d8aba1b commit e26ac34
Show file tree
Hide file tree
Showing 6 changed files with 103 additions and 172 deletions.
107 changes: 64 additions & 43 deletions arrow-array/src/array/list_view_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,14 @@
// specific language governing permissions and limitations
// under the License.

use crate::array::{get_offsets, get_sizes, make_array, print_long_array};
use crate::array::{make_array, print_long_array};
use crate::builder::{GenericListViewBuilder, PrimitiveBuilder};
use crate::iterator::GenericListViewArrayIter;
use crate::{
new_empty_array, Array, ArrayAccessor, ArrayRef, ArrowPrimitiveType, FixedSizeListArray,
OffsetSizeTrait,
};
use arrow_buffer::{NullBuffer, OffsetBuffer, SizeBuffer};
use arrow_buffer::{NullBuffer, ScalarBuffer};
use arrow_data::{ArrayData, ArrayDataBuilder};
use arrow_schema::{ArrowError, DataType, FieldRef};
use std::any::Any;
Expand All @@ -48,8 +48,8 @@ pub struct GenericListViewArray<OffsetSize: OffsetSizeTrait> {
data_type: DataType,
nulls: Option<NullBuffer>,
values: ArrayRef,
value_offsets: OffsetBuffer<OffsetSize>,
value_sizes: SizeBuffer<OffsetSize>,
value_offsets: ScalarBuffer<OffsetSize>,
value_sizes: ScalarBuffer<OffsetSize>,
len: usize,
}

Expand All @@ -76,27 +76,26 @@ impl<OffsetSize: OffsetSizeTrait> GenericListViewArray<OffsetSize> {
DataType::ListView
};

/// Returns the data type of the list view array
/// Create a new [`GenericListViewArray`] from the provided parts
///
/// # Errors
///
/// Errors if
///
/// * `offsets.len() != sizes.len()`
/// * `offsets.len() != nulls.len()`
/// * `offsets.last() > values.len()`
/// * `!field.is_nullable() && values.is_nullable()`
/// * `field.data_type() != values.data_type()`
/// * `0 <= offsets[i] <= length of the child array`
/// * `0 <= offsets[i] + size[i] <= length of the child array`
pub fn try_new(
field: FieldRef,
offsets: OffsetBuffer<OffsetSize>,
sizes: SizeBuffer<OffsetSize>,
offsets: ScalarBuffer<OffsetSize>,
sizes: ScalarBuffer<OffsetSize>,
values: ArrayRef,
nulls: Option<NullBuffer>,
) -> Result<Self, ArrowError> {
let mut len = 0usize;
for i in 0..offsets.len() {
let offset = offsets[i].as_usize();
let size = sizes[i].as_usize();
if offset + size > values.len() {
return Err(ArrowError::InvalidArgumentError(format!(
"Invalid offset and size values for {}ListViewArray, offset: {}, size: {}, values.len(): {}",
OffsetSize::PREFIX, offset, size, values.len()
)));
}
len = len.add(size);
}

let len = offsets.len();
if len != sizes.len() {
return Err(ArrowError::InvalidArgumentError(format!(
Expand Down Expand Up @@ -132,6 +131,28 @@ impl<OffsetSize: OffsetSizeTrait> GenericListViewArray<OffsetSize> {
)));
}

let mut len = 0usize;
for i in 0..offsets.len() {
let length = values.len();
let offset = offsets[i].as_usize();
let size = sizes[i].as_usize();
if offset > length {
return Err(ArrowError::InvalidArgumentError(format!(
"Invalid offset value for {}ListViewArray, offset: {}, length: {}",
OffsetSize::PREFIX,
offset,
length
)));
}
if offset + size > values.len() {
return Err(ArrowError::InvalidArgumentError(format!(
"Invalid offset and size values for {}ListViewArray, offset: {}, size: {}, values.len(): {}",
OffsetSize::PREFIX, offset, size, values.len()
)));
}
len = len.add(size);
}

Ok(Self {
data_type: Self::DATA_TYPE_CONSTRUCTOR(field),
nulls,
Expand All @@ -149,8 +170,8 @@ impl<OffsetSize: OffsetSizeTrait> GenericListViewArray<OffsetSize> {
/// Panics if [`Self::try_new`] returns an error
pub fn new(
field: FieldRef,
offsets: OffsetBuffer<OffsetSize>,
sizes: SizeBuffer<OffsetSize>,
offsets: ScalarBuffer<OffsetSize>,
sizes: ScalarBuffer<OffsetSize>,
values: ArrayRef,
nulls: Option<NullBuffer>,
) -> Self {
Expand All @@ -163,8 +184,8 @@ impl<OffsetSize: OffsetSizeTrait> GenericListViewArray<OffsetSize> {
Self {
data_type: Self::DATA_TYPE_CONSTRUCTOR(field),
nulls: Some(NullBuffer::new_null(len)),
value_offsets: OffsetBuffer::new_zeroed(len),
value_sizes: SizeBuffer::new_zeroed(len),
value_offsets: ScalarBuffer::new_zeroed(len),
value_sizes: ScalarBuffer::new_zeroed(len),
values,
len: 0usize,
}
Expand All @@ -175,8 +196,8 @@ impl<OffsetSize: OffsetSizeTrait> GenericListViewArray<OffsetSize> {
self,
) -> (
FieldRef,
OffsetBuffer<OffsetSize>,
SizeBuffer<OffsetSize>,
ScalarBuffer<OffsetSize>,
ScalarBuffer<OffsetSize>,
ArrayRef,
Option<NullBuffer>,
) {
Expand All @@ -195,10 +216,10 @@ impl<OffsetSize: OffsetSizeTrait> GenericListViewArray<OffsetSize> {

/// Returns a reference to the offsets of this list
///
/// Unlike [`Self::value_offsets`] this returns the [`OffsetBuffer`]
/// Unlike [`Self::value_offsets`] this returns the [`ScalarBuffer`]
/// allowing for zero-copy cloning
#[inline]
pub fn offsets(&self) -> &OffsetBuffer<OffsetSize> {
pub fn offsets(&self) -> &ScalarBuffer<OffsetSize> {
&self.value_offsets
}

Expand All @@ -213,7 +234,7 @@ impl<OffsetSize: OffsetSizeTrait> GenericListViewArray<OffsetSize> {
/// Unlike [`Self::value_sizes`] this returns the [`SizeBuffer`]
/// allowing for zero-copy cloning
#[inline]
pub fn sizes(&self) -> &SizeBuffer<OffsetSize> {
pub fn sizes(&self) -> &ScalarBuffer<OffsetSize> {
&self.value_sizes
}

Expand Down Expand Up @@ -364,7 +385,7 @@ impl<OffsetSize: OffsetSizeTrait> Array for GenericListViewArray<OffsetSize> {

fn get_buffer_memory_size(&self) -> usize {
let mut size = self.values.get_buffer_memory_size();
size += self.value_offsets.inner().inner().capacity();
size += self.value_offsets.inner().capacity();
if let Some(n) = self.nulls.as_ref() {
size += n.buffer().capacity();
}
Expand All @@ -373,7 +394,7 @@ impl<OffsetSize: OffsetSizeTrait> Array for GenericListViewArray<OffsetSize> {

fn get_array_memory_size(&self) -> usize {
let mut size = std::mem::size_of::<Self>() + self.values.get_array_memory_size();
size += self.value_offsets.inner().inner().capacity();
size += self.value_offsets.inner().capacity();
if let Some(n) = self.nulls.as_ref() {
size += n.buffer().capacity();
}
Expand All @@ -399,8 +420,8 @@ impl<OffsetSize: OffsetSizeTrait> From<GenericListViewArray<OffsetSize>> for Arr
.len(len)
.nulls(array.nulls)
.buffers(vec![
array.value_offsets.into_inner().into_inner(),
array.value_sizes.into_inner().into_inner(),
array.value_offsets.into_inner(),
array.value_sizes.into_inner(),
])
.child_data(vec![array.values.to_data()]);

Expand All @@ -422,8 +443,8 @@ impl<OffsetSize: OffsetSizeTrait> From<FixedSizeListArray> for GenericListViewAr
_ => unreachable!(),
};

let offsets = OffsetBuffer::from_lengths(std::iter::repeat(size).take(value.len()));
let sizes = SizeBuffer::from_lengths(std::iter::repeat(size).take(value.len()));
let offsets = ScalarBuffer::from_lengths(std::iter::repeat(size).take(value.len()));
let sizes = ScalarBuffer::from_lengths(std::iter::repeat(size).take(value.len()));
Self {
data_type: Self::DATA_TYPE_CONSTRUCTOR(field.clone()),
nulls: value.nulls().cloned(),
Expand Down Expand Up @@ -472,8 +493,8 @@ impl<OffsetSize: OffsetSizeTrait> GenericListViewArray<OffsetSize> {
let values = make_array(values);
// SAFETY:
// ArrayData is valid, and verified type above
let value_offsets = unsafe { get_offsets(&data) };
let value_sizes = unsafe { get_sizes(&data) };
let value_offsets = ScalarBuffer::from(data.buffers()[0].clone());
let value_sizes = ScalarBuffer::from(data.buffers()[1].clone());

Ok(Self {
data_type: data.data_type().clone(),
Expand All @@ -500,9 +521,9 @@ mod tests {
fn create_from_buffers() -> ListViewArray {
// [[0, 1, 2], [3, 4, 5], [6, 7]]
let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]);
let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0, 3, 6]));
let offsets = ScalarBuffer::from(vec![0, 3, 6]);
let field = Arc::new(Field::new("item", DataType::Int32, true));
let sizes = SizeBuffer::new(ScalarBuffer::from(vec![3, 3, 2]));
let sizes = ScalarBuffer::from(vec![3, 3, 2]);

ListViewArray::new(field, offsets, sizes, Arc::new(values), None)
}
Expand Down Expand Up @@ -1133,8 +1154,8 @@ mod tests {

#[test]
fn test_try_new() {
let offsets = OffsetBuffer::new(vec![0, 1, 4, 5].into());
let sizes = SizeBuffer::new(vec![1, 3, 1, 0].into());
let offsets = ScalarBuffer::from(vec![0, 1, 4, 5]);
let sizes = ScalarBuffer::from(vec![1, 3, 1, 0]);
let values = Int32Array::new(vec![1, 2, 3, 4, 5].into(), None);
let values = Arc::new(values) as ArrayRef;

Expand All @@ -1157,8 +1178,8 @@ mod tests {
);

let nulls = NullBuffer::new_null(4);
let offsets = OffsetBuffer::new(vec![0, 1, 2, 3, 4].into());
let sizes = SizeBuffer::new(vec![1, 1, 1, 1, 0].into());
let offsets = ScalarBuffer::from(vec![0, 1, 2, 3, 4]);
let sizes = ScalarBuffer::from(vec![1, 1, 1, 1, 0]);
let err = LargeListViewArray::try_new(
field,
offsets.clone(),
Expand Down
17 changes: 1 addition & 16 deletions arrow-array/src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
mod binary_array;

use crate::types::*;
use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer, ScalarBuffer, SizeBuffer};
use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer, ScalarBuffer};
use arrow_data::ArrayData;
use arrow_schema::{DataType, IntervalUnit, TimeUnit};
use std::any::Any;
Expand Down Expand Up @@ -696,21 +696,6 @@ unsafe fn get_offsets<O: ArrowNativeType>(data: &ArrayData) -> OffsetBuffer<O> {
}
}

/// Helper function that gets size from an [`ArrayData`]
///
/// # Safety
unsafe fn get_sizes<O: ArrowNativeType>(data: &ArrayData) -> SizeBuffer<O> {
match data.is_empty() && data.buffers()[1].is_empty() {
true => SizeBuffer::new_empty(),
false => {
let buffer = ScalarBuffer::new(data.buffers()[1].clone(), data.offset(), data.len());
// Safety:
// ArrayData is valid
SizeBuffer::new(buffer)
}
}
}

/// Helper function for printing potentially long arrays.
fn print_long_array<A, F>(array: &A, f: &mut std::fmt::Formatter, print_item: F) -> std::fmt::Result
where
Expand Down
10 changes: 5 additions & 5 deletions arrow-array/src/builder/generic_list_view_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

use crate::builder::ArrayBuilder;
use crate::{ArrayRef, GenericListViewArray, OffsetSizeTrait};
use arrow_buffer::{Buffer, BufferBuilder, NullBufferBuilder, OffsetBuffer, SizeBuffer};
use arrow_buffer::{Buffer, BufferBuilder, NullBufferBuilder, ScalarBuffer};
use arrow_schema::{Field, FieldRef};
use std::any::Any;
use std::sync::Arc;
Expand Down Expand Up @@ -185,10 +185,10 @@ where

let offsets = self.offsets_builder.finish();
// Safety: Safe by construction
let offsets = unsafe { OffsetBuffer::new_unchecked(offsets.into()) };
let offsets = ScalarBuffer::from(offsets);

let sizes = self.sizes_builder.finish();
let sizes = SizeBuffer::new(sizes.into());
let sizes = ScalarBuffer::from(sizes);

let field = match &self.field {
Some(f) => f.clone(),
Expand All @@ -204,10 +204,10 @@ where

let offsets = Buffer::from_slice_ref(self.offsets_builder.as_slice());
// Safety: safe by construction
let offsets = unsafe { OffsetBuffer::new_unchecked(offsets.into()) };
let offsets = ScalarBuffer::from(offsets);

let sizes = Buffer::from_slice_ref(self.sizes_builder.as_slice());
let sizes = SizeBuffer::new(sizes.into());
let sizes = ScalarBuffer::from(sizes);

let field = match &self.field {
Some(f) => f.clone(),
Expand Down
2 changes: 0 additions & 2 deletions arrow-buffer/src/buffer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,5 @@ pub use boolean::*;
mod null;
pub use null::*;
mod run;
mod size;
pub use size::*;

pub use run::*;
33 changes: 33 additions & 0 deletions arrow-buffer/src/buffer/scalar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,39 @@ impl<T: ArrowNativeType> ScalarBuffer<T> {
pub fn ptr_eq(&self, other: &Self) -> bool {
self.buffer.ptr_eq(&other.buffer)
}

/// Returns the length of this buffer in units of `T`
pub fn new_zeroed(len: usize) -> Self {
let len_bytes = len.checked_mul(std::mem::size_of::<T>()).expect("overflow");
let buffer = MutableBuffer::from_len_zeroed(len_bytes);
Self::from(buffer)
}

/// Create a new [`ScalarBuffer`] from the iterator of slice lengths
///
/// ```
/// # use arrow_buffer::ScalarBuffer;
/// let buf = ScalarBuffer::<i32>::from_lengths([1, 3, 5]);
/// assert_eq!(buf.as_ref(), &[1, 3, 5]);
/// ```
pub fn from_lengths<I>(lengths: I) -> Self
where
I: IntoIterator<Item = usize>,
{
let iter = lengths.into_iter();
let mut out = Vec::with_capacity(iter.size_hint().0);

for size in iter {
out.push(T::usize_as(size))
}
Self::from(out)
}

/// Create a new [`ScalarBuffer`] containing a single 0 value
pub fn new_empty() -> Self {
let buffer = MutableBuffer::from_len_zeroed(std::mem::size_of::<T>());
Self::from(buffer.into_buffer())
}
}

impl<T: ArrowNativeType> Deref for ScalarBuffer<T> {
Expand Down
Loading

0 comments on commit e26ac34

Please sign in to comment.