Skip to content

Commit

Permalink
Avoid using copy-based buffer creation (#6039)
Browse files Browse the repository at this point in the history
  • Loading branch information
XiangpengHao committed Jul 12, 2024
1 parent e70c16d commit 920a944
Show file tree
Hide file tree
Showing 25 changed files with 82 additions and 79 deletions.
4 changes: 2 additions & 2 deletions arrow-array/src/array/dictionary_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1025,13 +1025,13 @@ mod tests {
let value_data = ArrayData::builder(DataType::Int8)
.len(8)
.add_buffer(Buffer::from(
&[10_i8, 11, 12, 13, 14, 15, 16, 17].to_byte_slice(),
[10_i8, 11, 12, 13, 14, 15, 16, 17].to_byte_slice(),
))
.build()
.unwrap();

// Construct a buffer for value offsets, for the nested array:
let keys = Buffer::from(&[2_i16, 3, 4].to_byte_slice());
let keys = Buffer::from([2_i16, 3, 4].to_byte_slice());

// Construct a dictionary array from the above two
let key_type = DataType::Int16;
Expand Down
2 changes: 1 addition & 1 deletion arrow-array/src/array/fixed_size_list_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -674,7 +674,7 @@ mod tests {
assert_eq!(err.to_string(), "Invalid argument error: Found unmasked nulls for non-nullable FixedSizeListArray field \"item\"");

// Valid as nulls in child masked by parent
let nulls = NullBuffer::new(BooleanBuffer::new(vec![0b0000101].into(), 0, 3));
let nulls = NullBuffer::new(BooleanBuffer::new(Buffer::from([0b0000101]), 0, 3));
FixedSizeListArray::new(field, 2, values.clone(), Some(nulls));

let field = Arc::new(Field::new("item", DataType::Int64, true));
Expand Down
18 changes: 9 additions & 9 deletions arrow-array/src/array/map_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -448,20 +448,20 @@ mod tests {
// Construct key and values
let keys_data = ArrayData::builder(DataType::Int32)
.len(8)
.add_buffer(Buffer::from(&[0, 1, 2, 3, 4, 5, 6, 7].to_byte_slice()))
.add_buffer(Buffer::from([0, 1, 2, 3, 4, 5, 6, 7].to_byte_slice()))
.build()
.unwrap();
let values_data = ArrayData::builder(DataType::UInt32)
.len(8)
.add_buffer(Buffer::from(
&[0u32, 10, 20, 30, 40, 50, 60, 70].to_byte_slice(),
[0u32, 10, 20, 30, 40, 50, 60, 70].to_byte_slice(),
))
.build()
.unwrap();

// Construct a buffer for value offsets, for the nested array:
// [[0, 1, 2], [3, 4, 5], [6, 7]]
let entry_offsets = Buffer::from(&[0, 3, 6, 8].to_byte_slice());
let entry_offsets = Buffer::from([0, 3, 6, 8].to_byte_slice());

let keys = Arc::new(Field::new("keys", DataType::Int32, false));
let values = Arc::new(Field::new("values", DataType::UInt32, false));
Expand Down Expand Up @@ -493,21 +493,21 @@ mod tests {
// Construct key and values
let key_data = ArrayData::builder(DataType::Int32)
.len(8)
.add_buffer(Buffer::from(&[0, 1, 2, 3, 4, 5, 6, 7].to_byte_slice()))
.add_buffer(Buffer::from([0, 1, 2, 3, 4, 5, 6, 7].to_byte_slice()))
.build()
.unwrap();
let value_data = ArrayData::builder(DataType::UInt32)
.len(8)
.add_buffer(Buffer::from(
&[0u32, 10, 20, 0, 40, 0, 60, 70].to_byte_slice(),
[0u32, 10, 20, 0, 40, 0, 60, 70].to_byte_slice(),
))
.null_bit_buffer(Some(Buffer::from(&[0b11010110])))
.build()
.unwrap();

// Construct a buffer for value offsets, for the nested array:
// [[0, 1, 2], [3, 4, 5], [6, 7]]
let entry_offsets = Buffer::from(&[0, 3, 6, 8].to_byte_slice());
let entry_offsets = Buffer::from([0, 3, 6, 8].to_byte_slice());

let keys_field = Arc::new(Field::new("keys", DataType::Int32, false));
let values_field = Arc::new(Field::new("values", DataType::UInt32, true));
Expand Down Expand Up @@ -617,18 +617,18 @@ mod tests {
// Construct key and values
let keys_data = ArrayData::builder(DataType::Int32)
.len(5)
.add_buffer(Buffer::from(&[3, 4, 5, 6, 7].to_byte_slice()))
.add_buffer(Buffer::from([3, 4, 5, 6, 7].to_byte_slice()))
.build()
.unwrap();
let values_data = ArrayData::builder(DataType::UInt32)
.len(5)
.add_buffer(Buffer::from(&[30u32, 40, 50, 60, 70].to_byte_slice()))
.add_buffer(Buffer::from([30u32, 40, 50, 60, 70].to_byte_slice()))
.build()
.unwrap();

// Construct a buffer for value offsets, for the nested array:
// [[3, 4, 5], [6, 7]]
let entry_offsets = Buffer::from(&[0, 3, 5].to_byte_slice());
let entry_offsets = Buffer::from([0, 3, 5].to_byte_slice());

let keys = Arc::new(Field::new("keys", DataType::Int32, false));
let values = Arc::new(Field::new("values", DataType::UInt32, false));
Expand Down
4 changes: 2 additions & 2 deletions arrow-array/src/array/struct_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -549,15 +549,15 @@ mod tests {
let expected_string_data = ArrayData::builder(DataType::Utf8)
.len(4)
.null_bit_buffer(Some(Buffer::from(&[9_u8])))
.add_buffer(Buffer::from(&[0, 3, 3, 3, 7].to_byte_slice()))
.add_buffer(Buffer::from([0, 3, 3, 3, 7].to_byte_slice()))
.add_buffer(Buffer::from(b"joemark"))
.build()
.unwrap();

let expected_int_data = ArrayData::builder(DataType::Int32)
.len(4)
.null_bit_buffer(Some(Buffer::from(&[11_u8])))
.add_buffer(Buffer::from(&[1, 2, 0, 4].to_byte_slice()))
.add_buffer(Buffer::from([1, 2, 0, 4].to_byte_slice()))
.build()
.unwrap();

Expand Down
2 changes: 1 addition & 1 deletion arrow-buffer/src/buffer/immutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -543,7 +543,7 @@ mod tests {

#[test]
fn test_access_concurrently() {
let buffer = Buffer::from(vec![1, 2, 3, 4, 5]);
let buffer = Buffer::from([1, 2, 3, 4, 5]);
let buffer2 = buffer.clone();
assert_eq!([1, 2, 3, 4, 5], buffer.as_slice());

Expand Down
2 changes: 1 addition & 1 deletion arrow-buffer/src/util/bit_chunk_iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -456,7 +456,7 @@ mod tests {
const ALLOC_SIZE: usize = 4 * 1024;
let input = vec![0xFF_u8; ALLOC_SIZE];

let buffer: Buffer = Buffer::from(input);
let buffer: Buffer = Buffer::from_vec(input);

let bitchunks = buffer.bit_chunks(57, ALLOC_SIZE * 8 - 57);

Expand Down
8 changes: 5 additions & 3 deletions arrow-cast/src/base64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
//! [`StringArray`]: arrow_array::StringArray

use arrow_array::{Array, GenericBinaryArray, GenericStringArray, OffsetSizeTrait};
use arrow_buffer::OffsetBuffer;
use arrow_buffer::{Buffer, OffsetBuffer};
use arrow_schema::ArrowError;
use base64::encoded_len;
use base64::engine::Config;
Expand Down Expand Up @@ -50,7 +50,9 @@ pub fn b64_encode<E: Engine, O: OffsetSizeTrait>(
assert_eq!(offset, buffer_len);

// Safety: Base64 is valid UTF-8
unsafe { GenericStringArray::new_unchecked(offsets, buffer.into(), array.nulls().cloned()) }
unsafe {
GenericStringArray::new_unchecked(offsets, Buffer::from_vec(buffer), array.nulls().cloned())
}
}

/// Base64 decode each element of `array` with the provided [`Engine`]
Expand Down Expand Up @@ -79,7 +81,7 @@ pub fn b64_decode<E: Engine, O: OffsetSizeTrait>(

Ok(GenericBinaryArray::new(
offsets,
buffer.into(),
Buffer::from_vec(buffer),
array.nulls().cloned(),
))
}
Expand Down
4 changes: 2 additions & 2 deletions arrow-data/src/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1959,7 +1959,7 @@ mod tests {
.len(20)
.offset(5)
.add_buffer(b1)
.null_bit_buffer(Some(Buffer::from(vec![
.null_bit_buffer(Some(Buffer::from([
0b01011111, 0b10110101, 0b01100011, 0b00011110,
])))
.build()
Expand Down Expand Up @@ -2164,7 +2164,7 @@ mod tests {

#[test]
fn test_count_nulls() {
let buffer = Buffer::from(vec![0b00010110, 0b10011111]);
let buffer = Buffer::from([0b00010110, 0b10011111]);
let buffer = NullBuffer::new(BooleanBuffer::new(buffer, 0, 16));
let count = count_nulls(Some(&buffer), 0, 16);
assert_eq!(count, 7);
Expand Down
2 changes: 1 addition & 1 deletion arrow-flight/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ pub fn flight_data_to_arrow_batch(
})
.map(|batch| {
reader::read_record_batch(
&Buffer::from(&data.data_body),
&Buffer::from(data.data_body.as_ref()),
batch,
schema,
dictionaries_by_id,
Expand Down
6 changes: 3 additions & 3 deletions arrow-integration-test/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -696,7 +696,7 @@ pub fn array_from_json(
let list_data = ArrayData::builder(field.data_type().clone())
.len(json_col.count)
.offset(0)
.add_buffer(Buffer::from(&offsets.to_byte_slice()))
.add_buffer(Buffer::from(offsets.to_byte_slice()))
.add_child_data(child_array.into_data())
.null_bit_buffer(Some(null_buf))
.build()
Expand All @@ -720,7 +720,7 @@ pub fn array_from_json(
let list_data = ArrayData::builder(field.data_type().clone())
.len(json_col.count)
.offset(0)
.add_buffer(Buffer::from(&offsets.to_byte_slice()))
.add_buffer(Buffer::from(offsets.to_byte_slice()))
.add_child_data(child_array.into_data())
.null_bit_buffer(Some(null_buf))
.build()
Expand Down Expand Up @@ -839,7 +839,7 @@ pub fn array_from_json(
.collect();
let array_data = ArrayData::builder(field.data_type().clone())
.len(json_col.count)
.add_buffer(Buffer::from(&offsets.to_byte_slice()))
.add_buffer(Buffer::from(offsets.to_byte_slice()))
.add_child_data(child_array.into_data())
.null_bit_buffer(Some(null_buf))
.build()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ async fn receive_batch_flight_data(

while message.header_type() == ipc::MessageHeader::DictionaryBatch {
reader::read_dictionary(
&Buffer::from(&data.data_body),
&Buffer::from(data.data_body.as_ref()),
message
.header_as_dictionary_batch()
.expect("Error parsing dictionary"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -364,7 +364,7 @@ async fn save_uploaded_chunks(

let batch = record_batch_from_message(
message,
&Buffer::from(data.data_body),
&Buffer::from(data.data_body.as_ref()),
schema_ref.clone(),
&dictionaries_by_id,
)
Expand All @@ -375,7 +375,7 @@ async fn save_uploaded_chunks(
ipc::MessageHeader::DictionaryBatch => {
dictionary_from_message(
message,
&Buffer::from(data.data_body),
&Buffer::from(data.data_body.as_ref()),
schema_ref.clone(),
&mut dictionaries_by_id,
)
Expand Down
4 changes: 2 additions & 2 deletions arrow-ipc/src/compression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,8 @@ impl CompressionCodec {
} else if let Ok(decompressed_length) = usize::try_from(decompressed_length) {
// decompress data using the codec
let input_data = &input[(LENGTH_OF_PREFIX_DATA as usize)..];
self.decompress(input_data, decompressed_length as _)?
.into()
let v = self.decompress(input_data, decompressed_length as _)?;
Buffer::from_vec(v)
} else {
return Err(ArrowError::IpcError(format!(
"Invalid uncompressed length: {decompressed_length}"
Expand Down
6 changes: 3 additions & 3 deletions arrow-json/src/reader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1850,7 +1850,7 @@ mod tests {
let c = ArrayDataBuilder::new(c_field.data_type().clone())
.len(7)
.add_child_data(d.to_data())
.null_bit_buffer(Some(Buffer::from(vec![0b00111011])))
.null_bit_buffer(Some(Buffer::from([0b00111011])))
.build()
.unwrap();
let b = BooleanArray::from(vec![
Expand All @@ -1866,14 +1866,14 @@ mod tests {
.len(7)
.add_child_data(b.to_data())
.add_child_data(c.clone())
.null_bit_buffer(Some(Buffer::from(vec![0b00111111])))
.null_bit_buffer(Some(Buffer::from([0b00111111])))
.build()
.unwrap();
let a_list = ArrayDataBuilder::new(a_field.data_type().clone())
.len(6)
.add_buffer(Buffer::from_slice_ref([0i32, 2, 3, 6, 6, 6, 7]))
.add_child_data(a)
.null_bit_buffer(Some(Buffer::from(vec![0b00110111])))
.null_bit_buffer(Some(Buffer::from([0b00110111])))
.build()
.unwrap();
let expected = make_array(a_list);
Expand Down
18 changes: 9 additions & 9 deletions arrow-json/src/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -927,12 +927,12 @@ mod tests {

let a_values = StringArray::from(vec!["a", "a1", "b", "c", "d", "e"]);
// list column rows: ["a", "a1"], ["b"], ["c"], ["d"], ["e"]
let a_value_offsets = Buffer::from(&[0, 2, 3, 4, 5, 6].to_byte_slice());
let a_value_offsets = Buffer::from([0, 2, 3, 4, 5, 6].to_byte_slice());
let a_list_data = ArrayData::builder(field_c1.data_type().clone())
.len(5)
.add_buffer(a_value_offsets)
.add_child_data(a_values.into_data())
.null_bit_buffer(Some(Buffer::from(vec![0b00011111])))
.null_bit_buffer(Some(Buffer::from([0b00011111])))
.build()
.unwrap();
let a = ListArray::from(a_list_data);
Expand Down Expand Up @@ -976,17 +976,17 @@ mod tests {
// list column rows: [[1, 2], [3]], [], [[4, 5, 6]]
let a_values = Int32Array::from(vec![1, 2, 3, 4, 5, 6]);

let a_value_offsets = Buffer::from(&[0, 2, 3, 6].to_byte_slice());
let a_value_offsets = Buffer::from([0, 2, 3, 6].to_byte_slice());
// Construct a list array from the above two
let a_list_data = ArrayData::builder(list_inner_type.data_type().clone())
.len(3)
.add_buffer(a_value_offsets)
.null_bit_buffer(Some(Buffer::from(vec![0b00000111])))
.null_bit_buffer(Some(Buffer::from([0b00000111])))
.add_child_data(a_values.into_data())
.build()
.unwrap();

let c1_value_offsets = Buffer::from(&[0, 2, 2, 3].to_byte_slice());
let c1_value_offsets = Buffer::from([0, 2, 2, 3].to_byte_slice());
let c1_list_data = ArrayData::builder(field_c1.data_type().clone())
.len(3)
.add_buffer(c1_value_offsets)
Expand Down Expand Up @@ -1058,12 +1058,12 @@ mod tests {
// [{"c11": 1, "c12": {"c121": "e"}}, {"c12": {"c121": "f"}}],
// null,
// [{"c11": 5, "c12": {"c121": "g"}}]
let c1_value_offsets = Buffer::from(&[0, 2, 2, 3].to_byte_slice());
let c1_value_offsets = Buffer::from([0, 2, 2, 3].to_byte_slice());
let c1_list_data = ArrayData::builder(field_c1.data_type().clone())
.len(3)
.add_buffer(c1_value_offsets)
.add_child_data(struct_values.into_data())
.null_bit_buffer(Some(Buffer::from(vec![0b00000101])))
.null_bit_buffer(Some(Buffer::from([0b00000101])))
.build()
.unwrap();
let c1 = ListArray::from(c1_list_data);
Expand Down Expand Up @@ -1225,7 +1225,7 @@ mod tests {
);

// [{"foo": 10}, null, {}, {"bar": 20, "baz": 30, "qux": 40}, {"quux": 50}, {}]
let entry_offsets = Buffer::from(&[0, 1, 1, 1, 4, 5, 5].to_byte_slice());
let entry_offsets = Buffer::from([0, 1, 1, 1, 4, 5, 5].to_byte_slice());
let valid_buffer = Buffer::from([0b00111101]);

let map_data = ArrayData::builder(map_data_type.clone())
Expand Down Expand Up @@ -1408,7 +1408,7 @@ mod tests {
);

// [{"list":[{"int32":1,"utf8":"a"},{"int32":null,"utf8":"b"}]},{"list":null},{"list":[{int32":5,"utf8":null}]},{"list":null}]
let entry_offsets = Buffer::from(&[0, 2, 2, 3, 3].to_byte_slice());
let entry_offsets = Buffer::from([0, 2, 2, 3, 3].to_byte_slice());
let data = ArrayData::builder(field.data_type().clone())
.len(4)
.add_buffer(entry_offsets)
Expand Down
2 changes: 1 addition & 1 deletion arrow-select/src/dictionary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@ mod tests {

#[test]
fn test_merge_nulls() {
let buffer = Buffer::from("helloworldbingohelloworld");
let buffer = Buffer::from(b"helloworldbingohelloworld");
let offsets = OffsetBuffer::from_lengths([5, 5, 5, 5, 5]);
let nulls = NullBuffer::from(vec![true, false, true, true, true]);
let values = StringArray::new(offsets, buffer, Some(nulls));
Expand Down
4 changes: 2 additions & 2 deletions arrow-string/src/substring.rs
Original file line number Diff line number Diff line change
Expand Up @@ -732,7 +732,7 @@ mod tests {
}

fn generic_string_with_non_zero_offset<O: OffsetSizeTrait>() {
let values = "hellotherearrow";
let values = b"hellotherearrow";
let offsets = &[
O::zero(),
O::from_usize(5).unwrap(),
Expand Down Expand Up @@ -867,7 +867,7 @@ mod tests {
let data = ArrayData::builder(GenericStringArray::<O>::DATA_TYPE)
.len(2)
.add_buffer(Buffer::from_slice_ref(offsets))
.add_buffer(Buffer::from(values))
.add_buffer(Buffer::from(values.as_bytes()))
.null_bit_buffer(Some(Buffer::from(bitmap)))
.offset(1)
.build()
Expand Down
4 changes: 2 additions & 2 deletions arrow/examples/builders.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,13 +88,13 @@ fn main() {
// buffer.
let value_data = ArrayData::builder(DataType::Int32)
.len(8)
.add_buffer(Buffer::from(&[0, 1, 2, 3, 4, 5, 6, 7].to_byte_slice()))
.add_buffer(Buffer::from([0, 1, 2, 3, 4, 5, 6, 7].to_byte_slice()))
.build()
.unwrap();

// Construct a buffer for value offsets, for the nested array:
// [[0, 1, 2], [3, 4, 5], [6, 7]]
let value_offsets = Buffer::from(&[0, 3, 6, 8].to_byte_slice());
let value_offsets = Buffer::from([0, 3, 6, 8].to_byte_slice());

// Construct a list array from the above two
let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false)));
Expand Down
2 changes: 1 addition & 1 deletion arrow/examples/tensor_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ fn main() -> Result<()> {

// In order to build a tensor from an array the function to_byte_slice add the
// required padding to the elements in the array.
let buf = Buffer::from(&[0, 1, 2, 3, 4, 5, 6, 7, 9, 10].to_byte_slice());
let buf = Buffer::from([0, 1, 2, 3, 4, 5, 6, 7, 9, 10].to_byte_slice());
let tensor = Int32Tensor::try_new(buf, Some(vec![2, 5]), None, None)?;
println!("\nInt32 Tensor");
println!("{tensor:?}");
Expand Down
Loading

0 comments on commit 920a944

Please sign in to comment.