Skip to content

Commit

Permalink
Improve performance reading ByteViewArray from parquet by removing …
Browse files Browse the repository at this point in the history
…an implicit copy (#6031)

* update byte view array to not implicitly copy

* Add small comments
  • Loading branch information
XiangpengHao committed Jul 10, 2024
1 parent 3ce8e84 commit cb3babc
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 4 deletions.
11 changes: 8 additions & 3 deletions parquet/src/arrow/array_reader/byte_view_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@ struct ByteViewArrayReader {
}

impl ByteViewArrayReader {
#[allow(unused)]
fn new(
pages: Box<dyn PageIterator>,
data_type: ArrowType,
Expand Down Expand Up @@ -316,7 +315,10 @@ impl ByteViewArrayDecoderPlain {
}

pub fn read(&mut self, output: &mut ViewBuffer, len: usize) -> Result<usize> {
let block_id = output.append_block(self.buf.clone().into());
// Here we convert `bytes::Bytes` into `arrow_buffer::Bytes`, which is zero-copy.
// Then we convert `arrow_buffer::Bytes` into `arrow_buffer::Buffer`, which is also zero-copy.
let buf = arrow_buffer::Buffer::from_bytes(self.buf.clone().into());
let block_id = output.append_block(buf);

let to_read = len.min(self.max_remaining_values);

Expand Down Expand Up @@ -546,7 +548,10 @@ impl ByteViewArrayDecoderDeltaLength {

let src_lengths = &self.lengths[self.length_offset..self.length_offset + to_read];

let block_id = output.append_block(self.data.clone().into());
// Here we convert `bytes::Bytes` into `arrow_buffer::Bytes`, which is zero-copy.
// Then we convert `arrow_buffer::Bytes` into `arrow_buffer::Buffer`, which is also zero-copy.
let bytes = arrow_buffer::Buffer::from_bytes(self.data.clone().into());
let block_id = output.append_block(bytes);

let mut current_offset = self.data_offset;
let initial_offset = current_offset;
Expand Down
1 change: 0 additions & 1 deletion parquet/src/arrow/buffer/view_buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,6 @@ impl ViewBuffer {
}

/// Converts this into an [`ArrayRef`] with the provided `data_type` and `null_buffer`
#[allow(unused)]
pub fn into_array(self, null_buffer: Option<Buffer>, data_type: &ArrowType) -> ArrayRef {
let len = self.views.len();
let views = Buffer::from_vec(self.views);
Expand Down

0 comments on commit cb3babc

Please sign in to comment.