Skip to content

Commit

Permalink
Restore (#1842)
Browse files Browse the repository at this point in the history
  • Loading branch information
tustvold committed Jun 23, 2022
1 parent b33ccae commit 4225292
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 117 deletions.
23 changes: 0 additions & 23 deletions arrow/src/array/builder/buffer_builder.rs
Expand Up @@ -22,29 +22,6 @@ use crate::datatypes::ArrowNativeType;

use super::PhantomData;

/// Converts a `MutableBuffer` to a `BufferBuilder<T>`.
///
/// `slots` is the number of array slots currently represented in the `MutableBuffer`.
pub(crate) fn mutable_buffer_to_builder<T: ArrowNativeType>(
mutable_buffer: MutableBuffer,
slots: usize,
) -> BufferBuilder<T> {
BufferBuilder::<T> {
buffer: mutable_buffer,
len: slots,
_marker: PhantomData,
}
}

/// Converts a `BufferBuilder<T>` into its underlying `MutableBuffer`.
///
/// `From` is not implemented because associated type bounds are unstable.
pub(crate) fn builder_to_mutable_buffer<T: ArrowNativeType>(
builder: BufferBuilder<T>,
) -> MutableBuffer {
builder.buffer
}

/// Builder for creating a [`Buffer`](crate::buffer::Buffer) object.
///
/// A [`Buffer`](crate::buffer::Buffer) is the underlying data
Expand Down
142 changes: 48 additions & 94 deletions arrow/src/array/builder/union_builder.rs
Expand Up @@ -15,28 +15,22 @@
// specific language governing permissions and limitations
// under the License.

use std::any::Any;
use std::collections::HashMap;

use crate::array::ArrayDataBuilder;
use crate::array::Int32BufferBuilder;
use crate::array::Int8BufferBuilder;
use crate::array::UnionArray;
use crate::buffer::MutableBuffer;
use crate::buffer::Buffer;

use crate::datatypes::ArrowPrimitiveType;
use crate::datatypes::DataType;
use crate::datatypes::Field;
use crate::datatypes::IntervalMonthDayNanoType;
use crate::datatypes::IntervalUnit;
use crate::datatypes::{Float32Type, Float64Type};
use crate::datatypes::{Int16Type, Int32Type, Int64Type, Int8Type};
use crate::datatypes::{UInt16Type, UInt32Type, UInt64Type, UInt8Type};
use crate::datatypes::{ArrowNativeType, ArrowPrimitiveType};
use crate::error::{ArrowError, Result};

use super::{BooleanBufferBuilder, BufferBuilder};

use super::buffer_builder::builder_to_mutable_buffer;
use super::buffer_builder::mutable_buffer_to_builder;
use crate::array::make_array;

/// `FieldData` is a helper struct to track the state of the fields in the `UnionBuilder`.
Expand All @@ -47,101 +41,65 @@ struct FieldData {
/// The Arrow data type represented in the `values_buffer`, which is untyped
data_type: DataType,
/// A buffer containing the values for this field in raw bytes
values_buffer: Option<MutableBuffer>,
values_buffer: Box<dyn FieldDataValues>,
/// The number of array slots represented by the buffer
slots: usize,
/// A builder for the null bitmap
bitmap_builder: BooleanBufferBuilder,
}

/// A type-erased [`BufferBuilder`] used by [`FieldData`]
trait FieldDataValues: std::fmt::Debug {
fn as_mut_any(&mut self) -> &mut dyn Any;

fn append_null(&mut self);

fn finish(&mut self) -> Buffer;
}

impl<T: ArrowNativeType> FieldDataValues for BufferBuilder<T> {
fn as_mut_any(&mut self) -> &mut dyn Any {
self
}

fn append_null(&mut self) {
self.advance(1)
}

fn finish(&mut self) -> Buffer {
self.finish()
}
}

impl FieldData {
/// Creates a new `FieldData`.
fn new(type_id: i8, data_type: DataType) -> Self {
fn new<T: ArrowPrimitiveType>(type_id: i8, data_type: DataType) -> Self {
Self {
type_id,
data_type,
values_buffer: Some(MutableBuffer::new(1)),
slots: 0,
values_buffer: Box::new(BufferBuilder::<T::Native>::new(1)),
bitmap_builder: BooleanBufferBuilder::new(1),
}
}

/// Appends a single value to this `FieldData`'s `values_buffer`.
#[allow(clippy::unnecessary_wraps)]
fn append_to_values_buffer<T: ArrowPrimitiveType>(
&mut self,
v: T::Native,
) -> Result<()> {
let values_buffer = self
.values_buffer
.take()
.expect("Values buffer was never created");
let mut builder: BufferBuilder<T::Native> =
mutable_buffer_to_builder(values_buffer, self.slots);
builder.append(v);
let mutable_buffer = builder_to_mutable_buffer(builder);
self.values_buffer = Some(mutable_buffer);
fn append_value<T: ArrowPrimitiveType>(&mut self, v: T::Native) {
self.values_buffer
.as_mut_any()
.downcast_mut::<BufferBuilder<T::Native>>()
.expect("Tried to append unexpected type")
.append(v);

self.slots += 1;
self.bitmap_builder.append(true);
Ok(())
self.slots += 1;
}

/// Appends a null to this `FieldData`.
#[allow(clippy::unnecessary_wraps)]
fn append_null<T: ArrowPrimitiveType>(&mut self) -> Result<()> {
let values_buffer = self
.values_buffer
.take()
.expect("Values buffer was never created");

let mut builder: BufferBuilder<T::Native> =
mutable_buffer_to_builder(values_buffer, self.slots);

builder.advance(1);
let mutable_buffer = builder_to_mutable_buffer(builder);
self.values_buffer = Some(mutable_buffer);
self.slots += 1;
fn append_null(&mut self) {
self.values_buffer.append_null();
self.bitmap_builder.append(false);
Ok(())
}

/// Appends a null to this `FieldData` when the type is not known at compile time.
///
/// As the main `append` method of `UnionBuilder` is generic, we need a way to append null
/// slots to the fields that are not being appended to in the case of sparse unions. This
/// method solves this problem by appending dynamically based on `DataType`.
///
/// Note, this method does **not** update the length of the `UnionArray` (this is done by the
/// main append operation) and assumes that it is called from a method that is generic over `T`
/// where `T` satisfies the bound `ArrowPrimitiveType`.
fn append_null_dynamic(&mut self) -> Result<()> {
match self.data_type {
DataType::Null => unimplemented!(),
DataType::Int8 => self.append_null::<Int8Type>()?,
DataType::Int16 => self.append_null::<Int16Type>()?,
DataType::Int32
| DataType::Date32
| DataType::Time32(_)
| DataType::Interval(IntervalUnit::YearMonth) => {
self.append_null::<Int32Type>()?
}
DataType::Int64
| DataType::Timestamp(_, _)
| DataType::Date64
| DataType::Time64(_)
| DataType::Interval(IntervalUnit::DayTime)
| DataType::Duration(_) => self.append_null::<Int64Type>()?,
DataType::Interval(IntervalUnit::MonthDayNano) => self.append_null::<IntervalMonthDayNanoType>()?,
DataType::UInt8 => self.append_null::<UInt8Type>()?,
DataType::UInt16 => self.append_null::<UInt16Type>()?,
DataType::UInt32 => self.append_null::<UInt32Type>()?,
DataType::UInt64 => self.append_null::<UInt64Type>()?,
DataType::Float32 => self.append_null::<Float32Type>()?,
DataType::Float64 => self.append_null::<Float64Type>()?,
_ => unreachable!("All cases of types that satisfy the trait bounds over T are covered above."),
};
Ok(())
self.slots += 1;
}
}

Expand Down Expand Up @@ -257,11 +215,12 @@ impl UnionBuilder {
data
}
None => match self.value_offset_builder {
Some(_) => FieldData::new(self.fields.len() as i8, T::DATA_TYPE),
Some(_) => FieldData::new::<T>(self.fields.len() as i8, T::DATA_TYPE),
None => {
let mut fd = FieldData::new(self.fields.len() as i8, T::DATA_TYPE);
let mut fd =
FieldData::new::<T>(self.fields.len() as i8, T::DATA_TYPE);
for _ in 0..self.len {
fd.append_null::<T>()?;
fd.append_null();
}
fd
}
Expand All @@ -278,14 +237,14 @@ impl UnionBuilder {
None => {
for (_, fd) in self.fields.iter_mut() {
// Append to all bar the FieldData currently being appended to
fd.append_null_dynamic()?;
fd.append_null();
}
}
}

match v {
Some(v) => field_data.append_to_values_buffer::<T>(v)?,
None => field_data.append_null::<T>()?,
Some(v) => field_data.append_value::<T>(v),
None => field_data.append_null(),
}

self.fields.insert(type_name, field_data);
Expand All @@ -303,15 +262,13 @@ impl UnionBuilder {
FieldData {
type_id,
data_type,
values_buffer,
mut values_buffer,
slots,
mut bitmap_builder,
},
) in self.fields.into_iter()
{
let buffer = values_buffer
.expect("The `values_buffer` should only ever be None inside the `append` method.")
.into();
let buffer = values_buffer.finish();
let arr_data_builder = ArrayDataBuilder::new(data_type.clone())
.add_buffer(buffer)
.len(slots)
Expand All @@ -333,6 +290,3 @@ impl UnionBuilder {
UnionArray::try_new(&type_ids, type_id_buffer, value_offsets_buffer, children)
}
}

#[cfg(test)]
mod tests {}

0 comments on commit 4225292

Please sign in to comment.