From 22d28fa76abd2e29f87a359833801aef922ac2a9 Mon Sep 17 00:00:00 2001 From: Dmitry Patsura Date: Thu, 7 Jan 2021 19:51:25 +0300 Subject: [PATCH] ARROW-11149: [Rust] Impl build_empty_fixed_size_list_array, move to build_empty_list_array to generic by OffsetSizeTrait --- rust/arrow/src/array/array_list.rs | 414 ++++++++++++-------- rust/arrow/src/array/mod.rs | 2 +- rust/datafusion/src/physical_plan/common.rs | 13 +- rust/parquet/src/arrow/array_reader.rs | 2 +- 4 files changed, 257 insertions(+), 174 deletions(-) diff --git a/rust/arrow/src/array/array_list.rs b/rust/arrow/src/array/array_list.rs index 08f863ae7be61..e40df9eb8dafa 100644 --- a/rust/arrow/src/array/array_list.rs +++ b/rust/arrow/src/array/array_list.rs @@ -25,9 +25,10 @@ use num::Num; use super::{ array::print_long_array, make_array, raw_pointer::RawPtrBox, Array, ArrayDataRef, - ArrayRef, BinaryBuilder, BooleanBuilder, LargeListBuilder, ListBuilder, - PrimitiveBuilder, StringBuilder, + ArrayRef, BinaryBuilder, BooleanBuilder, FixedSizeListBuilder, PrimitiveBuilder, + StringBuilder, }; +use crate::array::builder::GenericListBuilder; use crate::datatypes::ArrowNativeType; use crate::datatypes::*; use crate::error::{ArrowError, Result}; @@ -299,187 +300,266 @@ impl fmt::Debug for FixedSizeListArray { } macro_rules! build_empty_list_array_with_primitive_items { - ($item_type:ident, $list_builder:ident) => {{ + ($item_type:ident, $offset_type:ident) => {{ let values_builder = PrimitiveBuilder::<$item_type>::new(0); - let mut builder = $list_builder::new(values_builder); + let mut builder = + GenericListBuilder::<$offset_type, PrimitiveBuilder<$item_type>>::new( + values_builder, + ); let empty_list_array = builder.finish(); Ok(Arc::new(empty_list_array)) }}; } macro_rules! 
build_empty_list_array_with_non_primitive_items { - ($type_builder:ident, $list_builder:ident) => {{ + ($type_builder:ident, $offset_type:ident) => {{ let values_builder = $type_builder::new(0); - let mut builder = $list_builder::new(values_builder); + let mut builder = + GenericListBuilder::<$offset_type, $type_builder>::new(values_builder); let empty_list_array = builder.finish(); Ok(Arc::new(empty_list_array)) }}; } -macro_rules! make_empty_list_fn { - ($name:ident, $list_builder:ident) => { - pub fn $name(item_type: DataType) -> Result { - match item_type { - DataType::UInt8 => { - build_empty_list_array_with_primitive_items!(UInt8Type, $list_builder) - } - DataType::UInt16 => { - build_empty_list_array_with_primitive_items!( - UInt16Type, - $list_builder - ) - } - DataType::UInt32 => { - build_empty_list_array_with_primitive_items!( - UInt32Type, - $list_builder - ) - } - DataType::UInt64 => { - build_empty_list_array_with_primitive_items!( - UInt64Type, - $list_builder - ) - } - DataType::Int8 => { - build_empty_list_array_with_primitive_items!(Int8Type, $list_builder) - } - DataType::Int16 => { - build_empty_list_array_with_primitive_items!(Int16Type, $list_builder) - } - DataType::Int32 => { - build_empty_list_array_with_primitive_items!(Int32Type, $list_builder) - } - DataType::Int64 => { - build_empty_list_array_with_primitive_items!(Int64Type, $list_builder) - } - DataType::Float32 => { - build_empty_list_array_with_primitive_items!( - Float32Type, - $list_builder - ) - } - DataType::Float64 => { - build_empty_list_array_with_primitive_items!( - Float64Type, - $list_builder - ) - } - DataType::Boolean => { - build_empty_list_array_with_non_primitive_items!( - BooleanBuilder, - $list_builder - ) - } - DataType::Date32(_) => { - build_empty_list_array_with_primitive_items!( - Date32Type, - $list_builder - ) - } - DataType::Date64(_) => { - build_empty_list_array_with_primitive_items!( - Date64Type, - $list_builder - ) - } - 
DataType::Time32(TimeUnit::Second) => { - build_empty_list_array_with_primitive_items!( - Time32SecondType, - $list_builder - ) - } - DataType::Time32(TimeUnit::Millisecond) => { - build_empty_list_array_with_primitive_items!( - Time32MillisecondType, - $list_builder - ) - } - DataType::Time64(TimeUnit::Microsecond) => { - build_empty_list_array_with_primitive_items!( - Time64MicrosecondType, - $list_builder - ) - } - DataType::Time64(TimeUnit::Nanosecond) => { - build_empty_list_array_with_primitive_items!( - Time64NanosecondType, - $list_builder - ) - } - DataType::Duration(TimeUnit::Second) => { - build_empty_list_array_with_primitive_items!( - DurationSecondType, - $list_builder - ) - } - DataType::Duration(TimeUnit::Millisecond) => { - build_empty_list_array_with_primitive_items!( - DurationMillisecondType, - $list_builder - ) - } - DataType::Duration(TimeUnit::Microsecond) => { - build_empty_list_array_with_primitive_items!( - DurationMicrosecondType, - $list_builder - ) - } - DataType::Duration(TimeUnit::Nanosecond) => { - build_empty_list_array_with_primitive_items!( - DurationNanosecondType, - $list_builder - ) - } - DataType::Timestamp(TimeUnit::Second, _) => { - build_empty_list_array_with_primitive_items!( - TimestampSecondType, - $list_builder - ) - } - DataType::Timestamp(TimeUnit::Millisecond, _) => { - build_empty_list_array_with_primitive_items!( - TimestampMillisecondType, - $list_builder - ) - } - DataType::Timestamp(TimeUnit::Microsecond, _) => { - build_empty_list_array_with_primitive_items!( - TimestampMicrosecondType, - $list_builder - ) - } - DataType::Timestamp(TimeUnit::Nanosecond, _) => { - build_empty_list_array_with_primitive_items!( - TimestampNanosecondType, - $list_builder - ) - } - DataType::Utf8 => { - build_empty_list_array_with_non_primitive_items!( - StringBuilder, - $list_builder - ) - } - DataType::Binary => { - build_empty_list_array_with_non_primitive_items!( - BinaryBuilder, - $list_builder - ) - } - _ => 
Err(ArrowError::Unsupported(format!( - "{} of type List({:?}) is not supported by {}", - String::from(stringify!($list_builder)), - item_type, - String::from(stringify!($name)) - ))), - } +/// Builds an empty list array of `item_type` values using `OffsetSize` list offsets. +/// Item types without a match arm below yield `ArrowError::Unsupported`. +pub fn build_empty_list_array<OffsetSize: OffsetSizeTrait>( + item_type: DataType, +) -> Result<ArrayRef> { + match item_type { + DataType::UInt8 => { + build_empty_list_array_with_primitive_items!(UInt8Type, OffsetSize) } - }; + DataType::UInt16 => { + build_empty_list_array_with_primitive_items!(UInt16Type, OffsetSize) + } + DataType::UInt32 => { + build_empty_list_array_with_primitive_items!(UInt32Type, OffsetSize) + } + DataType::UInt64 => { + build_empty_list_array_with_primitive_items!(UInt64Type, OffsetSize) + } + DataType::Int8 => { + build_empty_list_array_with_primitive_items!(Int8Type, OffsetSize) + } + DataType::Int16 => { + build_empty_list_array_with_primitive_items!(Int16Type, OffsetSize) + } + DataType::Int32 => { + build_empty_list_array_with_primitive_items!(Int32Type, OffsetSize) + } + DataType::Int64 => { + build_empty_list_array_with_primitive_items!(Int64Type, OffsetSize) + } + DataType::Float32 => { + build_empty_list_array_with_primitive_items!(Float32Type, OffsetSize) + } + DataType::Float64 => { + build_empty_list_array_with_primitive_items!(Float64Type, OffsetSize) + } + DataType::Boolean => { + build_empty_list_array_with_non_primitive_items!(BooleanBuilder, OffsetSize) + } + DataType::Date32(_) => { + build_empty_list_array_with_primitive_items!(Date32Type, OffsetSize) + } + DataType::Date64(_) => { + build_empty_list_array_with_primitive_items!(Date64Type, OffsetSize) + } + DataType::Time32(TimeUnit::Second) => { + build_empty_list_array_with_primitive_items!(Time32SecondType, OffsetSize) + } + DataType::Time32(TimeUnit::Millisecond) => { + build_empty_list_array_with_primitive_items!( + Time32MillisecondType, + OffsetSize + ) + } + DataType::Time64(TimeUnit::Microsecond) => { + build_empty_list_array_with_primitive_items!( + Time64MicrosecondType, + OffsetSize + ) + } + 
DataType::Time64(TimeUnit::Nanosecond) => { + build_empty_list_array_with_primitive_items!(Time64NanosecondType, OffsetSize) + } + DataType::Duration(TimeUnit::Second) => { + build_empty_list_array_with_primitive_items!(DurationSecondType, OffsetSize) + } + DataType::Duration(TimeUnit::Millisecond) => { + build_empty_list_array_with_primitive_items!( + DurationMillisecondType, + OffsetSize + ) + } + DataType::Duration(TimeUnit::Microsecond) => { + build_empty_list_array_with_primitive_items!( + DurationMicrosecondType, + OffsetSize + ) + } + DataType::Duration(TimeUnit::Nanosecond) => { + build_empty_list_array_with_primitive_items!( + DurationNanosecondType, + OffsetSize + ) + } + DataType::Timestamp(TimeUnit::Second, _) => { + build_empty_list_array_with_primitive_items!(TimestampSecondType, OffsetSize) + } + DataType::Timestamp(TimeUnit::Millisecond, _) => { + build_empty_list_array_with_primitive_items!( + TimestampMillisecondType, + OffsetSize + ) + } + DataType::Timestamp(TimeUnit::Microsecond, _) => { + build_empty_list_array_with_primitive_items!( + TimestampMicrosecondType, + OffsetSize + ) + } + DataType::Timestamp(TimeUnit::Nanosecond, _) => { + build_empty_list_array_with_primitive_items!( + TimestampNanosecondType, + OffsetSize + ) + } + DataType::Utf8 => { + build_empty_list_array_with_non_primitive_items!(StringBuilder, OffsetSize) + } + DataType::Binary => { + build_empty_list_array_with_non_primitive_items!(BinaryBuilder, OffsetSize) + } + _ => Err(ArrowError::Unsupported(format!( + "GenericListBuilder of type List({:?}) is not supported by build_empty_list_array", + item_type + ))), + } +} + +macro_rules! 
build_empty_fixed_size_list_array_with_primitive_items { + ($item_type:ident) => {{ + let values_builder = PrimitiveBuilder::<$item_type>::new(0); + let mut builder = FixedSizeListBuilder::new(values_builder, 0); + let empty_list_array = builder.finish(); + Ok(Arc::new(empty_list_array)) + }}; } -make_empty_list_fn!(build_empty_list_array, ListBuilder); -make_empty_list_fn!(build_empty_large_list_array, LargeListBuilder); +macro_rules! build_empty_fixed_size_list_array_with_non_primitive_items { + ($type_builder:ident) => {{ + let values_builder = $type_builder::new(0); + let mut builder = FixedSizeListBuilder::new(values_builder, 0); + let empty_list_array = builder.finish(); + Ok(Arc::new(empty_list_array)) + }}; +} + +/// Builds an empty fixed-size list array; the list builder gets `0` as its second argument. +/// Item types without a match arm below yield `ArrowError::Unsupported`. +pub fn build_empty_fixed_size_list_array(item_type: DataType) -> Result<ArrayRef> { + match item_type { + DataType::UInt8 => { + build_empty_fixed_size_list_array_with_primitive_items!(UInt8Type) + } + DataType::UInt16 => { + build_empty_fixed_size_list_array_with_primitive_items!(UInt16Type) + } + DataType::UInt32 => { + build_empty_fixed_size_list_array_with_primitive_items!(UInt32Type) + } + DataType::UInt64 => { + build_empty_fixed_size_list_array_with_primitive_items!(UInt64Type) + } + DataType::Int8 => { + build_empty_fixed_size_list_array_with_primitive_items!(Int8Type) + } + DataType::Int16 => { + build_empty_fixed_size_list_array_with_primitive_items!(Int16Type) + } + DataType::Int32 => { + build_empty_fixed_size_list_array_with_primitive_items!(Int32Type) + } + DataType::Int64 => { + build_empty_fixed_size_list_array_with_primitive_items!(Int64Type) + } + DataType::Float32 => { + build_empty_fixed_size_list_array_with_primitive_items!(Float32Type) + } + DataType::Float64 => { + build_empty_fixed_size_list_array_with_primitive_items!(Float64Type) + } + DataType::Boolean => { + build_empty_fixed_size_list_array_with_non_primitive_items!(BooleanBuilder) + } + DataType::Date32(_) => { + 
build_empty_fixed_size_list_array_with_primitive_items!(Date32Type) + } + DataType::Date64(_) => { + build_empty_fixed_size_list_array_with_primitive_items!(Date64Type) + } + DataType::Time32(TimeUnit::Second) => { + build_empty_fixed_size_list_array_with_primitive_items!(Time32SecondType) + } + DataType::Time32(TimeUnit::Millisecond) => { + build_empty_fixed_size_list_array_with_primitive_items!(Time32MillisecondType) + } + DataType::Time64(TimeUnit::Microsecond) => { + build_empty_fixed_size_list_array_with_primitive_items!(Time64MicrosecondType) + } + DataType::Time64(TimeUnit::Nanosecond) => { + build_empty_fixed_size_list_array_with_primitive_items!(Time64NanosecondType) + } + DataType::Duration(TimeUnit::Second) => { + build_empty_fixed_size_list_array_with_primitive_items!(DurationSecondType) + } + DataType::Duration(TimeUnit::Millisecond) => { + build_empty_fixed_size_list_array_with_primitive_items!( + DurationMillisecondType + ) + } + DataType::Duration(TimeUnit::Microsecond) => { + build_empty_fixed_size_list_array_with_primitive_items!( + DurationMicrosecondType + ) + } + DataType::Duration(TimeUnit::Nanosecond) => { + build_empty_fixed_size_list_array_with_primitive_items!( + DurationNanosecondType + ) + } + DataType::Timestamp(TimeUnit::Second, _) => { + build_empty_fixed_size_list_array_with_primitive_items!(TimestampSecondType) + } + DataType::Timestamp(TimeUnit::Millisecond, _) => { + build_empty_fixed_size_list_array_with_primitive_items!( + TimestampMillisecondType + ) + } + DataType::Timestamp(TimeUnit::Microsecond, _) => { + build_empty_fixed_size_list_array_with_primitive_items!( + TimestampMicrosecondType + ) + } + DataType::Timestamp(TimeUnit::Nanosecond, _) => { + build_empty_fixed_size_list_array_with_primitive_items!( + TimestampNanosecondType + ) + } + DataType::Utf8 => { + build_empty_fixed_size_list_array_with_non_primitive_items!(StringBuilder) + } + DataType::Binary => { + 
build_empty_fixed_size_list_array_with_non_primitive_items!(BinaryBuilder) + } + _ => Err(ArrowError::Unsupported(format!( + "FixedSizeListBuilder of type FixedSizeList({:?}) is not supported by build_empty_fixed_size_list_array", + item_type + ))), + } +} #[cfg(test)] mod tests { diff --git a/rust/arrow/src/array/mod.rs b/rust/arrow/src/array/mod.rs index 346ecd2765a86..9caf7f8e257db 100644 --- a/rust/arrow/src/array/mod.rs +++ b/rust/arrow/src/array/mod.rs @@ -119,7 +119,7 @@ pub use self::array_binary::FixedSizeBinaryArray; pub use self::array_binary::LargeBinaryArray; pub use self::array_boolean::BooleanArray; pub use self::array_dictionary::DictionaryArray; -pub use self::array_list::build_empty_large_list_array; +pub use self::array_list::build_empty_fixed_size_list_array; pub use self::array_list::build_empty_list_array; pub use self::array_list::FixedSizeListArray; pub use self::array_list::LargeListArray; diff --git a/rust/datafusion/src/physical_plan/common.rs b/rust/datafusion/src/physical_plan/common.rs index b718f9eeb3432..9c390fc3a7eb3 100644 --- a/rust/datafusion/src/physical_plan/common.rs +++ b/rust/datafusion/src/physical_plan/common.rs @@ -37,7 +37,7 @@ use arrow::{ datatypes::Schema, }; use arrow::{ - array::{build_empty_large_list_array, build_empty_list_array}, + array::{build_empty_fixed_size_list_array, build_empty_list_array}, datatypes::{DataType, SchemaRef}, }; use futures::{Stream, TryStreamExt}; @@ -160,12 +160,15 @@ pub fn create_batch_empty(schema: &Schema) -> ArrowResult { DataType::Boolean => { Ok(Arc::new(BooleanArray::from(vec![] as Vec)) as ArrayRef) } - DataType::List(nested_type) => { - Ok(build_empty_list_array(nested_type.data_type().clone())?) 
- } - DataType::LargeList(nested_type) => Ok(build_empty_large_list_array( + DataType::List(nested_type) => Ok(build_empty_list_array::( + nested_type.data_type().clone(), + )?), + DataType::LargeList(nested_type) => Ok(build_empty_list_array::( nested_type.data_type().clone(), )?), + DataType::FixedSizeList(nested_type, _) => Ok( + build_empty_fixed_size_list_array(nested_type.data_type().clone())?, + ), _ => Err(DataFusionError::NotImplemented(format!( "Cannot convert datatype {:?} to array", f.data_type() diff --git a/rust/parquet/src/arrow/array_reader.rs b/rust/parquet/src/arrow/array_reader.rs index 3670b0e7ea33e..d4aa00e118a26 100644 --- a/rust/parquet/src/arrow/array_reader.rs +++ b/rust/parquet/src/arrow/array_reader.rs @@ -803,7 +803,7 @@ impl ArrayReader for ListArrayReader { let item_type = self.item_reader.get_data_type().clone(); if next_batch_array.len() == 0 { - return build_empty_list_array(item_type) + return build_empty_list_array::(item_type) .map_err(|err| ParquetError::General(err.to_string())); } let def_levels = self