From 35a5b067e63467af72de5e885d41ac2f6d06e58b Mon Sep 17 00:00:00 2001 From: Neville Dipale Date: Wed, 14 Jul 2021 20:49:29 +0200 Subject: [PATCH] wip: review feedback --- arrow/src/array/array.rs | 30 +++++++++++++++++-- arrow/src/array/array_map.rs | 56 ++++++++++++++++++++++++++++++++---- arrow/src/array/equal/mod.rs | 8 +++++- 3 files changed, 85 insertions(+), 9 deletions(-) diff --git a/arrow/src/array/array.rs b/arrow/src/array/array.rs index e151b21abd0d..07ee002aed40 100644 --- a/arrow/src/array/array.rs +++ b/arrow/src/array/array.rs @@ -453,7 +453,9 @@ pub fn new_null_array(data_type: &DataType, length: usize) -> ArrayRef { .map(|field| ArrayData::new_empty(field.data_type())) .collect(), )), - DataType::Map(_field, _keys_sorted) => todo!(), + DataType::Map(field, _keys_sorted) => { + new_null_list_array::(data_type, field.data_type(), length) + } DataType::Union(_) => { unimplemented!("Creating null Union array not yet supported") } @@ -560,7 +562,7 @@ where writeln!(f, " null,")?; } else { write!(f, " ")?; - print_item(&array, i, f)?; + print_item(array, i, f)?; writeln!(f, ",")?; } } @@ -576,7 +578,7 @@ where writeln!(f, " null,")?; } else { write!(f, " ")?; - print_item(&array, i, f)?; + print_item(array, i, f)?; writeln!(f, ",")?; } } @@ -659,6 +661,28 @@ mod tests { } } + #[test] + fn test_null_map() { + let data_type = DataType::Map( + Box::new(Field::new( + "entry", + DataType::Struct(vec![ + Field::new("key", DataType::Utf8, false), + Field::new("key", DataType::Int32, true), + ]), + false, + )), + false, + ); + let array = new_null_array(&data_type, 9); + let a = array.as_any().downcast_ref::().unwrap(); + assert_eq!(a.len(), 9); + assert_eq!(a.value_offsets()[9], 0i32); + for i in 0..9 { + assert!(a.is_null(i)); + } + } + #[test] fn test_null_dictionary() { let values = vec![None, None, None, None, None, None, None, None, None] diff --git a/arrow/src/array/array_map.rs b/arrow/src/array/array_map.rs index 44231a5f6728..ba040ae721b8 100644 --- a/arrow/src/array/array_map.rs +++ b/arrow/src/array/array_map.rs @@ -206,7 +206,9 @@ mod tests { .build(); let values_data = ArrayData::builder(DataType::UInt32) .len(8) - .add_buffer(Buffer::from(&[0u32, 1, 2, 3, 4, 5, 6, 7].to_byte_slice())) + .add_buffer(Buffer::from( + &[0u32, 10, 20, 30, 40, 50, 60, 70].to_byte_slice(), + )) .build(); // Construct a buffer for value offsets, for the nested array: @@ -246,7 +248,10 @@ mod tests { .build(); let value_data = ArrayData::builder(DataType::UInt32) .len(8) - .add_buffer(Buffer::from(&[0u32, 1, 2, 3, 4, 5, 6, 7].to_byte_slice())) + .add_buffer(Buffer::from( + &[0u32, 10, 20, 0, 40, 0, 60, 70].to_byte_slice(), + )) + .null_bit_buffer(Buffer::from(&[0b11010110])) .build(); // Construct a buffer for value offsets, for the nested array: @@ -254,7 +259,7 @@ mod tests { let entry_offsets = Buffer::from(&[0, 3, 6, 8].to_byte_slice()); let keys_field = Field::new("keys", DataType::Int32, false); - let values_field = Field::new("values", DataType::UInt32, false); + let values_field = Field::new("values", DataType::UInt32, true); let entry_struct = StructArray::from(vec![ (keys_field.clone(), make_array(key_data)), (values_field.clone(), make_array(value_data.clone())), @@ -285,7 +290,8 @@ mod tests { assert_eq!(2, map_array.value_length(2)); let key_array = Arc::new(Int32Array::from(vec![0, 1, 2])) as ArrayRef; - let value_array = Arc::new(UInt32Array::from(vec![0, 1, 2])) as ArrayRef; + let value_array = + Arc::new(UInt32Array::from(vec![None, Some(10u32), Some(20)])) as ArrayRef; let struct_array = StructArray::from(vec![ (keys_field.clone(), key_array), (values_field.clone(), value_array), @@ -324,7 +330,8 @@ mod tests { assert_eq!(2, map_array.value_length(1)); let key_array = Arc::new(Int32Array::from(vec![3, 4, 5])) as ArrayRef; - let value_array = Arc::new(UInt32Array::from(vec![3, 4, 5])) as ArrayRef; + let value_array = + Arc::new(UInt32Array::from(vec![None, Some(40), None])) as ArrayRef; let struct_array = StructArray::from(vec![(keys_field, key_array), (values_field, value_array)]); assert_eq!( @@ -358,6 +365,45 @@ mod tests { assert_eq!(3, sliced_map_array.value_length(0)); assert_eq!(6, sliced_map_array.value_offsets()[1]); assert_eq!(2, sliced_map_array.value_length(1)); + + // Construct key and values + let keys_data = ArrayData::builder(DataType::Int32) + .len(5) + .add_buffer(Buffer::from(&[3, 4, 5, 6, 7].to_byte_slice())) + .build(); + let values_data = ArrayData::builder(DataType::UInt32) + .len(5) + .add_buffer(Buffer::from(&[30u32, 40, 50, 60, 70].to_byte_slice())) + .build(); + + // Construct a buffer for value offsets, for the nested array: + // [[3, 4, 5], [6, 7]] + let entry_offsets = Buffer::from(&[0, 3, 5].to_byte_slice()); + + let keys = Field::new("keys", DataType::Int32, false); + let values = Field::new("values", DataType::UInt32, false); + let entry_struct = StructArray::from(vec![ + (keys, make_array(keys_data)), + (values, make_array(values_data)), + ]); + + // Construct a map array from the above two + let map_data_type = DataType::Map( + Box::new(Field::new( + "entries", + entry_struct.data_type().clone(), + true, + )), + false, + ); + let expected_map_data = ArrayData::builder(map_data_type) + .len(2) + .add_buffer(entry_offsets) + .add_child_data(entry_struct.data().clone()) + .build(); + let expected_map_array = MapArray::from(expected_map_data); + + assert_eq!(&expected_map_array, sliced_map_array) } #[test] diff --git a/arrow/src/array/equal/mod.rs b/arrow/src/array/equal/mod.rs index ba6f51e8fec9..d45ba1f6252f 100644 --- a/arrow/src/array/equal/mod.rs +++ b/arrow/src/array/equal/mod.rs @@ -22,7 +22,7 @@ use super::{ Array, ArrayData, BinaryOffsetSizeTrait, BooleanArray, DecimalArray, FixedSizeBinaryArray, FixedSizeListArray, GenericBinaryArray, GenericListArray, - GenericStringArray, NullArray, OffsetSizeTrait, PrimitiveArray, + GenericStringArray, MapArray, NullArray, OffsetSizeTrait, PrimitiveArray, StringOffsetSizeTrait, StructArray, }; @@ -117,6 +117,12 @@ impl PartialEq for GenericListArray { } } +impl PartialEq for MapArray { + fn eq(&self, other: &Self) -> bool { + equal(self.data(), other.data()) + } +} + impl PartialEq for FixedSizeListArray { fn eq(&self, other: &Self) -> bool { equal(self.data(), other.data())