Skip to content

Commit

Permalink
Add MapBuilder::with_values_field to support non-nullable values (#5482
Browse files Browse the repository at this point in the history
…) (#5483)

* Add MapBuilder::with_values_field

* Fix non-nullable List and Map in StructBuilder
  • Loading branch information
lasantosr committed Mar 8, 2024
1 parent e2b1f22 commit 82fc0df
Show file tree
Hide file tree
Showing 2 changed files with 93 additions and 14 deletions.
88 changes: 82 additions & 6 deletions arrow-array/src/builder/map_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ use crate::{Array, ArrayRef, MapArray, StructArray};
use arrow_buffer::Buffer;
use arrow_buffer::{NullBuffer, NullBufferBuilder};
use arrow_data::ArrayData;
use arrow_schema::{ArrowError, DataType, Field};
use arrow_schema::{ArrowError, DataType, Field, FieldRef};
use std::any::Any;
use std::sync::Arc;

Expand Down Expand Up @@ -61,6 +61,7 @@ pub struct MapBuilder<K: ArrayBuilder, V: ArrayBuilder> {
field_names: MapFieldNames,
key_builder: K,
value_builder: V,
value_field: Option<FieldRef>,
}

/// The [`Field`] names for a [`MapArray`]
Expand Down Expand Up @@ -106,6 +107,20 @@ impl<K: ArrayBuilder, V: ArrayBuilder> MapBuilder<K, V> {
field_names: field_names.unwrap_or_default(),
key_builder,
value_builder,
value_field: None,
}
}

/// Sets the [`Field`] that describes the values of the finished map.
///
/// A field supplied here takes precedence over the value name configured
/// through the builder's field names. When no field is supplied, a nullable
/// field is created from the configured value name and the data type of the
/// values array.
///
/// # Panics
///
/// [`Self::finish`] and [`Self::finish_cloned`] will panic if the data type
/// of `field` does not match the data type produced by `V`.
pub fn with_values_field(mut self, field: impl Into<FieldRef>) -> Self {
    self.value_field = Some(field.into());
    self
}

Expand Down Expand Up @@ -184,11 +199,14 @@ impl<K: ArrayBuilder, V: ArrayBuilder> MapBuilder<K, V> {
keys_arr.data_type().clone(),
false, // always non-nullable
));
let values_field = Arc::new(Field::new(
self.field_names.value.as_str(),
values_arr.data_type().clone(),
true,
));
let values_field = match &self.value_field {
Some(f) => f.clone(),
None => Arc::new(Field::new(
self.field_names.value.as_str(),
values_arr.data_type().clone(),
true,
)),
};

let struct_array =
StructArray::from(vec![(keys_field, keys_arr), (values_field, values_arr)]);
Expand Down Expand Up @@ -296,4 +314,62 @@ mod tests {
42
);
}

#[test]
fn test_with_values_field() {
    let bars = Arc::new(Field::new("bars", DataType::Int32, false));

    // Every map finished by this builder must report this exact type: the
    // custom non-nullable `bars` field sits next to the default `keys` field.
    let expected_type = DataType::Map(
        Arc::new(Field::new(
            "entries",
            DataType::Struct(
                vec![
                    Arc::new(Field::new("keys", DataType::Int32, false)),
                    bars.clone(),
                ]
                .into(),
            ),
            false,
        )),
        false,
    );

    let mut map_builder = MapBuilder::new(None, Int32Builder::new(), Int32Builder::new())
        .with_values_field(bars);

    map_builder.keys().append_value(1);
    map_builder.values().append_value(2);
    map_builder.append(true).unwrap();
    // Appending a null map is allowed: the field's nullability concerns the
    // values inside a map, not the map entries themselves.
    map_builder.append(false).unwrap();
    map_builder.keys().append_value(3);
    map_builder.values().append_value(4);
    map_builder.append(true).unwrap();

    let first = map_builder.finish();
    assert_eq!(first.len(), 3);
    assert_eq!(first.data_type(), &expected_type);

    // Build a second map with the same builder and check that it still
    // reports the custom values field.
    map_builder.keys().append_value(5);
    map_builder.values().append_value(6);
    map_builder.append(true).unwrap();

    let second = map_builder.finish();
    assert_eq!(second.len(), 1);
    assert_eq!(second.data_type(), &expected_type);
}
}
19 changes: 11 additions & 8 deletions arrow-array/src/builder/struct_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -171,11 +171,11 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box<dyn ArrayBuilde
}
DataType::List(field) => {
let builder = make_builder(field.data_type(), capacity);
Box::new(ListBuilder::with_capacity(builder, capacity))
Box::new(ListBuilder::with_capacity(builder, capacity).with_field(field.clone()))
}
DataType::LargeList(field) => {
let builder = make_builder(field.data_type(), capacity);
Box::new(LargeListBuilder::with_capacity(builder, capacity))
Box::new(LargeListBuilder::with_capacity(builder, capacity).with_field(field.clone()))
}
DataType::Map(field, _) => match field.data_type() {
DataType::Struct(fields) => {
Expand All @@ -186,12 +186,15 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box<dyn ArrayBuilde
};
let key_builder = make_builder(fields[0].data_type(), capacity);
let value_builder = make_builder(fields[1].data_type(), capacity);
Box::new(MapBuilder::with_capacity(
Some(map_field_names),
key_builder,
value_builder,
capacity,
))
Box::new(
MapBuilder::with_capacity(
Some(map_field_names),
key_builder,
value_builder,
capacity,
)
.with_values_field(fields[1].clone()),
)
}
t => panic!("The field of Map data type {t:?} should has a child Struct field"),
},
Expand Down

0 comments on commit 82fc0df

Please sign in to comment.