Skip to content

Commit

Permalink
Remove Option from Field::metadata (#3091)
Browse files Browse the repository at this point in the history
* Remove Option from field metadata

* fix test issues

* fix clippy warnings

* fix test issues

* fix test issues

* use default for BTreeMap initialization

* empty commit

Co-authored-by: askoa <askoa@local>
  • Loading branch information
askoa and askoa committed Nov 15, 2022
1 parent 7d41e1c commit 8bb2917
Show file tree
Hide file tree
Showing 9 changed files with 176 additions and 192 deletions.
2 changes: 1 addition & 1 deletion arrow-array/src/builder/struct_builder.rs
Expand Up @@ -396,7 +396,7 @@ mod tests {

#[test]
#[should_panic(
expected = "Data type List(Field { name: \"item\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: None }) is not currently supported"
expected = "Data type List(Field { name: \"item\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) is not currently supported"
)]
fn test_struct_array_builder_from_schema_unsupported_type() {
let mut fields = vec![Field::new("f1", DataType::Int16, false)];
Expand Down
10 changes: 5 additions & 5 deletions arrow-integration-test/src/field.rs
Expand Up @@ -53,7 +53,7 @@ pub fn field_from_json(json: &serde_json::Value) -> Result<Field> {
// Referenced example file: testing/data/arrow-ipc-stream/integration/1.0.0-littleendian/generated_custom_metadata.json.gz
let metadata = match map.get("metadata") {
Some(&Value::Array(ref values)) => {
let mut res: BTreeMap<String, String> = BTreeMap::new();
let mut res: BTreeMap<String, String> = BTreeMap::default();
for value in values {
match value.as_object() {
Some(map) => {
Expand Down Expand Up @@ -87,12 +87,12 @@ pub fn field_from_json(json: &serde_json::Value) -> Result<Field> {
}
}
}
Some(res)
res
}
// We also support map format, because Schema's metadata supports this.
// See https://github.com/apache/arrow/pull/5907
Some(&Value::Object(ref values)) => {
let mut res: BTreeMap<String, String> = BTreeMap::new();
let mut res: BTreeMap<String, String> = BTreeMap::default();
for (k, v) in values {
if let Some(str_value) = v.as_str() {
res.insert(k.clone(), str_value.to_string().clone());
Expand All @@ -103,14 +103,14 @@ pub fn field_from_json(json: &serde_json::Value) -> Result<Field> {
)));
}
}
Some(res)
res
}
Some(_) => {
return Err(ArrowError::ParseError(
"Field `metadata` is not json array".to_string(),
));
}
_ => None,
_ => BTreeMap::default(),
};

// if data_type is a struct or list, get its children
Expand Down
171 changes: 84 additions & 87 deletions arrow-integration-test/src/lib.rs
Expand Up @@ -83,10 +83,10 @@ pub struct ArrowJsonField {

impl From<&Field> for ArrowJsonField {
fn from(field: &Field) -> Self {
let metadata_value = match field.metadata() {
Some(kv_list) => {
let metadata_value = match field.metadata().is_empty() {
false => {
let mut array = Vec::new();
for (k, v) in kv_list {
for (k, v) in field.metadata() {
let mut kv_map = SJMap::new();
kv_map.insert(k.clone(), Value::String(v.clone()));
array.push(Value::Object(kv_map));
Expand Down Expand Up @@ -1120,90 +1120,87 @@ mod tests {
let micros_tz = Some("UTC".to_string());
let nanos_tz = Some("Africa/Johannesburg".to_string());

let schema =
Schema::new(vec![
Field::new("bools-with-metadata-map", DataType::Boolean, true)
.with_metadata(Some(
[("k".to_string(), "v".to_string())]
.iter()
.cloned()
.collect(),
)),
Field::new("bools-with-metadata-vec", DataType::Boolean, true)
.with_metadata(Some(
[("k2".to_string(), "v2".to_string())]
.iter()
.cloned()
.collect(),
)),
Field::new("bools", DataType::Boolean, true),
Field::new("int8s", DataType::Int8, true),
Field::new("int16s", DataType::Int16, true),
Field::new("int32s", DataType::Int32, true),
Field::new("int64s", DataType::Int64, true),
Field::new("uint8s", DataType::UInt8, true),
Field::new("uint16s", DataType::UInt16, true),
Field::new("uint32s", DataType::UInt32, true),
Field::new("uint64s", DataType::UInt64, true),
Field::new("float32s", DataType::Float32, true),
Field::new("float64s", DataType::Float64, true),
Field::new("date_days", DataType::Date32, true),
Field::new("date_millis", DataType::Date64, true),
Field::new("time_secs", DataType::Time32(TimeUnit::Second), true),
Field::new("time_millis", DataType::Time32(TimeUnit::Millisecond), true),
Field::new("time_micros", DataType::Time64(TimeUnit::Microsecond), true),
Field::new("time_nanos", DataType::Time64(TimeUnit::Nanosecond), true),
Field::new("ts_secs", DataType::Timestamp(TimeUnit::Second, None), true),
Field::new(
"ts_millis",
DataType::Timestamp(TimeUnit::Millisecond, None),
true,
),
Field::new(
"ts_micros",
DataType::Timestamp(TimeUnit::Microsecond, None),
true,
),
Field::new(
"ts_nanos",
DataType::Timestamp(TimeUnit::Nanosecond, None),
true,
),
Field::new(
"ts_secs_tz",
DataType::Timestamp(TimeUnit::Second, secs_tz.clone()),
true,
),
Field::new(
"ts_millis_tz",
DataType::Timestamp(TimeUnit::Millisecond, millis_tz.clone()),
true,
),
Field::new(
"ts_micros_tz",
DataType::Timestamp(TimeUnit::Microsecond, micros_tz.clone()),
true,
),
Field::new(
"ts_nanos_tz",
DataType::Timestamp(TimeUnit::Nanosecond, nanos_tz.clone()),
true,
),
Field::new("utf8s", DataType::Utf8, true),
Field::new(
"lists",
DataType::List(Box::new(Field::new("item", DataType::Int32, true))),
true,
),
Field::new(
"structs",
DataType::Struct(vec![
Field::new("int32s", DataType::Int32, true),
Field::new("utf8s", DataType::Utf8, true),
]),
true,
),
]);
let schema = Schema::new(vec![
Field::new("bools-with-metadata-map", DataType::Boolean, true).with_metadata(
[("k".to_string(), "v".to_string())]
.iter()
.cloned()
.collect(),
),
Field::new("bools-with-metadata-vec", DataType::Boolean, true).with_metadata(
[("k2".to_string(), "v2".to_string())]
.iter()
.cloned()
.collect(),
),
Field::new("bools", DataType::Boolean, true),
Field::new("int8s", DataType::Int8, true),
Field::new("int16s", DataType::Int16, true),
Field::new("int32s", DataType::Int32, true),
Field::new("int64s", DataType::Int64, true),
Field::new("uint8s", DataType::UInt8, true),
Field::new("uint16s", DataType::UInt16, true),
Field::new("uint32s", DataType::UInt32, true),
Field::new("uint64s", DataType::UInt64, true),
Field::new("float32s", DataType::Float32, true),
Field::new("float64s", DataType::Float64, true),
Field::new("date_days", DataType::Date32, true),
Field::new("date_millis", DataType::Date64, true),
Field::new("time_secs", DataType::Time32(TimeUnit::Second), true),
Field::new("time_millis", DataType::Time32(TimeUnit::Millisecond), true),
Field::new("time_micros", DataType::Time64(TimeUnit::Microsecond), true),
Field::new("time_nanos", DataType::Time64(TimeUnit::Nanosecond), true),
Field::new("ts_secs", DataType::Timestamp(TimeUnit::Second, None), true),
Field::new(
"ts_millis",
DataType::Timestamp(TimeUnit::Millisecond, None),
true,
),
Field::new(
"ts_micros",
DataType::Timestamp(TimeUnit::Microsecond, None),
true,
),
Field::new(
"ts_nanos",
DataType::Timestamp(TimeUnit::Nanosecond, None),
true,
),
Field::new(
"ts_secs_tz",
DataType::Timestamp(TimeUnit::Second, secs_tz.clone()),
true,
),
Field::new(
"ts_millis_tz",
DataType::Timestamp(TimeUnit::Millisecond, millis_tz.clone()),
true,
),
Field::new(
"ts_micros_tz",
DataType::Timestamp(TimeUnit::Microsecond, micros_tz.clone()),
true,
),
Field::new(
"ts_nanos_tz",
DataType::Timestamp(TimeUnit::Nanosecond, nanos_tz.clone()),
true,
),
Field::new("utf8s", DataType::Utf8, true),
Field::new(
"lists",
DataType::List(Box::new(Field::new("item", DataType::Int32, true))),
true,
),
Field::new(
"structs",
DataType::Struct(vec![
Field::new("int32s", DataType::Int32, true),
Field::new("utf8s", DataType::Utf8, true),
]),
true,
),
]);

let bools_with_metadata_map =
BooleanArray::from(vec![Some(true), None, Some(false)]);
Expand Down
30 changes: 13 additions & 17 deletions arrow-ipc/src/convert.rs
Expand Up @@ -86,18 +86,16 @@ impl<'a> From<crate::Field<'a>> for Field {
)
};

let mut metadata = None;
let mut metadata_map = BTreeMap::default();
if let Some(list) = field.custom_metadata() {
let mut metadata_map = BTreeMap::default();
for kv in list {
if let (Some(k), Some(v)) = (kv.key(), kv.value()) {
metadata_map.insert(k.to_string(), v.to_string());
}
}
metadata = Some(metadata_map);
}

arrow_field.with_metadata(metadata)
arrow_field.with_metadata(metadata_map)
}
}

Expand Down Expand Up @@ -424,19 +422,17 @@ pub(crate) fn build_field<'a>(
) -> WIPOffset<crate::Field<'a>> {
// Optional custom metadata.
let mut fb_metadata = None;
if let Some(metadata) = field.metadata() {
if !metadata.is_empty() {
let mut kv_vec = vec![];
for (k, v) in metadata {
let kv_args = crate::KeyValueArgs {
key: Some(fbb.create_string(k.as_str())),
value: Some(fbb.create_string(v.as_str())),
};
let kv_offset = crate::KeyValue::create(fbb, &kv_args);
kv_vec.push(kv_offset);
}
fb_metadata = Some(fbb.create_vector(&kv_vec));
if !field.metadata().is_empty() {
let mut kv_vec = vec![];
for (k, v) in field.metadata() {
let kv_args = crate::KeyValueArgs {
key: Some(fbb.create_string(k.as_str())),
value: Some(fbb.create_string(v.as_str())),
};
let kv_offset = crate::KeyValue::create(fbb, &kv_args);
kv_vec.push(kv_offset);
}
fb_metadata = Some(fbb.create_vector(&kv_vec));
};

let fb_field_name = fbb.create_string(field.name().as_str());
Expand Down Expand Up @@ -822,7 +818,7 @@ mod tests {
.collect();
let schema = Schema::new_with_metadata(
vec![
Field::new("uint8", DataType::UInt8, false).with_metadata(Some(field_md)),
Field::new("uint8", DataType::UInt8, false).with_metadata(field_md),
Field::new("uint16", DataType::UInt16, true),
Field::new("uint32", DataType::UInt32, false),
Field::new("uint64", DataType::UInt64, true),
Expand Down
6 changes: 3 additions & 3 deletions arrow-schema/src/datatype.rs
Expand Up @@ -387,12 +387,12 @@ mod tests {
let field_metadata: BTreeMap<String, String> = kv_array.iter().cloned().collect();

// Non-empty map: should be converted as JSON obj { ... }
let first_name = Field::new("first_name", DataType::Utf8, false)
.with_metadata(Some(field_metadata));
let first_name =
Field::new("first_name", DataType::Utf8, false).with_metadata(field_metadata);

// Empty map: should be omitted.
let last_name = Field::new("last_name", DataType::Utf8, false)
.with_metadata(Some(BTreeMap::default()));
.with_metadata(BTreeMap::default());

let person = DataType::Struct(vec![
first_name,
Expand Down

0 comments on commit 8bb2917

Please sign in to comment.