Skip to content
Permalink
Browse files
Fix generate_unions_case for Rust case (#1677)
* Fix generate_unions_case for rust case

* Add test
  • Loading branch information
viirya committed May 10, 2022
1 parent f72df51 commit 19f0ada47333eb80105f4ff53aaf887b2efa8873
Showing 5 changed files with 193 additions and 0 deletions.
@@ -499,6 +499,52 @@ impl DataType {
))
}
}
// Parses a JSON `union` type description.
//
// Requires a string `mode` ("SPARSE" or "DENSE") and a `typeIds` array of
// integers. The children are not visible at this level, so one placeholder
// field (empty name, Boolean, nullable) is produced per type id, carrying the
// id in its "type_id" metadata entry.
//
// Malformed input is reported as `ArrowError::ParseError` instead of panicking.
Some(s) if s == "union" => {
    if let Some(Value::String(mode)) = map.get("mode") {
        let union_mode = if mode == "SPARSE" {
            UnionMode::Sparse
        } else if mode == "DENSE" {
            UnionMode::Dense
        } else {
            return Err(ArrowError::ParseError(format!(
                "Unknown union mode {:?} for union",
                mode
            )));
        };
        if let Some(type_ids) = map.get("typeIds") {
            // `typeIds` comes from external JSON: validate its shape rather
            // than unwrapping.
            let type_ids = type_ids
                .as_array()
                .ok_or_else(|| {
                    ArrowError::ParseError(format!(
                        "Expecting typeIds for union to be an array, got {:?}",
                        type_ids
                    ))
                })?
                .iter()
                .map(|t| {
                    t.as_i64().ok_or_else(|| {
                        ArrowError::ParseError(format!(
                            "Expecting an integer type id for union, got {:?}",
                            t
                        ))
                    })
                })
                .collect::<std::result::Result<Vec<_>, ArrowError>>()?;

            let default_fields = type_ids
                .iter()
                .map(|t| {
                    Field::new("", DataType::Boolean, true).with_metadata(
                        Some(
                            [("type_id".to_string(), t.to_string())]
                                .iter()
                                .cloned()
                                .collect(),
                        ),
                    )
                })
                .collect::<Vec<_>>();

            Ok(DataType::Union(default_fields, union_mode))
        } else {
            Err(ArrowError::ParseError(
                "Expecting a typeIds for union ".to_string(),
            ))
        }
    } else {
        Err(ArrowError::ParseError(
            "Expecting a mode for union".to_string(),
        ))
    }
}
Some(other) => Err(ArrowError::ParseError(format!(
"invalid or unsupported type name: {} in {:?}",
other, json
@@ -390,6 +390,30 @@ impl Field {
}
}
}
// Resolves the real child fields of a union from the JSON `children` array.
//
// `fields` holds one placeholder per type id; each placeholder's metadata is
// copied onto the child parsed at the same position, replacing whatever
// metadata that child had.
//
// Returns a `ParseError` when `children` is missing, is not an array, or does
// not have exactly one entry per type id.
DataType::Union(fields, mode) => match map.get("children") {
    Some(Value::Array(values)) => {
        // Pairing with `zip` alone would silently drop the surplus side and
        // leave children without metadata, so reject mismatches up front.
        if values.len() != fields.len() {
            return Err(ArrowError::ParseError(format!(
                "Union has {} type ids but {} children",
                fields.len(),
                values.len()
            )));
        }
        let mut union_fields: Vec<Field> =
            values.iter().map(Field::from).collect::<Result<_>>()?;
        for (placeholder, union_field) in
            fields.iter().zip(union_fields.iter_mut())
        {
            union_field.set_metadata(placeholder.metadata().cloned());
        }
        DataType::Union(union_fields, mode)
    }
    Some(_) => {
        return Err(ArrowError::ParseError(
            "Field 'children' must be an array".to_string(),
        ))
    }
    None => {
        return Err(ArrowError::ParseError(
            "Field missing 'children' attribute".to_string(),
        ));
    }
},
_ => data_type,
};

@@ -392,6 +392,70 @@ mod tests {
assert_eq!(expected, dt);
}

/// A sparse union with type ids 5 and 7 and two children must parse into a
/// `Field` whose children carry those ids, in order, as "type_id" metadata.
#[test]
fn parse_union_from_json() {
    let json = r#"
{
"name": "my_union",
"nullable": false,
"type": {
"name": "union",
"mode": "SPARSE",
"typeIds": [
5,
7
]
},
"children": [
{
"name": "f1",
"type": {
"name": "int",
"isSigned": true,
"bitWidth": 32
},
"nullable": true,
"children": []
},
{
"name": "f2",
"type": {
"name": "utf8"
},
"nullable": true,
"children": []
}
]
}
"#;
    let value: Value = serde_json::from_str(json).unwrap();
    let parsed = Field::from(&value).unwrap();

    // Builds a nullable child field tagged with the given type id.
    let child = |name: &str, data_type: DataType, type_id: &str| {
        Field::new(name, data_type, true).with_metadata(Some(
            std::iter::once(("type_id".to_string(), type_id.to_string()))
                .collect(),
        ))
    };

    let union_type = DataType::Union(
        vec![
            child("f1", DataType::Int32, "5"),
            child("f2", DataType::Utf8, "7"),
        ],
        UnionMode::Sparse,
    );
    let expected = Field::new("my_union", union_type, false);

    assert_eq!(expected, parsed);
}

#[test]
fn parse_utf8_from_json() {
let json = "{\"name\":\"utf8\"}";
@@ -132,6 +132,8 @@ pub struct ArrowJsonColumn {
pub data: Option<Vec<Value>>,
#[serde(rename = "OFFSET")]
pub offset: Option<Vec<Value>>, // leaving as Value as 64-bit offsets are strings
#[serde(rename = "TYPE_ID")]
pub type_id: Option<Vec<i8>>,
pub children: Option<Vec<ArrowJsonColumn>>,
}

@@ -472,6 +474,7 @@ impl ArrowJsonBatch {
validity: Some(validity),
data: Some(data),
offset: None,
type_id: None,
children: None,
}
}
@@ -481,6 +484,7 @@ impl ArrowJsonBatch {
validity: None,
data: None,
offset: None,
type_id: None,
children: None,
},
};
@@ -632,6 +632,61 @@ fn array_from_json(
let array = MapArray::from(array_data);
Ok(Arc::new(array))
}
// Builds a `UnionArray` from its JSON column.
//
// The JSON `TYPE_ID` values are logical type ids; each is translated to the
// position of the child field whose "type_id" metadata matches it before
// being written to the type-id buffer. Every malformed-input case (missing or
// invalid metadata, unknown type id, non-i32 offset, missing children,
// invalid union layout) is returned as an error instead of panicking.
DataType::Union(fields, _) => {
    // logical type id -> index of the child field
    let field_type_ids = fields
        .iter()
        .enumerate()
        .map(|(idx, f)| -> Result<(i8, i8)> {
            let type_id = f
                .metadata()
                .and_then(|m| m.get("type_id"))
                .ok_or_else(|| {
                    ArrowError::JsonError(format!(
                        "Union field at index {} is missing \"type_id\" metadata",
                        idx
                    ))
                })?
                .parse::<i8>()
                .map_err(|e| {
                    ArrowError::JsonError(format!(
                        "Invalid type id for union field at index {}: {}",
                        idx, e
                    ))
                })?;
            // The child index is stored in an i8 buffer; refuse to truncate.
            if idx > i8::MAX as usize {
                return Err(ArrowError::JsonError(format!(
                    "Union field index {} does not fit in a type id",
                    idx
                )));
            }
            Ok((type_id, idx as i8))
        })
        .collect::<Result<HashMap<i8, i8>>>()?;

    let type_ids: Vec<i8> = match json_col.type_id {
        Some(type_id) => type_id
            .iter()
            .map(|t| {
                field_type_ids.get(t).copied().ok_or_else(|| {
                    ArrowError::JsonError(format!(
                        "Unable to find type id {:?}",
                        t
                    ))
                })
            })
            .collect::<Result<_>>()?,
        None => vec![],
    };

    // Offsets are only present when the JSON column carries them; each must
    // be an integer that fits in i32.
    let offset: Option<Buffer> = json_col
        .offset
        .map(|offsets| -> Result<Buffer> {
            let offsets = offsets
                .iter()
                .map(|v| {
                    v.as_i64()
                        .filter(|o| {
                            (i64::from(i32::MIN)..=i64::from(i32::MAX))
                                .contains(o)
                        })
                        .map(|o| o as i32)
                        .ok_or_else(|| {
                            ArrowError::JsonError(format!(
                                "Invalid union offset {:?}",
                                v
                            ))
                        })
                })
                .collect::<Result<Vec<i32>>>()?;
            Ok(Buffer::from(&offsets.to_byte_slice()))
        })
        .transpose()?;

    let json_children = json_col.children.ok_or_else(|| {
        ArrowError::JsonError("Union column is missing children".to_string())
    })?;

    let mut children: Vec<(Field, Arc<dyn Array>)> =
        Vec::with_capacity(fields.len());
    for (field, col) in fields.iter().zip(json_children) {
        let array = array_from_json(field, col, dictionaries)?;
        children.push((field.clone(), array));
    }

    let array = UnionArray::try_new(
        Buffer::from(&type_ids.to_byte_slice()),
        offset,
        children,
    )?;
    Ok(Arc::new(array))
}
t => Err(ArrowError::JsonError(format!(
"data type {:?} not supported",
t

0 comments on commit 19f0ada

Please sign in to comment.