Skip to content

Commit

Permalink
Support casting to/from DataType::Null in cast kernel (apache#1572)
Browse files Browse the repository at this point in the history
* cast DataType::Null from and to other data types

* fmt fix

* add more unit tests

Co-authored-by: duripeng <duripeng@baidu.com>
  • Loading branch information
2 people authored and MazterQyou committed Jan 19, 2024
1 parent 7e9519c commit dbc9929
Showing 1 changed file with 129 additions and 1 deletion.
130 changes: 129 additions & 1 deletion arrow/src/compute/kernels/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,20 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool {
| UInt64
| Float64
| Date64
| Timestamp(_, _)
| Time64(_)
| Duration(_)
| Interval(_)
| FixedSizeBinary(_)
| Binary
| Utf8
| LargeBinary
| LargeUtf8
| List(_)
| LargeList(_)
| FixedSizeList(_, _)
| Struct(_)
| Map(_, _)
| Dictionary(_, _),
)
| (
Expand All @@ -115,7 +128,20 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool {
| UInt64
| Float64
| Date64
| Timestamp(_, _)
| Time64(_)
| Duration(_)
| Interval(_)
| FixedSizeBinary(_)
| Binary
| Utf8
| LargeBinary
| LargeUtf8
| List(_)
| LargeList(_)
| FixedSizeList(_, _)
| Struct(_)
| Map(_, _)
| Dictionary(_, _),
Null,
) => true,
Expand Down Expand Up @@ -539,7 +565,20 @@ pub fn cast_with_options(
| UInt64
| Float64
| Date64
| Timestamp(_, _)
| Time64(_)
| Duration(_)
| Interval(_)
| FixedSizeBinary(_)
| Binary
| Utf8
| LargeBinary
| LargeUtf8
| List(_)
| LargeList(_)
| FixedSizeList(_, _)
| Struct(_)
| Map(_, _)
| Dictionary(_, _),
)
| (
Expand All @@ -557,7 +596,20 @@ pub fn cast_with_options(
| UInt64
| Float64
| Date64
| Timestamp(_, _)
| Time64(_)
| Duration(_)
| Interval(_)
| FixedSizeBinary(_)
| Binary
| Utf8
| LargeBinary
| LargeUtf8
| List(_)
| LargeList(_)
| FixedSizeList(_, _)
| Struct(_)
| Map(_, _)
| Dictionary(_, _),
Null,
) => Ok(new_null_array(to_type, array.len())),
Expand Down Expand Up @@ -4655,7 +4707,7 @@ mod tests {
}

#[test]
fn test_cast_null_array_from_and_to_others() {
fn test_cast_null_array_from_and_to_primitive_array() {
macro_rules! typed_test {
($ARR_TYPE:ident, $DATATYPE:ident, $TYPE:tt) => {{
{
Expand Down Expand Up @@ -4688,6 +4740,82 @@ mod tests {

typed_test!(Float32Array, Float32, Float32Type);
typed_test!(Float64Array, Float64, Float64Type);

typed_test!(Date32Array, Date32, Date32Type);
typed_test!(Date64Array, Date64, Date64Type);
}

/// Round-trips an all-null array between `data_type` and `DataType::Null`.
///
/// Two casts are exercised, in this order:
/// 1. a 4-element null array of `data_type` is cast to `DataType::Null`;
/// 2. a 4-element `DataType::Null` array is cast to `data_type`.
///
/// For each cast the source array's type, the result array's type and the
/// nullness of every one of the 4 result slots are asserted. Panics with
/// "cast failed" if the cast kernel returns an error.
fn cast_from_and_to_null(data_type: &DataType) {
    // Number of slots in every array built by this helper.
    const LEN: usize = 4;

    let null_type = DataType::Null;
    // (source type, target type) pairs; the order matches the doc comment.
    let directions = [(data_type, &null_type), (&null_type, data_type)];

    for (from_type, to_type) in directions.iter().copied() {
        let source = new_null_array(from_type, LEN);
        assert_eq!(source.data_type(), from_type);

        let cast_array = cast(&source, to_type).expect("cast failed");
        assert_eq!(cast_array.data_type(), to_type);

        for i in 0..LEN {
            assert!(cast_array.is_null(i));
        }
    }
}

/// Checks null round-trip casting for the variable-sized string and binary
/// types by running `cast_from_and_to_null` on each of them in turn.
#[test]
fn test_cast_null_from_and_to_variable_sized() {
    let variable_sized_types = [
        DataType::Utf8,
        DataType::LargeUtf8,
        DataType::Binary,
        DataType::LargeBinary,
    ];

    for data_type in variable_sized_types.iter() {
        cast_from_and_to_null(data_type);
    }
}

/// Checks null round-trip casting for nested types: map, list, large list,
/// fixed-size list, dictionary and struct, in that order.
#[test]
fn test_cast_null_from_and_to_nested_type() {
    // All three list flavours use the same nullable Int32 child field.
    let item_field = || Box::new(Field::new("item", DataType::Int32, true));

    // Map whose "entry" field is a struct of a non-nullable Utf8 "key" and a
    // nullable Int32 "value".
    let entry_struct = DataType::Struct(vec![
        Field::new("key", DataType::Utf8, false),
        Field::new("value", DataType::Int32, true),
    ]);
    let map_type =
        DataType::Map(Box::new(Field::new("entry", entry_struct, false)), false);

    // The dictionary type is read back from an all-null dictionary array
    // keyed by `Int8Type` rather than being spelled out by hand.
    let all_null = vec![None, None, None, None] as Vec<Option<&str>>;
    let dictionary: DictionaryArray<Int8Type> = all_null.into_iter().collect();
    let dictionary = Arc::new(dictionary) as ArrayRef;
    let dictionary_type = dictionary.data_type().to_owned();

    // Struct with a single non-nullable Int64 field.
    let struct_type =
        DataType::Struct(vec![Field::new("data", DataType::Int64, false)]);

    let nested_types = vec![
        map_type,
        DataType::List(item_field()),
        DataType::LargeList(item_field()),
        DataType::FixedSizeList(item_field(), 4),
        dictionary_type,
        struct_type,
    ];

    for data_type in &nested_types {
        cast_from_and_to_null(data_type);
    }
}

/// Print the `DictionaryArray` `array` as a vector of strings
Expand Down

0 comments on commit dbc9929

Please sign in to comment.