diff --git a/arrow-cast/Cargo.toml b/arrow-cast/Cargo.toml index e42e75b838ce..59b7351c609f 100644 --- a/arrow-cast/Cargo.toml +++ b/arrow-cast/Cargo.toml @@ -45,6 +45,7 @@ arrow-buffer = { workspace = true } arrow-data = { workspace = true } arrow-schema = { workspace = true } arrow-select = { workspace = true } +half = { version = "2.1", default-features = false } chrono = { version = "0.4.23", default-features = false, features = ["clock"] } num = { version = "0.4", default-features = false, features = ["std"] } lexical-core = { version = "^0.8", default-features = false, features = ["write-integers", "write-floats", "parse-integers", "parse-floats"] } diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs index 37fede0a6fe0..8821786e3704 100644 --- a/arrow-cast/src/cast.rs +++ b/arrow-cast/src/cast.rs @@ -51,6 +51,7 @@ use arrow_buffer::{i256, ArrowNativeType, Buffer, MutableBuffer}; use arrow_data::ArrayData; use arrow_schema::*; use arrow_select::take::take; +use half::f16; use num::cast::AsPrimitive; use num::{NumCast, ToPrimitive}; @@ -92,6 +93,7 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool { | UInt8 | Int16 | UInt16 + | Float16 | Int32 | UInt32 | Float32 @@ -150,12 +152,12 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool { (UInt8 | UInt16 | UInt32 | UInt64, Decimal128(_, _)) | (UInt8 | UInt16 | UInt32 | UInt64, Decimal256(_, _)) | // signed numeric to decimal - (Null | Int8 | Int16 | Int32 | Int64 | Float32 | Float64, Decimal128(_, _)) | - (Null | Int8 | Int16 | Int32 | Int64 | Float32 | Float64, Decimal256(_, _)) | + (Null | Int8 | Int16 | Int32 | Int64 | Float16 | Float32 | Float64, Decimal128(_, _)) | + (Null | Int8 | Int16 | Int32 | Int64 | Float16 | Float32 | Float64, Decimal256(_, _)) | // decimal to unsigned numeric (Decimal128(_, _) | Decimal256(_, _), UInt8 | UInt16 | UInt32 | UInt64) | // decimal to signed numeric - (Decimal128(_, _) | Decimal256(_, _), Null | Int8 | Int16 | Int32 | 
Int64 | Float32 | Float64) => true, + (Decimal128(_, _) | Decimal256(_, _), Null | Int8 | Int16 | Int32 | Int64 | Float16 | Float32 | Float64) => true, // decimal to Utf8 (Decimal128(_, _) | Decimal256(_, _), Utf8 | LargeUtf8) => true, // Utf8 to decimal @@ -199,8 +201,8 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool { // start numeric casts ( - UInt8 | UInt16 | UInt32 | UInt64 | Int8 | Int16 | Int32 | Int64 | Float32 | Float64, - UInt8 | UInt16 | UInt32 | UInt64 | Int8 | Int16 | Int32 | Int64 | Float32 | Float64, + UInt8 | UInt16 | UInt32 | UInt64 | Int8 | Int16 | Int32 | Int64 | Float16 | Float32 | Float64, + UInt8 | UInt16 | UInt32 | UInt64 | Int8 | Int16 | Int32 | Int64 | Float16 | Float32 | Float64, ) => true, // end numeric casts @@ -274,7 +276,7 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool { /// * Time32 and Time64: precision lost when going to higher interval /// * Timestamp and Date{32|64}: precision lost when going to higher interval /// * Temporal to/from backing primitive: zero-copy with data type change -/// * Casting from `float32/float64` to `Decimal(precision, scale)` rounds to the `scale` decimals +/// * Casting from `float16/float32/float64` to `Decimal(precision, scale)` rounds to the `scale` decimals /// (i.e. casting 6.4999 to Decimal(10, 1) becomes 6.5). This is the breaking change from `26.0.0`. /// It used to truncate it instead of round (i.e. 
outputs 6.4 instead) /// @@ -711,6 +713,7 @@ pub fn cast_with_options( | UInt8 | Int16 | UInt16 + | Float16 | Int32 | UInt32 | Float32 @@ -886,6 +889,11 @@ pub fn cast_with_options( *scale, cast_options, ), + Float16 => { + cast_decimal_to_float::(array, |x| { + f16::from_f64(x as f64 / 10_f64.powi(*scale as i32)) + }) + } Float32 => { cast_decimal_to_float::(array, |x| { (x as f64 / 10_f64.powi(*scale as i32)) as f32 @@ -955,6 +963,11 @@ pub fn cast_with_options( *scale, cast_options, ), + Float16 => { + cast_decimal_to_float::(array, |x| { + f16::from_f64_const(x.to_f64().unwrap() / 10_f64.powi(*scale as i32)) + }) + } Float32 => { cast_decimal_to_float::(array, |x| { (x.to_f64().unwrap() / 10_f64.powi(*scale as i32)) as f32 @@ -1032,6 +1045,12 @@ pub fn cast_with_options( 10_i128, cast_options, ), + Float16 => cast_floating_point_to_decimal128( + array.as_primitive::(), + *precision, + *scale, + cast_options, + ), Float32 => cast_floating_point_to_decimal128( array.as_primitive::(), *precision, @@ -1121,6 +1140,12 @@ pub fn cast_with_options( i256::from_i128(10_i128), cast_options, ), + Float16 => cast_floating_point_to_decimal256( + array.as_primitive::(), + *precision, + *scale, + cast_options, + ), Float32 => cast_floating_point_to_decimal256( array.as_primitive::(), *precision, @@ -1384,6 +1409,9 @@ pub fn cast_with_options( (UInt8, Int64) => { cast_numeric_arrays::(array, cast_options) } + (UInt8, Float16) => { + cast_numeric_arrays::(array, cast_options) + } (UInt8, Float32) => { cast_numeric_arrays::(array, cast_options) } @@ -1412,6 +1440,9 @@ pub fn cast_with_options( (UInt16, Int64) => { cast_numeric_arrays::(array, cast_options) } + (UInt16, Float16) => { + cast_numeric_arrays::(array, cast_options) + } (UInt16, Float32) => { cast_numeric_arrays::(array, cast_options) } @@ -1440,6 +1471,9 @@ pub fn cast_with_options( (UInt32, Int64) => { cast_numeric_arrays::(array, cast_options) } + (UInt32, Float16) => { + cast_numeric_arrays::(array, cast_options) 
+ } (UInt32, Float32) => { cast_numeric_arrays::(array, cast_options) } @@ -1468,6 +1502,9 @@ pub fn cast_with_options( (UInt64, Int64) => { cast_numeric_arrays::(array, cast_options) } + (UInt64, Float16) => { + cast_numeric_arrays::(array, cast_options) + } (UInt64, Float32) => { cast_numeric_arrays::(array, cast_options) } @@ -1488,6 +1525,9 @@ pub fn cast_with_options( (Int8, Int16) => cast_numeric_arrays::(array, cast_options), (Int8, Int32) => cast_numeric_arrays::(array, cast_options), (Int8, Int64) => cast_numeric_arrays::(array, cast_options), + (Int8, Float16) => { + cast_numeric_arrays::(array, cast_options) + } (Int8, Float32) => { cast_numeric_arrays::(array, cast_options) } @@ -1514,6 +1554,9 @@ pub fn cast_with_options( (Int16, Int64) => { cast_numeric_arrays::(array, cast_options) } + (Int16, Float16) => { + cast_numeric_arrays::(array, cast_options) + } (Int16, Float32) => { cast_numeric_arrays::(array, cast_options) } @@ -1540,6 +1583,9 @@ pub fn cast_with_options( (Int32, Int64) => { cast_numeric_arrays::(array, cast_options) } + (Int32, Float16) => { + cast_numeric_arrays::(array, cast_options) + } (Int32, Float32) => { cast_numeric_arrays::(array, cast_options) } @@ -1566,6 +1612,9 @@ pub fn cast_with_options( (Int64, Int32) => { cast_numeric_arrays::(array, cast_options) } + (Int64, Float16) => { + cast_numeric_arrays::(array, cast_options) + } (Int64, Float32) => { cast_numeric_arrays::(array, cast_options) } @@ -1573,6 +1622,37 @@ pub fn cast_with_options( cast_numeric_arrays::(array, cast_options) } + (Float16, UInt8) => { + cast_numeric_arrays::(array, cast_options) + } + (Float16, UInt16) => { + cast_numeric_arrays::(array, cast_options) + } + (Float16, UInt32) => { + cast_numeric_arrays::(array, cast_options) + } + (Float16, UInt64) => { + cast_numeric_arrays::(array, cast_options) + } + (Float16, Int8) => { + cast_numeric_arrays::(array, cast_options) + } + (Float16, Int16) => { + cast_numeric_arrays::(array, cast_options) + } + 
(Float16, Int32) => { + cast_numeric_arrays::(array, cast_options) + } + (Float16, Int64) => { + cast_numeric_arrays::(array, cast_options) + } + (Float16, Float32) => { + cast_numeric_arrays::(array, cast_options) + } + (Float16, Float64) => { + cast_numeric_arrays::(array, cast_options) + } + (Float32, UInt8) => { cast_numeric_arrays::(array, cast_options) } @@ -1597,6 +1677,9 @@ pub fn cast_with_options( (Float32, Int64) => { cast_numeric_arrays::(array, cast_options) } + (Float32, Float16) => { + cast_numeric_arrays::(array, cast_options) + } (Float32, Float64) => { cast_numeric_arrays::(array, cast_options) } @@ -1625,6 +1708,9 @@ pub fn cast_with_options( (Float64, Int64) => { cast_numeric_arrays::(array, cast_options) } + (Float64, Float16) => { + cast_numeric_arrays::(array, cast_options) + } (Float64, Float32) => { cast_numeric_arrays::(array, cast_options) } @@ -4236,6 +4322,19 @@ mod tests { &DataType::Int64, vec![Some(1_i64), Some(2_i64), Some(3_i64), None, Some(5_i64)] ); + // f16 + generate_cast_test_case!( + &array, + Float16Array, + &DataType::Float16, + vec![ + Some(f16::from_f32(1.25_f32)), + Some(f16::from_f32(2.25_f32)), + Some(f16::from_f32(3.25_f32)), + None, + Some(f16::from_f32(5.25_f32)) + ] + ); // f32 generate_cast_test_case!( &array, @@ -4317,7 +4416,34 @@ mod tests { assert!(casted_array.is_ok()); assert!(casted_array.unwrap().is_null(0)); - // loss the precision: convert decimal to f32、f64 + // precision loss: convert decimal to f16, f32, f64 + // f16 + // 1123456.78 and 1123456.79 both overflow the f16 range, so both become infinity. 
+ let value_array: Vec> = vec![ + Some(125), + Some(225), + Some(325), + None, + Some(525), + Some(112345678), + Some(112345679), + ]; + let array = create_decimal_array(value_array, 38, 2).unwrap(); + generate_cast_test_case!( + &array, + Float16Array, + &DataType::Float16, + vec![ + Some(f16::from_f32(1.25_f32)), + Some(f16::from_f32(2.25_f32)), + Some(f16::from_f32(3.25_f32)), + None, + Some(f16::from_f32(5.25_f32)), + Some(f16::from_f32(1_123_456.7_f32)), + Some(f16::from_f32(1_123_456.7_f32)) + ] + ); + // f32 // 112345678_f32 and 112345679_f32 are same, so the 112345679_f32 will lose precision. let value_array: Vec> = vec![ @@ -4439,6 +4565,19 @@ mod tests { &DataType::Int64, vec![Some(1_i64), Some(2_i64), Some(3_i64), None, Some(5_i64)] ); + // f16 + generate_cast_test_case!( + &array, + Float16Array, + &DataType::Float16, + vec![ + Some(f16::from_f32(1.25_f32)), + Some(f16::from_f32(2.25_f32)), + Some(f16::from_f32(3.25_f32)), + None, + Some(f16::from_f32(5.25_f32)) + ] + ); // f32 generate_cast_test_case!( &array, @@ -4493,7 +4632,34 @@ mod tests { assert!(casted_array.is_ok()); assert!(casted_array.unwrap().is_null(0)); - // loss the precision: convert decimal to f32、f64 + // precision loss: convert decimal to f16, f32, f64 + // f16 + // 1123456.78 and 1123456.79 both overflow the f16 range, so both become infinity. 
+ let value_array: Vec> = vec![ + Some(i256::from_i128(125)), + Some(i256::from_i128(225)), + Some(i256::from_i128(325)), + None, + Some(i256::from_i128(525)), + Some(i256::from_i128(112345678)), + Some(i256::from_i128(112345679)), + ]; + let array = create_decimal256_array(value_array, 76, 2).unwrap(); + generate_cast_test_case!( + &array, + Float16Array, + &DataType::Float16, + vec![ + Some(f16::from_f32(1.25_f32)), + Some(f16::from_f32(2.25_f32)), + Some(f16::from_f32(3.25_f32)), + None, + Some(f16::from_f32(5.25_f32)), + Some(f16::from_f32(1_123_456.7_f32)), + Some(f16::from_f32(1_123_456.7_f32)) + ] + ); + // f32 // 112345678_f32 and 112345679_f32 are same, so the 112345679_f32 will lose precision. let value_array: Vec> = vec![ @@ -4663,6 +4829,30 @@ mod tests { let array: &Decimal128Array = array.as_primitive(); assert!(array.is_null(4)); + // test f16 to decimal type + let array = Float16Array::from(vec![ + Some(f16::from_f32(1.1)), + Some(f16::from_f32(2.2)), + Some(f16::from_f32(4.4)), + None, + Some(f16::from_f32(1.125_4)), // round down + Some(f16::from_f32(1.165_4)), // round up + ]); + let array = Arc::new(array) as ArrayRef; + generate_cast_test_case!( + &array, + Decimal128Array, + &decimal_type, + vec![ + Some(1099609_i128), + Some(2199219_i128), + Some(4398438_i128), + None, + Some(1125000_i128), // round down + Some(1165039_i128), // round up + ] + ); + // test f32 to decimal type let array = Float32Array::from(vec![ Some(1.1), @@ -4821,6 +5011,29 @@ mod tests { let array: &Decimal256Array = array.as_primitive(); assert!(array.is_null(4)); + // test f16 to decimal type + let array = Float16Array::from(vec![ + Some(f16::from_f32(1.1)), + Some(f16::from_f32(2.2)), + Some(f16::from_f32(4.4)), + None, + Some(f16::from_f32(1.125_4)), // round down + Some(f16::from_f32(1.165_4)), // round up + ]); + generate_cast_test_case!( + &array, + Decimal256Array, + &decimal_type, + vec![ + Some(i256::from_i128(1099609_i128)), // round down + 
Some(i256::from_i128(2199219_i128)), // round down + Some(i256::from_i128(4398438_i128)), // round down + None, + Some(i256::from_i128(1125000_i128)), // round down + Some(i256::from_i128(1165039_i128)), // round down + ] + ); + // test f32 to decimal type let array = Float32Array::from(vec![ Some(1.1), @@ -6363,6 +6576,25 @@ mod tests { .collect::>() ); + let f16_expected = vec![ + f16::from_f64(-9223372000000000000.0), + f16::from_f64(-2147483600.0), + f16::from_f64(-32768.0), + f16::from_f64(-128.0), + f16::from_f64(0.0), + f16::from_f64(255.0), + f16::from_f64(65535.0), + f16::from_f64(4294967300.0), + f16::from_f64(18446744000000000000.0), + ]; + assert_eq!( + f16_expected, + get_cast_values::(&f64_array, &DataType::Float16) + .iter() + .map(|i| i.parse::().unwrap()) + .collect::>() + ); + let i64_expected = vec![ "-9223372036854775808", "-2147483648", @@ -6507,6 +6739,14 @@ mod tests { get_cast_values::(&f32_array, &DataType::Float32) ); + let f16_expected = vec![ + "-inf", "-inf", "-32768.0", "-128.0", "0.0", "255.0", "inf", "inf", "inf", + ]; + assert_eq!( + f16_expected, + get_cast_values::(&f32_array, &DataType::Float16) + ); + let i64_expected = vec![ "-2147483648", "-2147483648", @@ -6596,6 +6836,85 @@ mod tests { ); } + #[test] + fn test_cast_from_f16() { + let f16_values: Vec = vec![ + f16::NEG_INFINITY, + f16::from_f32(i16::MIN as f32), + f16::from_f32(i8::MIN as f32), + f16::from_f32(0_f32), + f16::from_f32(u8::MAX as f32), + f16::INFINITY, + ]; + let f16_array: ArrayRef = Arc::new(Float16Array::from(f16_values)); + + let f64_expected = vec!["-inf", "-32768.0", "-128.0", "0.0", "255.0", "inf"]; + assert_eq!( + f64_expected, + get_cast_values::(&f16_array, &DataType::Float64) + ); + + let f32_expected = vec!["-inf", "-32768.0", "-128.0", "0.0", "255.0", "inf"]; + assert_eq!( + f32_expected, + get_cast_values::(&f16_array, &DataType::Float32) + ); + + let f16_expected = vec!["-inf", "-32768.0", "-128.0", "0.0", "255.0", "inf"]; + assert_eq!( + 
f16_expected, + get_cast_values::(&f16_array, &DataType::Float16) + ); + + let i64_expected = vec!["null", "-32768", "-128", "0", "255", "null"]; + assert_eq!( + i64_expected, + get_cast_values::(&f16_array, &DataType::Int64) + ); + + let i32_expected = vec!["null", "-32768", "-128", "0", "255", "null"]; + assert_eq!( + i32_expected, + get_cast_values::(&f16_array, &DataType::Int32) + ); + + let i16_expected = vec!["null", "-32768", "-128", "0", "255", "null"]; + assert_eq!( + i16_expected, + get_cast_values::(&f16_array, &DataType::Int16) + ); + + let i8_expected = vec!["null", "null", "-128", "0", "null", "null"]; + assert_eq!( + i8_expected, + get_cast_values::(&f16_array, &DataType::Int8) + ); + + let u64_expected = vec!["null", "null", "null", "0", "255", "null"]; + assert_eq!( + u64_expected, + get_cast_values::(&f16_array, &DataType::UInt64) + ); + + let u32_expected = vec!["null", "null", "null", "0", "255", "null"]; + assert_eq!( + u32_expected, + get_cast_values::(&f16_array, &DataType::UInt32) + ); + + let u16_expected = vec!["null", "null", "null", "0", "255", "null"]; + assert_eq!( + u16_expected, + get_cast_values::(&f16_array, &DataType::UInt16) + ); + + let u8_expected = vec!["null", "null", "null", "0", "255", "null"]; + assert_eq!( + u8_expected, + get_cast_values::(&f16_array, &DataType::UInt8) + ); + } + #[test] fn test_cast_from_uint64() { let u64_values: Vec = vec![ @@ -6627,6 +6946,21 @@ mod tests { .collect::>() ); + let f16_expected = vec![ + f16::from_f64(0.0), + f16::from_f64(255.0), + f16::from_f64(65535.0), + f16::from_f64(4294967300.0), + f16::from_f64(18446744000000000000.0), + ]; + assert_eq!( + f16_expected, + get_cast_values::(&u64_array, &DataType::Float16) + .iter() + .map(|i| i.parse::().unwrap()) + .collect::>() + ); + let i64_expected = vec!["0", "255", "65535", "4294967295", "null"]; assert_eq!( i64_expected, @@ -6694,6 +7028,12 @@ mod tests { get_cast_values::(&u32_array, &DataType::Float32) ); + let f16_expected = 
vec!["0.0", "255.0", "inf", "inf"]; + assert_eq!( + f16_expected, + get_cast_values::(&u32_array, &DataType::Float16) + ); + let i64_expected = vec!["0", "255", "65535", "4294967295"]; assert_eq!( i64_expected, @@ -6760,6 +7100,12 @@ mod tests { get_cast_values::(&u16_array, &DataType::Float32) ); + let f16_expected = vec!["0.0", "255.0", "inf"]; + assert_eq!( + f16_expected, + get_cast_values::(&u16_array, &DataType::Float16) + ); + let i64_expected = vec!["0", "255", "65535"]; assert_eq!( i64_expected, @@ -6826,6 +7172,12 @@ mod tests { get_cast_values::(&u8_array, &DataType::Float32) ); + let f16_expected = vec!["0.0", "255.0"]; + assert_eq!( + f16_expected, + get_cast_values::(&u8_array, &DataType::Float16) + ); + let i64_expected = vec!["0", "255"]; assert_eq!( i64_expected, @@ -6928,6 +7280,25 @@ mod tests { .collect::>() ); + let f16_expected = vec![ + f16::from_f64(-9223372000000000000.0), + f16::from_f64(-2147483600.0), + f16::from_f64(-32768.0), + f16::from_f64(-128.0), + f16::from_f64(0.0), + f16::from_f64(127.0), + f16::from_f64(32767.0), + f16::from_f64(2147483600.0), + f16::from_f64(9223372000000000000.0), + ]; + assert_eq!( + f16_expected, + get_cast_values::(&i64_array, &DataType::Float16) + .iter() + .map(|i| i.parse::().unwrap()) + .collect::>() + ); + let i64_expected = vec![ "-9223372036854775808", "-2147483648", @@ -7071,6 +7442,14 @@ mod tests { get_cast_values::(&i32_array, &DataType::Float32) ); + let f16_expected = vec![ + "-inf", "-32768.0", "-128.0", "0.0", "127.0", "32768.0", "inf", + ]; + assert_eq!( + f16_expected, + get_cast_values::(&i32_array, &DataType::Float16) + ); + let i16_expected = vec!["null", "-32768", "-128", "0", "127", "32767", "null"]; assert_eq!( i16_expected, @@ -7143,6 +7522,12 @@ mod tests { get_cast_values::(&i16_array, &DataType::Float32) ); + let f16_expected = vec!["-32768.0", "-128.0", "0.0", "127.0", "32768.0"]; + assert_eq!( + f16_expected, + get_cast_values::(&i16_array, &DataType::Float16) + ); + let 
i64_expected = vec!["-32768", "-128", "0", "127", "32767"]; assert_eq!( i64_expected, @@ -7237,6 +7622,12 @@ mod tests { get_cast_values::(&i8_array, &DataType::Float32) ); + let f16_expected = vec!["-128.0", "0.0", "127.0"]; + assert_eq!( + f16_expected, + get_cast_values::(&i8_array, &DataType::Float16) + ); + let i64_expected = vec!["-128", "0", "127"]; assert_eq!( i64_expected, @@ -7525,6 +7916,7 @@ mod tests { typed_test!(UInt32Array, UInt32, UInt32Type); typed_test!(UInt64Array, UInt64, UInt64Type); + typed_test!(Float16Array, Float16, Float16Type); typed_test!(Float32Array, Float32, Float32Type); typed_test!(Float64Array, Float64, Float64Type);