From 97e1f2614678823dd8409242ebb690b60ba6345d Mon Sep 17 00:00:00 2001 From: liukun4515 Date: Mon, 8 Aug 2022 12:11:32 +0800 Subject: [PATCH] change the api of with_precision_and_scale, add example of the usage --- arrow/src/array/array_decimal.rs | 15 +++++++++------ arrow/src/compute/kernels/cast.rs | 18 ++++++++++++++---- .../src/arrow/array_reader/primitive_array.rs | 4 +++- parquet/src/arrow/buffer/converter.rs | 6 ++++-- 4 files changed, 30 insertions(+), 13 deletions(-) diff --git a/arrow/src/array/array_decimal.rs b/arrow/src/array/array_decimal.rs index 9d7644befd6e..79c1d407e121 100644 --- a/arrow/src/array/array_decimal.rs +++ b/arrow/src/array/array_decimal.rs @@ -57,7 +57,7 @@ use crate::util::decimal::{BasicDecimal, Decimal128, Decimal256}; /// // set precision and scale so values are interpreted /// // as `8887.000000`, `Null`, and `-8887.000000` /// let decimal_array = decimal_array -/// .with_precision_and_scale(23, 6) +/// .with_precision_and_scale(23, 6, true) /// .unwrap(); /// /// assert_eq!(&DataType::Decimal128(23, 6), decimal_array.data_type()); @@ -253,11 +253,15 @@ pub trait BasicDecimalArray>: /// Returns a Decimal array with the same data as self, with the /// specified precision. /// + /// If you are sure that all values in this array are within the bounds of the specified precision, + /// set `need_validation` to `false`; otherwise set it to `true`. + /// /// Returns an Error if: /// 1. `precision` is larger than [`Self::MAX_PRECISION`] /// 2. `scale` is larger than [`Self::MAX_SCALE`]; /// 3. `scale` is > `precision` - fn with_precision_and_scale(self, precision: usize, scale: usize) -> Result + /// 4. `need_validation` is `true`, but some values are out of range for the specified precision + fn with_precision_and_scale(self, precision: usize, scale: usize, need_validation: bool) -> Result where Self: Sized, { @@ -282,10 +286,9 @@ pub trait BasicDecimalArray>: ))); } - // Ensure that all values are within the requested - // precision.
For performance, only check if the precision is - // decreased - self.validate_decimal_precision(precision)?; + if need_validation { + self.validate_decimal_precision(precision)?; + } let data_type = if Self::VALUE_LENGTH == 16 { DataType::Decimal128(self.precision(), self.scale()) diff --git a/arrow/src/compute/kernels/cast.rs b/arrow/src/compute/kernels/cast.rs index c6b8f477986f..9f0f9a8d8cf7 100644 --- a/arrow/src/compute/kernels/cast.rs +++ b/arrow/src/compute/kernels/cast.rs @@ -431,8 +431,8 @@ pub fn cast_with_options( return Ok(array.clone()); } match (from_type, to_type) { - (Decimal128(_, s1), Decimal128(p2, s2)) => { - cast_decimal_to_decimal(array, s1, p2, s2) + (Decimal128(p1, s1), Decimal128(p2, s2)) => { + cast_decimal_to_decimal(array, p1,s1, p2, s2) } (Decimal128(_, scale), _) => { // cast decimal to other type @@ -1254,6 +1254,7 @@ const fn time_unit_multiple(unit: &TimeUnit) -> i64 { /// Cast one type of decimal array to another type of decimal array fn cast_decimal_to_decimal( array: &ArrayRef, + input_precision, &usize, input_scale: &usize, output_precision: &usize, output_scale: &usize, @@ -1276,8 +1277,17 @@ fn cast_decimal_to_decimal( .iter() .map(|v| v.map(|v| v.as_i128() * mul)) .collect::() - } - .with_precision_and_scale(*output_precision, *output_scale)?; + }; + // For decimal cast to decimal, if the range of the output is greater than or equal to that of the input, + // validation is not needed.
+ let output_array = match output_precision-output_scale>=input_precision - input_scale { + true => { + output_array.with_precision_and_scale(*output_precision, *output_scale, false) + } + false => { + output_array.with_precision_and_scale(*output_precision, *output_scale, true) + } + }?; Ok(Arc::new(output_array)) } diff --git a/parquet/src/arrow/array_reader/primitive_array.rs b/parquet/src/arrow/array_reader/primitive_array.rs index 45614d50941c..c025e29d2c87 100644 --- a/parquet/src/arrow/array_reader/primitive_array.rs +++ b/parquet/src/arrow/array_reader/primitive_array.rs @@ -184,6 +184,8 @@ where let a = arrow::compute::cast(&array, &ArrowType::Date32)?; arrow::compute::cast(&a, &target_type)? } + // In the parquet file, if the logical/converted type is decimal and the physical type + // is INT32 or INT64, validation is not needed. ArrowType::Decimal128(p, s) => { let array = match array.data_type() { ArrowType::Int32 => array @@ -208,7 +210,7 @@ where )) } } - .with_precision_and_scale(p, s)?; + .with_precision_and_scale(p, s, false)?; Arc::new(array) as ArrayRef } diff --git a/parquet/src/arrow/buffer/converter.rs b/parquet/src/arrow/buffer/converter.rs index 4cd0589424fc..c291b2c57e55 100644 --- a/parquet/src/arrow/buffer/converter.rs +++ b/parquet/src/arrow/buffer/converter.rs @@ -82,11 +82,12 @@ impl Converter>, Decimal128Array> for DecimalArrayConverter { fn convert(&self, source: Vec>) -> Result { + // In the parquet file, if the logical/converted type is decimal, validation is not needed.
let array = source .into_iter() .map(|array| array.map(|array| from_bytes_to_i128(array.data()))) .collect::() - .with_precision_and_scale(self.precision as usize, self.scale as usize)?; + .with_precision_and_scale(self.precision as usize, self.scale as usize, false)?; Ok(array) } @@ -94,11 +95,12 @@ impl Converter>, Decimal128Array> impl Converter>, Decimal128Array> for DecimalArrayConverter { fn convert(&self, source: Vec>) -> Result { + // In the parquet file, if the logical/converted type is decimal, validation is not needed. let array = source .into_iter() .map(|array| array.map(|array| from_bytes_to_i128(array.data()))) .collect::() - .with_precision_and_scale(self.precision as usize, self.scale as usize)?; + .with_precision_and_scale(self.precision as usize, self.scale as usize, false)?; Ok(array) }