Skip to content

Commit

Permalink
refine validation for decimal128 array (#2428)
Browse files Browse the repository at this point in the history
  • Loading branch information
liukun4515 committed Aug 12, 2022
1 parent ee2818e commit 0e97491
Show file tree
Hide file tree
Showing 6 changed files with 44 additions and 45 deletions.
30 changes: 21 additions & 9 deletions arrow/benches/decimal_validate.rs
Expand Up @@ -18,8 +18,12 @@
#[macro_use]
extern crate criterion;

use arrow::array::{Array, Decimal128Array, Decimal256Array, Decimal256Builder};
use arrow::array::{
Array, Decimal128Array, Decimal128Builder, Decimal256Array, Decimal256Builder,
};
use criterion::Criterion;
use num::BigInt;
use rand::Rng;

extern crate arrow;

Expand All @@ -34,7 +38,15 @@ fn validate_decimal256_array(array: Decimal256Array) {
}

fn validate_decimal128_benchmark(c: &mut Criterion) {
let decimal_array = Decimal128Array::from_iter_values(vec![12324; 20000]);
let mut rng = rand::thread_rng();
let size: i128 = 20000;
let mut decimal_builder = Decimal128Builder::new(size as usize, 38, 0);
for _ in 0..size {
decimal_builder
.append_value(rng.gen_range::<i128, _>(0..999999999999))
.unwrap();
}
let decimal_array = decimal_builder.finish();
let data = decimal_array.into_data();
c.bench_function("validate_decimal128_array 20000", |b| {
b.iter(|| {
Expand All @@ -45,13 +57,13 @@ fn validate_decimal128_benchmark(c: &mut Criterion) {
}

fn validate_decimal256_benchmark(c: &mut Criterion) {
let mut decimal_builder = Decimal256Builder::new(20000, 76, 0);
let mut bytes = vec![0; 32];
bytes[0..16].clone_from_slice(&12324_i128.to_le_bytes());
for _ in 0..20000 {
decimal_builder
.append_value(&Decimal256::new(76, 0, &bytes))
.unwrap();
let mut rng = rand::thread_rng();
let size: i128 = 20000;
let mut decimal_builder = Decimal256Builder::new(size as usize, 76, 0);
for _ in 0..size {
let v = rng.gen_range::<i128, _>(0..999999999999999);
let decimal = Decimal256::from_big_int(&BigInt::from(v), 76, 0).unwrap();
decimal_builder.append_value(&decimal).unwrap();
}
let decimal_array256_data = decimal_builder.finish();
let data = decimal_array256_data.into_data();
Expand Down
12 changes: 8 additions & 4 deletions arrow/src/array/array_decimal.rs
Expand Up @@ -273,10 +273,14 @@ impl Decimal128Array {
// Validates decimal values in this array can be properly interpreted
// with the specified precision.
fn validate_decimal_precision(&self, precision: usize) -> Result<()> {
for v in self.iter().flatten() {
validate_decimal_precision(v.as_i128(), precision)?;
}
Ok(())
(0..self.len()).try_for_each(|idx| {
if self.is_valid(idx) {
let decimal = unsafe { self.value_unchecked(idx) };
validate_decimal_precision(decimal.as_i128(), precision)
} else {
Ok(())
}
})
}

/// Returns a Decimal array with the same data as self, with the
Expand Down
27 changes: 4 additions & 23 deletions arrow/src/array/builder/decimal_builder.rs
Expand Up @@ -85,33 +85,14 @@ impl Decimal128Builder {
/// Appends a decimal value into the builder.
#[inline]
pub fn append_value(&mut self, value: impl Into<i128>) -> Result<()> {
let value = if self.value_validation {
validate_decimal_precision(value.into(), self.precision)?
} else {
value.into()
};

let value_as_bytes =
Self::from_i128_to_fixed_size_bytes(value, Self::BYTE_LENGTH as usize)?;
if Self::BYTE_LENGTH != value_as_bytes.len() as i32 {
return Err(ArrowError::InvalidArgumentError(
"Byte slice does not have the same length as Decimal128Builder value lengths".to_string()
));
let value = value.into();
if self.value_validation {
validate_decimal_precision(value, self.precision)?
}
let value_as_bytes: [u8; 16] = value.to_le_bytes();
self.builder.append_value(value_as_bytes.as_slice())
}

pub(crate) fn from_i128_to_fixed_size_bytes(v: i128, size: usize) -> Result<Vec<u8>> {
if size > 16 {
return Err(ArrowError::InvalidArgumentError(
"Decimal128Builder only supports values up to 16 bytes.".to_string(),
));
}
let res = v.to_le_bytes();
let start_byte = 16 - size;
Ok(res[start_byte..16].to_vec())
}

/// Append a null value to the array.
#[inline]
pub fn append_null(&mut self) {
Expand Down
6 changes: 1 addition & 5 deletions arrow/src/array/data.rs
Expand Up @@ -2825,11 +2825,7 @@ mod tests {
let byte_width = 16;
let mut fixed_size_builder =
FixedSizeListBuilder::new(values_builder, byte_width);
let value_as_bytes = Decimal128Builder::from_i128_to_fixed_size_bytes(
123456,
fixed_size_builder.value_length() as usize,
)
.unwrap();
let value_as_bytes = 123456_i128.to_le_bytes();
fixed_size_builder
.values()
.append_slice(value_as_bytes.as_slice());
Expand Down
10 changes: 8 additions & 2 deletions arrow/src/csv/reader.rs
Expand Up @@ -776,8 +776,14 @@ fn parse_decimal_with_parameter(s: &str, precision: usize, scale: usize) -> Resu
if negative {
result = result.neg();
}
validate_decimal_precision(result, precision)
.map_err(|e| ArrowError::ParseError(format!("parse decimal overflow: {}", e)))

match validate_decimal_precision(result, precision) {
Ok(_) => Ok(result),
Err(e) => Err(ArrowError::ParseError(format!(
"parse decimal overflow: {}",
e
))),
}
} else {
Err(ArrowError::ParseError(format!(
"can't parse the string value {} to decimal",
Expand Down
4 changes: 2 additions & 2 deletions arrow/src/datatypes/datatype.rs
Expand Up @@ -989,7 +989,7 @@ pub const DECIMAL_DEFAULT_SCALE: usize = 10;
/// Validates that the specified `i128` value can be properly
/// interpreted as a Decimal number with precision `precision`
#[inline]
pub(crate) fn validate_decimal_precision(value: i128, precision: usize) -> Result<i128> {
pub(crate) fn validate_decimal_precision(value: i128, precision: usize) -> Result<()> {
if precision > DECIMAL128_MAX_PRECISION {
return Err(ArrowError::InvalidArgumentError(format!(
"Max precision of a Decimal128 is {}, but got {}",
Expand All @@ -1011,7 +1011,7 @@ pub(crate) fn validate_decimal_precision(value: i128, precision: usize) -> Resul
value, precision, min
)))
} else {
Ok(value)
Ok(())
}
}

Expand Down

0 comments on commit 0e97491

Please sign in to comment.