Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Decimal128Iter and Decimal256Iter and do maximum precision/scale check #2140

Merged
merged 4 commits into from
Jul 25, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 0 additions & 1 deletion arrow/src/array/array_binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ use super::{
FixedSizeListArray, GenericBinaryIter, GenericListArray, OffsetSizeTrait,
};
use crate::array::array::ArrayAccessor;
pub use crate::array::DecimalIter;
use crate::buffer::Buffer;
use crate::error::{ArrowError, Result};
use crate::util::bit_util;
Expand Down
127 changes: 94 additions & 33 deletions arrow/src/array/array_decimal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
// specific language governing permissions and limitations
// under the License.

use crate::array::{ArrayAccessor, Decimal128Iter, Decimal256Iter};
use std::borrow::Borrow;
use std::convert::From;
use std::fmt;
Expand All @@ -24,13 +25,11 @@ use super::{
array::print_long_array, raw_pointer::RawPtrBox, Array, ArrayData, FixedSizeListArray,
};
use super::{BooleanBufferBuilder, FixedSizeBinaryArray};
#[allow(deprecated)]
pub use crate::array::DecimalIter;
use crate::buffer::Buffer;
use crate::datatypes::DataType;
use crate::datatypes::{
validate_decimal_precision, DECIMAL_DEFAULT_SCALE, DECIMAL_MAX_PRECISION,
DECIMAL_MAX_SCALE,
};
use crate::datatypes::{validate_decimal_precision, DECIMAL_DEFAULT_SCALE};
use crate::datatypes::{DataType, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE};
use crate::error::{ArrowError, Result};
use crate::util::decimal::{BasicDecimal, Decimal128, Decimal256};

Expand Down Expand Up @@ -103,11 +102,18 @@ pub trait BasicDecimalArray<T: BasicDecimal, U: From<ArrayData>>:

/// Returns the element at index `i`.
fn value(&self, i: usize) -> T {
let data = self.data();
assert!(i < data.len(), "Out of bounds access");
assert!(i < self.data().len(), "Out of bounds access");

unsafe { self.value_unchecked(i) }
}

/// Returns the element at index `i`.
/// # Safety
/// Caller is responsible for ensuring that the index is within the bounds of the array
unsafe fn value_unchecked(&self, i: usize) -> T {
let data = self.data();
let offset = i + data.offset();
let raw_val = unsafe {
let raw_val = {
let pos = self.value_offset_at(offset);
std::slice::from_raw_parts(
self.raw_value_data_ptr().offset(pos as isize),
Expand Down Expand Up @@ -270,24 +276,24 @@ impl Decimal128Array {
/// specified precision.
///
/// Returns an Error if:
/// 1. `precision` is larger than [`DECIMAL_MAX_PRECISION`]
/// 2. `scale` is larger than [`DECIMAL_MAX_SCALE`];
/// 1. `precision` is larger than [`DECIMAL128_MAX_PRECISION`]
/// 2. `scale` is larger than [`DECIMAL128_MAX_SCALE`];
/// 3. `scale` is > `precision`
pub fn with_precision_and_scale(
mut self,
precision: usize,
scale: usize,
) -> Result<Self> {
if precision > DECIMAL_MAX_PRECISION {
if precision > DECIMAL128_MAX_PRECISION {
return Err(ArrowError::InvalidArgumentError(format!(
"precision {} is greater than max {}",
precision, DECIMAL_MAX_PRECISION
precision, DECIMAL128_MAX_PRECISION
)));
}
if scale > DECIMAL_MAX_SCALE {
if scale > DECIMAL128_MAX_SCALE {
return Err(ArrowError::InvalidArgumentError(format!(
"scale {} is greater than max {}",
scale, DECIMAL_MAX_SCALE
scale, DECIMAL128_MAX_SCALE
)));
}
if scale > precision {
Expand All @@ -302,7 +308,7 @@ impl Decimal128Array {
// decreased
if precision < self.precision {
for v in self.iter().flatten() {
validate_decimal_precision(v, precision)?;
validate_decimal_precision(v.as_i128(), precision)?;
}
}

Expand All @@ -322,7 +328,7 @@ impl Decimal128Array {
/// The default precision and scale used when not specified.
pub fn default_type() -> DataType {
// Keep maximum precision
DataType::Decimal(DECIMAL_MAX_PRECISION, DECIMAL_DEFAULT_SCALE)
DataType::Decimal(DECIMAL128_MAX_PRECISION, DECIMAL_DEFAULT_SCALE)
}
}

Expand Down Expand Up @@ -368,19 +374,13 @@ impl From<ArrayData> for Decimal256Array {
}
}

impl<'a> IntoIterator for &'a Decimal128Array {
type Item = Option<i128>;
type IntoIter = DecimalIter<'a>;

fn into_iter(self) -> Self::IntoIter {
DecimalIter::<'a>::new(self)
}
}

impl<'a> Decimal128Array {
/// constructs a new iterator
pub fn iter(&'a self) -> DecimalIter<'a> {
DecimalIter::new(self)
/// Constructs a new iterator that iterates `Decimal128` values as i128 values.
/// This is kept mostly for back-compatibility purpose.
/// Suggests to use `iter()` that returns `Decimal128Iter`.
#[allow(deprecated)]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Worth adding the above comment as a deprecation hint?

pub fn i128_iter(&'a self) -> DecimalIter<'a> {
DecimalIter::<'a>::new(self)
}
}

Expand Down Expand Up @@ -421,7 +421,7 @@ impl<Ptr: Borrow<Option<i128>>> FromIterator<Ptr> for Decimal128Array {
}

macro_rules! def_decimal_array {
($ty:ident, $array_name:expr) => {
($ty:ident, $array_name:expr, $decimal_ty:ident, $iter_ty:ident) => {
impl private_decimal::DecimalArrayPrivate for $ty {
fn raw_value_data_ptr(&self) -> *const u8 {
self.value_data.as_ptr()
Expand Down Expand Up @@ -463,15 +463,55 @@ macro_rules! def_decimal_array {
write!(f, "]")
}
}

impl<'a> ArrayAccessor for &'a $ty {
type Item = $decimal_ty;

fn value(&self, index: usize) -> Self::Item {
$ty::value(self, index)
}

unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
$ty::value_unchecked(self, index)
}
}

impl<'a> IntoIterator for &'a $ty {
type Item = Option<$decimal_ty>;
type IntoIter = $iter_ty<'a>;

fn into_iter(self) -> Self::IntoIter {
$iter_ty::<'a>::new(self)
}
}

impl<'a> $ty {
/// constructs a new iterator
pub fn iter(&'a self) -> $iter_ty<'a> {
$iter_ty::<'a>::new(self)
}
}
};
}

def_decimal_array!(Decimal128Array, "Decimal128Array");
def_decimal_array!(Decimal256Array, "Decimal256Array");
def_decimal_array!(
Decimal128Array,
"Decimal128Array",
Decimal128,
Decimal128Iter
);
def_decimal_array!(
Decimal256Array,
"Decimal256Array",
Decimal256,
Decimal256Iter
);

#[cfg(test)]
mod tests {
use crate::array::Decimal256Builder;
use crate::{array::Decimal128Builder, datatypes::Field};
use num::{BigInt, Num};

use super::*;

Expand Down Expand Up @@ -567,7 +607,7 @@ mod tests {
let data = vec![Some(-100), None, Some(101)];
let array: Decimal128Array = data.clone().into_iter().collect();

let collected: Vec<_> = array.iter().collect();
let collected: Vec<_> = array.iter().map(|d| d.map(|v| v.as_i128())).collect();
assert_eq!(data, collected);
}

Expand All @@ -576,7 +616,8 @@ mod tests {
let data = vec![Some(-100), None, Some(101)];
let array: Decimal128Array = data.clone().into_iter().collect();

let collected: Vec<_> = array.into_iter().collect();
let collected: Vec<_> =
array.into_iter().map(|d| d.map(|v| v.as_i128())).collect();
assert_eq!(data, collected);
}

Expand Down Expand Up @@ -750,4 +791,24 @@ mod tests {
assert!(decimal.is_null(0));
assert_eq!(decimal.value_as_string(1), "56".to_string());
}

#[test]
fn test_decimal256_iter() {
// TODO: Impl FromIterator for Decimal256Array
let mut builder = Decimal256Builder::new(30, 76, 6);
let value = BigInt::from_str_radix("12345", 10).unwrap();
let decimal1 = Decimal256::from_big_int(&value, 76, 6).unwrap();
builder.append_value(&decimal1).unwrap();

builder.append_null();

let value = BigInt::from_str_radix("56789", 10).unwrap();
let decimal2 = Decimal256::from_big_int(&value, 76, 6).unwrap();
builder.append_value(&decimal2).unwrap();

let array: Decimal256Array = builder.finish();

let collected: Vec<_> = array.iter().collect();
assert_eq!(vec![Some(decimal1), None, Some(decimal2)], collected);
}
}
17 changes: 16 additions & 1 deletion arrow/src/array/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
// under the License.

use crate::array::array::ArrayAccessor;
use crate::array::BasicDecimalArray;
use crate::array::{BasicDecimalArray, Decimal256Array};

use super::{
Array, BooleanArray, Decimal128Array, GenericBinaryArray, GenericListArray,
Expand Down Expand Up @@ -104,15 +104,28 @@ pub type GenericStringIter<'a, T> = ArrayIter<&'a GenericStringArray<T>>;
pub type GenericBinaryIter<'a, T> = ArrayIter<&'a GenericBinaryArray<T>>;
pub type GenericListArrayIter<'a, O> = ArrayIter<&'a GenericListArray<O>>;

/// an iterator that returns `Some(Decimal128)` or `None`, that can be used on a
/// [`Decimal128Array`]
pub type Decimal128Iter<'a> = ArrayIter<&'a Decimal128Array>;

/// an iterator that returns `Some(Decimal256)` or `None`, that can be used on a
/// [`Decimal256Array`]
pub type Decimal256Iter<'a> = ArrayIter<&'a Decimal256Array>;

/// an iterator that returns `Some(i128)` or `None`, that can be used on a
/// [`Decimal128Array`]
#[derive(Debug)]
#[deprecated(note = "Please use `Decimal128Iter` instead. \
`DecimalIter` iterates `Decimal128` values as i128 values. \
This is kept mostly for back-compatibility purpose. Suggests to use `Decimal128Array.iter()` \
that returns `Decimal128Iter`.")]
pub struct DecimalIter<'a> {
array: &'a Decimal128Array,
current: usize,
current_end: usize,
}

#[allow(deprecated)]
impl<'a> DecimalIter<'a> {
pub fn new(array: &'a Decimal128Array) -> Self {
Self {
Expand All @@ -123,6 +136,7 @@ impl<'a> DecimalIter<'a> {
}
}

#[allow(deprecated)]
impl<'a> std::iter::Iterator for DecimalIter<'a> {
type Item = Option<i128>;

Expand Down Expand Up @@ -150,6 +164,7 @@ impl<'a> std::iter::Iterator for DecimalIter<'a> {
}

/// iterator has known size.
#[allow(deprecated)]
impl<'a> std::iter::ExactSizeIterator for DecimalIter<'a> {}

#[cfg(test)]
Expand Down
4 changes: 2 additions & 2 deletions arrow/src/compute/kernels/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1205,15 +1205,15 @@ fn cast_decimal_to_decimal(
let div = 10_i128.pow((input_scale - output_scale) as u32);
array
.iter()
.map(|v| v.map(|v| v / div))
.map(|v| v.map(|v| v.as_i128() / div))
.collect::<Decimal128Array>()
} else {
// For example, input_scale is 3 and output_scale is 4;
// Original value is 1123_i128, and will be cast to 11230_i128.
let mul = 10_i128.pow((output_scale - input_scale) as u32);
array
.iter()
.map(|v| v.map(|v| v * mul))
.map(|v| v.map(|v| v.as_i128() * mul))
.collect::<Decimal128Array>()
}
.with_precision_and_scale(*output_precision, *output_scale)?;
Expand Down
4 changes: 2 additions & 2 deletions arrow/src/datatypes/datatype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -431,10 +431,10 @@ pub const MIN_DECIMAL_FOR_LARGER_PRECISION: [&str; 38] = [
];

/// The maximum precision for [DataType::Decimal] values
pub const DECIMAL_MAX_PRECISION: usize = 38;
pub const DECIMAL128_MAX_PRECISION: usize = 38;

/// The maximum scale for [DataType::Decimal] values
pub const DECIMAL_MAX_SCALE: usize = 38;
pub const DECIMAL128_MAX_SCALE: usize = 38;

/// The maximum precision for [DataType::Decimal256] values
pub const DECIMAL256_MAX_PRECISION: usize = 76;
Expand Down