Skip to content

Commit

Permalink
Add Decimal256 API (#1914)
Browse files Browse the repository at this point in the history
* Add Decimal256

* Dedup

* Truncate string representation by precision

* Update arrow/src/util/decimal.rs

Co-authored-by: Remzi Yang <59198230+HaoYang670@users.noreply.github.com>

* Update arrow/src/util/decimal.rs

Co-authored-by: Remzi Yang <59198230+HaoYang670@users.noreply.github.com>

* Update arrow/src/util/decimal.rs

Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>

* For review

* Fix clippy

* For review

* Move another one

Co-authored-by: Remzi Yang <59198230+HaoYang670@users.noreply.github.com>
Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
  • Loading branch information
3 people committed Jun 23, 2022
1 parent f8afc14 commit f0df5e0
Show file tree
Hide file tree
Showing 2 changed files with 233 additions and 79 deletions.
4 changes: 2 additions & 2 deletions arrow/src/array/array_binary.rs
Expand Up @@ -33,7 +33,7 @@ use crate::datatypes::{
};
use crate::error::{ArrowError, Result};
use crate::util::bit_util;
use crate::util::decimal::Decimal128;
use crate::util::decimal::{BasicDecimal, Decimal128};
use crate::{buffer::MutableBuffer, datatypes::DataType};

/// See [`BinaryArray`] and [`LargeBinaryArray`] for storing
Expand Down Expand Up @@ -823,7 +823,7 @@ impl DecimalArray {

#[inline]
pub fn value_as_string(&self, row: usize) -> String {
self.value(row).as_string()
self.value(row).to_string()
}

pub fn from_fixed_size_list_array(
Expand Down
308 changes: 231 additions & 77 deletions arrow/src/util/decimal.rs
Expand Up @@ -17,124 +17,278 @@

//! Decimal related utils

use std::cmp::Ordering;
use crate::error::{ArrowError, Result};
use num::bigint::BigInt;
use std::cmp::{min, Ordering};

/// Common interface of the fixed-width decimal values defined in this module.
///
/// A decimal is described by a `precision`, a `scale` and the raw bytes of its
/// unscaled integer value, stored in little-endian order.
pub trait BasicDecimal: PartialOrd + Ord + PartialEq + Eq {
    /// The bit-width of the internal representation.
    const BIT_WIDTH: usize;

    /// Tries to create a decimal value from precision, scale and bytes.
    ///
    /// The bytes should be stored in little-endian order.
    ///
    /// # Errors
    ///
    /// Returns [`ArrowError::InvalidArgumentError`] if `precision` is less than
    /// `scale`, or if the length of `bytes` is not `BIT_WIDTH / 8`.
    ///
    /// # Note
    ///
    /// This method doesn't validate if the decimal value represented by the bytes
    /// can be fitted into the specified precision.
    fn try_new_from_bytes(precision: usize, scale: usize, bytes: &[u8]) -> Result<Self>
    where
        Self: Sized,
    {
        if precision < scale {
            return Err(ArrowError::InvalidArgumentError(format!(
                "Precision {} is less than scale {}",
                precision, scale
            )));
        }

        if bytes.len() == Self::BIT_WIDTH / 8 {
            Ok(Self::new(precision, scale, bytes))
        } else {
            Err(ArrowError::InvalidArgumentError(format!(
                "Input to Decimal{} must be {} bytes",
                Self::BIT_WIDTH,
                Self::BIT_WIDTH / 8
            )))
        }
    }

    /// Creates a decimal value from precision, scale, and bytes.
    ///
    /// This constructor performs no validation: neither the relation between
    /// `precision` and `scale` nor the length of `bytes` is checked here.
    /// Use [`BasicDecimal::try_new_from_bytes`] for a checked constructor.
    ///
    /// # Panics
    ///
    /// The implementations generated by `def_decimal!` in this file panic if
    /// the length of `bytes` is not `BIT_WIDTH / 8`.
    fn new(precision: usize, scale: usize, bytes: &[u8]) -> Self;

    /// Returns the raw bytes of the integer representation of the decimal.
    fn raw_value(&self) -> &[u8];

    /// Returns the precision of the decimal.
    fn precision(&self) -> usize;

    /// Returns the scale of the decimal.
    fn scale(&self) -> usize;

    /// Returns the string representation of the decimal.
    ///
    /// The raw bytes are read as a signed little-endian integer. If that
    /// integer has more digits than the precision of the decimal, only the
    /// leading `precision` digits are kept. The last `scale` kept digits form
    /// the fractional part; when there are not more than `scale` digits, the
    /// result is left-padded with zeros behind a leading `0.`.
    fn to_string(&self) -> String {
        let integer = BigInt::from_signed_bytes_le(self.raw_value());
        let value_str = integer.to_string();
        let (sign, digits) = match value_str.strip_prefix('-') {
            Some(digits) => ("-", digits),
            None => ("", value_str.as_str()),
        };

        // Truncate first, then decide where the separator goes. Deciding on the
        // untruncated length produced ".12" (no leading zero) when
        // precision == scale, and an underflowing subtraction when a value
        // built through the unchecked `new` had scale > precision.
        let digits = &digits[..min(self.precision(), digits.len())];
        let scale = self.scale();

        if scale == 0 {
            format!("{}{}", sign, digits)
        } else if digits.len() > scale {
            // Decimal separator is in the middle of the string
            let (whole, fraction) = digits.split_at(digits.len() - scale);
            format!("{}{}.{}", sign, whole, fraction)
        } else {
            // String has to be padded
            format!("{}0.{:0>width$}", sign, digits, width = scale)
        }
    }
}

/// Represents a decimal value with precision and scale.
/// The decimal value is represented by a signed 128-bit integer.
/// The decimal value could be represented by a signed 128-bit integer.
#[derive(Debug)]
pub struct Decimal128 {
#[allow(dead_code)]
precision: usize,
scale: usize,
value: i128,
value: [u8; 16],
}

impl PartialOrd for Decimal128 {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
assert_eq!(
self.scale, other.scale,
"Cannot compare two Decimal128 with different scale: {}, {}",
self.scale, other.scale
);
self.value.partial_cmp(&other.value)
impl Decimal128 {
/// Builds a `Decimal128` with the given precision and scale whose raw bytes
/// are the little-endian encoding of `value`.
pub(crate) fn new_from_i128(precision: usize, scale: usize, value: i128) -> Self {
    let value = value.to_le_bytes();
    Decimal128 {
        precision,
        scale,
        value,
    }
}
}

impl Ord for Decimal128 {
fn cmp(&self, other: &Self) -> Ordering {
assert_eq!(
self.scale, other.scale,
"Cannot compare two Decimal128 with different scale: {}, {}",
self.scale, other.scale
);
self.value.cmp(&other.value)
/// Decodes the stored little-endian bytes back into an `i128`.
pub fn as_i128(&self) -> i128 {
    let raw = self.value;
    i128::from_le_bytes(raw)
}
}

impl PartialEq<Self> for Decimal128 {
fn eq(&self, other: &Self) -> bool {
assert_eq!(
self.scale, other.scale,
"Cannot compare two Decimal128 with different scale: {}, {}",
self.scale, other.scale
);
self.value.eq(&other.value)
/// Converts a `Decimal128` into its `i128` value; precision and scale are dropped.
impl From<Decimal128> for i128 {
    fn from(decimal: Decimal128) -> Self {
        let unscaled: i128 = decimal.as_i128();
        unscaled
    }
}

impl Eq for Decimal128 {}
/// A decimal value described by a precision, a scale and an integer that fits
/// in a signed 256-bit representation.
///
/// The integer is kept as 32 raw bytes.
#[derive(Debug)]
pub struct Decimal256 {
    // NOTE(review): `precision` is read by the `precision()` accessor that
    // `def_decimal!` generates for this type, so this `allow` may no longer be
    // needed — confirm before removing.
    #[allow(dead_code)]
    precision: usize,
    scale: usize,
    value: [u8; 32],
}

impl Decimal128 {
pub fn new_from_bytes(precision: usize, scale: usize, bytes: &[u8]) -> Self {
let as_array = bytes.try_into();
let value = match as_array {
Ok(v) if bytes.len() == 16 => i128::from_le_bytes(v),
_ => panic!("Input to Decimal128 is not 128bit integer."),
};
macro_rules! def_decimal {
($ty:ident, $bit:expr) => {
impl BasicDecimal for $ty {
const BIT_WIDTH: usize = $bit;

Decimal128 {
precision,
scale,
value,
}
}
fn new(precision: usize, scale: usize, bytes: &[u8]) -> Self {
$ty {
precision,
scale,
value: bytes.try_into().unwrap(),
}
}

pub fn new_from_i128(precision: usize, scale: usize, value: i128) -> Self {
Decimal128 {
precision,
scale,
value,
fn raw_value(&self) -> &[u8] {
&self.value
}

fn precision(&self) -> usize {
self.precision
}

fn scale(&self) -> usize {
self.scale
}
}
}

pub fn as_i128(&self) -> i128 {
self.value
}
// Partial ordering between two decimals of the same generated type.
impl PartialOrd for $ty {
/// Compares the raw `value` byte arrays of `self` and `other`.
///
/// # Panics
///
/// Panics if the two decimals have different scales.
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
assert_eq!(
self.scale, other.scale,
"Cannot compare two Decimals with different scale: {}, {}",
self.scale, other.scale
);
// NOTE(review): `value` is a byte array, so this is a lexicographic
// comparison starting at index 0. The constructors in this file store the
// integer in little-endian order, so this does not look like numeric order
// (e.g. 256 = [0, 1, ..] would sort below 100 = [100, 0, ..], and the sign
// is ignored) — confirm, and consider a sign-aware comparison that starts
// from the most significant byte.
self.value.partial_cmp(&other.value)
}
}

pub fn as_string(&self) -> String {
let value_str = self.value.to_string();
// Total ordering between two decimals of the same generated type.
impl Ord for $ty {
/// Compares the raw `value` byte arrays of `self` and `other`.
///
/// # Panics
///
/// Panics if the two decimals have different scales.
fn cmp(&self, other: &Self) -> Ordering {
assert_eq!(
self.scale, other.scale,
"Cannot compare two Decimals with different scale: {}, {}",
self.scale, other.scale
);
// NOTE(review): `value` is a byte array, so this is a lexicographic
// comparison starting at index 0. The constructors in this file store the
// integer in little-endian order, so this does not look like numeric order
// (e.g. 256 = [0, 1, ..] would sort below 100 = [100, 0, ..], and the sign
// is ignored) — confirm, and consider a sign-aware comparison that starts
// from the most significant byte.
self.value.cmp(&other.value)
}
}

if self.scale == 0 {
value_str
} else {
let (sign, rest) = value_str.split_at(if self.value >= 0 { 0 } else { 1 });

if rest.len() > self.scale {
// Decimal separator is in the middle of the string
let (whole, decimal) = value_str.split_at(value_str.len() - self.scale);
format!("{}.{}", whole, decimal)
} else {
// String has to be padded
format!("{}0.{:0>width$}", sign, rest, width = self.scale)
// Equality between two decimals of the same generated type.
impl PartialEq<Self> for $ty {
/// Returns `true` when both decimals hold identical raw `value` bytes.
///
/// Only `scale` is checked and `value` compared; `precision` does not take
/// part in the comparison.
///
/// # Panics
///
/// Panics if the two decimals have different scales, instead of returning
/// `false`.
fn eq(&self, other: &Self) -> bool {
assert_eq!(
self.scale, other.scale,
"Cannot compare two Decimals with different scale: {}, {}",
self.scale, other.scale
);
self.value.eq(&other.value)
}
}
}
}

impl From<Decimal128> for i128 {
fn from(decimal: Decimal128) -> Self {
decimal.as_i128()
}
impl Eq for $ty {}
};
}

def_decimal!(Decimal128, 128);
def_decimal!(Decimal256, 256);

#[cfg(test)]
mod tests {
use crate::util::decimal::Decimal128;
use crate::util::decimal::{BasicDecimal, Decimal128, Decimal256};

#[test]
fn decimal_128_to_string() {
let mut value = Decimal128::new_from_i128(5, 2, 100);
assert_eq!(value.as_string(), "1.00");
assert_eq!(value.to_string(), "1.00");

value = Decimal128::new_from_i128(5, 3, 100);
assert_eq!(value.as_string(), "0.100");
assert_eq!(value.to_string(), "0.100");
}

#[test]
fn decimal_128_from_bytes() {
fn decimal_invalid_precision_scale() {
let bytes = 100_i128.to_le_bytes();
let value = Decimal128::new_from_bytes(5, 2, &bytes);
assert_eq!(value.as_string(), "1.00");
let err = Decimal128::try_new_from_bytes(5, 6, &bytes);
assert!(err.is_err());
}

#[test]
fn decimal_128_from_bytes() {
    // (precision, scale, unscaled integer, expected rendering)
    let cases: [(usize, usize, i128, &str); 6] = [
        (5, 2, 100, "1.00"),
        (5, 2, -1, "-0.01"),
        (38, 2, i128::MAX, "170141183460469231731687303715884105.72"),
        (38, 2, i128::MIN, "-170141183460469231731687303715884105.72"),
        // Truncated: the integer has more digits than the precision allows.
        (3, 2, 12345, "1.23"),
        (3, 2, -12345, "-1.23"),
    ];

    for &(precision, scale, unscaled, expected) in cases.iter() {
        let bytes = unscaled.to_le_bytes();
        let value = Decimal128::try_new_from_bytes(precision, scale, &bytes).unwrap();
        assert_eq!(value.to_string(), expected);
    }
}

#[test]
fn decimal_256_from_bytes() {
    // Builds a `Decimal256` from little-endian bytes and renders it.
    fn render(precision: usize, scale: usize, raw: &[u8]) -> String {
        Decimal256::try_new_from_bytes(precision, scale, raw)
            .unwrap()
            .to_string()
    }

    let mut raw = [0_u8; 32];
    raw[..16].copy_from_slice(&100_i128.to_le_bytes());
    assert_eq!(render(5, 2, &raw), "1.00");

    raw[..16].copy_from_slice(&i128::MAX.to_le_bytes());
    assert_eq!(
        render(40, 4, &raw),
        "17014118346046923173168730371588410.5727"
    );

    // i128 maximum + 1
    raw[..16].copy_from_slice(&0_i128.to_le_bytes());
    raw[15] = 128;
    assert_eq!(
        render(40, 4, &raw),
        "17014118346046923173168730371588410.5728"
    );

    // smaller than i128 minimum
    let mut raw = [255_u8; 32];
    raw[31] = 128;
    assert_eq!(
        render(79, 4, &raw),
        "-5744373177007483132341216834415376678658315645522012356644966081642565415.7313"
    );

    // every byte set to 255
    let raw = [255_u8; 32];
    assert_eq!(render(5, 2, &raw), "-0.01");
}

fn i128_func(value: impl Into<i128>) -> i128 {
Expand Down

0 comments on commit f0df5e0

Please sign in to comment.