Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-35942: [C++] Improve Decimal ToReal accuracy #36667

Merged
merged 11 commits into from
Jul 18, 2023
3 changes: 2 additions & 1 deletion cpp/src/arrow/compute/kernels/scalar_cast_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1025,7 +1025,8 @@ TEST(Cast, DecimalToFloating) {
}
}

// Edge cases are tested for Decimal128::ToReal() and Decimal256::ToReal()
// Edge cases are tested for Decimal128::ToReal() and Decimal256::ToReal() in
// decimal_test.cc
}

TEST(Cast, DecimalToString) {
Expand Down
10 changes: 10 additions & 0 deletions cpp/src/arrow/util/basic_decimal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -969,6 +969,16 @@ bool BasicDecimal256::FitsInPrecision(int32_t precision) const {
return BasicDecimal256::Abs(*this) < kDecimal256PowersOfTen[precision];
}

void BasicDecimal256::GetWholeAndFraction(int scale, BasicDecimal256* whole,
BasicDecimal256* fraction) const {
DCHECK_GE(scale, 0);
DCHECK_LE(scale, 76);

BasicDecimal256 multiplier(kDecimal256PowersOfTen[scale]);
auto s = Divide(multiplier, whole, fraction);
DCHECK_EQ(s, DecimalStatus::kSuccess);
}

const BasicDecimal256& BasicDecimal256::GetScaleMultiplier(int32_t scale) {
DCHECK_GE(scale, 0);
DCHECK_LE(scale, 76);
Expand Down
4 changes: 4 additions & 0 deletions cpp/src/arrow/util/basic_decimal.h
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,10 @@ class ARROW_EXPORT BasicDecimal256 : public GenericBasicDecimal<BasicDecimal256,
/// \brief Get the lowest bits of the two's complement representation of the number.
uint64_t low_bits() const { return bit_util::little_endian::Make(array_)[0]; }

/// \brief separate the integer and fractional parts for the given scale.
void GetWholeAndFraction(int32_t scale, BasicDecimal256* whole,
BasicDecimal256* fraction) const;

/// \brief Scale multiplier for given scale value.
static const BasicDecimal256& GetScaleMultiplier(int32_t scale);
/// \brief Half-scale multiplier for given scale value.
Expand Down
58 changes: 56 additions & 2 deletions cpp/src/arrow/util/decimal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -305,12 +305,39 @@ struct Decimal128RealConversion
}

template <typename Real>
static Real ToRealPositive(const Decimal128& decimal, int32_t scale) {
static Real ToRealPositiveNoSplit(const Decimal128& decimal, int32_t scale) {
Real x = RealTraits<Real>::two_to_64(static_cast<Real>(decimal.high_bits()));
x += static_cast<Real>(decimal.low_bits());
x *= LargePowerOfTen<Real>(-scale);
return x;
}

/// An appoximate conversion from Decimal128 to Real that guarantees:
/// 1. If the decimal is an integer, the conversion is exact.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Even if the integer has more than 52 significant bits??

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See below

Here "exact value" means the closest representable value by Real.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, ok :-)

/// 2. If the number of fractional digits is <= RealTraits<Real>::kMantissaDigits (e.g.
/// 8 for float and 16 for double), the conversion is within 1 ULP of the exact
/// value.
/// 3. Otherwise, the conversion is within 2^(-RealTraits<Real>::kMantissaDigits+1)
/// (e.g. 2^-23 for float and 2^-52 for double) of the exact value.
/// Here "exact value" means the closest representable value by Real.
template <typename Real>
static Real ToRealPositive(const Decimal128& decimal, int32_t scale) {
if (scale <= 0 || (decimal.high_bits() == 0 &&
decimal.low_bits() <= RealTraits<Real>::kMaxPreciseInteger)) {
// No need to split the decimal if it is already an integer (scale <= 0) or if it
// can be precisely represented by Real
return ToRealPositiveNoSplit<Real>(decimal, scale);
}

// Split decimal into whole and fractional parts to avoid precision loss
BasicDecimal128 whole_decimal, fraction_decimal;
decimal.GetWholeAndFraction(scale, &whole_decimal, &fraction_decimal);

Real whole = ToRealPositiveNoSplit<Real>(whole_decimal, 0);
Real fraction = ToRealPositiveNoSplit<Real>(fraction_decimal, scale);

return whole + fraction;
}
};

} // namespace
Expand Down Expand Up @@ -967,7 +994,7 @@ struct Decimal256RealConversion
}

template <typename Real>
static Real ToRealPositive(const Decimal256& decimal, int32_t scale) {
static Real ToRealPositiveNoSplit(const Decimal256& decimal, int32_t scale) {
DCHECK_GE(decimal, 0);
Real x = 0;
const auto parts_le = bit_util::little_endian::Make(decimal.native_endian_array());
Expand All @@ -978,6 +1005,33 @@ struct Decimal256RealConversion
x *= LargePowerOfTen<Real>(-scale);
return x;
}

/// An appoximate conversion from Decimal256 to Real that guarantees:
/// 1. If the decimal is an integer, the conversion is exact.
/// 2. If the number of fractional digits is <= RealTraits<Real>::kMantissaDigits (e.g.
/// 8 for float and 16 for double), the conversion is within 1 ULP of the exact
/// value.
/// 3. Otherwise, the conversion is within 2^(-RealTraits<Real>::kMantissaDigits+1)
/// (e.g. 2^-23 for float and 2^-52 for double) of the exact value.
/// Here "exact value" means the closest representable value by Real.
template <typename Real>
static Real ToRealPositive(const Decimal256& decimal, int32_t scale) {
const auto parts_le = decimal.little_endian_array();
if (scale <= 0 || (parts_le[3] == 0 && parts_le[2] == 0 && parts_le[1] == 0 &&
parts_le[0] < RealTraits<Real>::kMaxPreciseInteger)) {
// No need to split the decimal if it is already an integer (scale <= 0) or if it
// can be precisely represented by Real
return ToRealPositiveNoSplit<Real>(decimal, scale);
}

// Split the decimal into whole and fractional parts to avoid precision loss
BasicDecimal256 whole_decimal, fraction_decimal;
decimal.GetWholeAndFraction(scale, &whole_decimal, &fraction_decimal);

Real whole = ToRealPositiveNoSplit<Real>(whole_decimal, 0);
Real fraction = ToRealPositiveNoSplit<Real>(fraction_decimal, scale);
return whole + fraction;
}
};

} // namespace
Expand Down
4 changes: 4 additions & 0 deletions cpp/src/arrow/util/decimal_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,8 @@ struct RealTraits<float> {
static constexpr int kMantissaBits = 24;
// ceil(log10(2 ^ kMantissaBits))
static constexpr int kMantissaDigits = 8;
// Integers between zero and kMaxPreciseInteger can be precisely represented
static constexpr uint64_t kMaxPreciseInteger = (1ULL << kMantissaBits) - 1;
};

template <>
Expand All @@ -464,6 +466,8 @@ struct RealTraits<double> {
static constexpr int kMantissaBits = 53;
// ceil(log10(2 ^ kMantissaBits))
static constexpr int kMantissaDigits = 16;
// Integers between zero and kMaxPreciseInteger can be precisely represented
static constexpr uint64_t kMaxPreciseInteger = 1ULL << kMantissaBits;
pitrou marked this conversation as resolved.
Show resolved Hide resolved
};

template <typename DecimalType>
Expand Down
148 changes: 107 additions & 41 deletions cpp/src/arrow/util/decimal_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1050,6 +1050,24 @@ void CheckDecimalToReal(const std::string& decimal_value, int32_t scale, Real ex
<< "Decimal value: " << decimal_value << " Scale: " << scale;
}

template <typename Decimal, typename Real>
void CheckDecimalToRealWithinOneULP(const std::string& decimal_value, int32_t scale,
Real expected) {
Decimal dec(decimal_value);
auto result = dec.template ToReal<Real>(scale);
ASSERT_TRUE(result == expected || result == std::nextafter(expected, expected + 1) ||
result == std::nextafter(expected, expected - 1))
<< "Decimal value: " << decimal_value << " Scale: " << scale;
}

template <typename Decimal, typename Real>
void CheckDecimalToRealWithinEpsilon(const std::string& decimal_value, int32_t scale,
Real epsilon, Real expected) {
Decimal dec(decimal_value);
ASSERT_TRUE(std::abs(dec.template ToReal<Real>(scale) - expected) <= epsilon)
<< "Decimal value: " << decimal_value << " Scale: " << scale;
}

template <typename Decimal>
void CheckDecimalToRealApprox(const std::string& decimal_value, int32_t scale,
float expected) {
Expand Down Expand Up @@ -1110,59 +1128,79 @@ class TestDecimalToReal : public ::testing::Test {
}
}
}
};

// Test precision of conversions to float values
void TestPrecision() {
// 2**63 + 2**40 (exactly representable in a float's 24 bits of precision)
CheckDecimalToReal<Decimal, Real>("9223373136366403584", 0, 9.223373e+18f);
CheckDecimalToReal<Decimal, Real>("-9223373136366403584", 0, -9.223373e+18f);
// 2**64 + 2**41 (exactly representable in a float)
CheckDecimalToReal<Decimal, Real>("18446746272732807168", 0, 1.8446746e+19f);
CheckDecimalToReal<Decimal, Real>("-18446746272732807168", 0, -1.8446746e+19f);
}
TYPED_TEST_SUITE(TestDecimalToReal, RealTypes);
TYPED_TEST(TestDecimalToReal, TestSuccess) { this->TestSuccess(); }

// Custom test for Decimal::ToReal<float>
template <typename DecimalType>
class TestDecimalToRealFloat : public TestDecimalToReal<std::pair<DecimalType, float>> {};
TYPED_TEST_SUITE(TestDecimalToRealFloat, DecimalTypes);

// Test conversions with a range of scales
void TestLargeValues(int32_t max_scale) {
// Note that exact comparisons would succeed on some platforms (Linux, macOS).
// Nevertheless, power-of-ten factors are not all exactly representable
// in binary floating point.
for (int32_t scale = -max_scale; scale <= max_scale; scale++) {
TYPED_TEST(TestDecimalToRealFloat, LargeValues) {
auto max_scale = TypeParam::kMaxScale;
// Note that exact comparisons would succeed on some platforms (Linux, macOS).
// Nevertheless, power-of-ten factors are not all exactly representable
// in binary floating point.
for (int32_t scale = -max_scale; scale <= max_scale; scale++) {
#ifdef _WIN32
// MSVC gives pow(10.f, -45.f) == 0 even though 1e-45f is nonzero
if (scale == 45) continue;
// MSVC gives pow(10.f, -45.f) == 0 even though 1e-45f is nonzero
if (scale == 45) continue;
#endif
CheckDecimalToRealApprox<Decimal>("1", scale, Pow10(-scale));
}
for (int32_t scale = -max_scale; scale <= max_scale - 2; scale++) {
CheckDecimalToRealApprox<TypeParam>("1", scale, this->Pow10(-scale));
}
for (int32_t scale = -max_scale; scale <= max_scale - 2; scale++) {
#ifdef _WIN32
// MSVC gives pow(10.f, -45.f) == 0 even though 1e-45f is nonzero
if (scale == 45) continue;
// MSVC gives pow(10.f, -45.f) == 0 even though 1e-45f is nonzero
if (scale == 45) continue;
#endif
const Real factor = static_cast<Real>(123);
CheckDecimalToRealApprox<Decimal>("123", scale, factor * Pow10(-scale));
}
const auto factor = static_cast<float>(123);
CheckDecimalToRealApprox<TypeParam>("123", scale, factor * this->Pow10(-scale));
}
};

TYPED_TEST_SUITE(TestDecimalToReal, RealTypes);

TYPED_TEST(TestDecimalToReal, TestSuccess) { this->TestSuccess(); }
}

// Custom test for Decimal128::ToReal<float>
class TestDecimal128ToRealFloat : public TestDecimalToReal<std::pair<Decimal128, float>> {
};
TEST_F(TestDecimal128ToRealFloat, LargeValues) { TestLargeValues(/*max_scale=*/38); }
TEST_F(TestDecimal128ToRealFloat, Precision) { this->TestPrecision(); }
// Custom test for Decimal256::ToReal<float>
class TestDecimal256ToRealFloat : public TestDecimalToReal<std::pair<Decimal256, float>> {
};
TEST_F(TestDecimal256ToRealFloat, LargeValues) { TestLargeValues(/*max_scale=*/76); }
TEST_F(TestDecimal256ToRealFloat, Precision) { this->TestPrecision(); }
TYPED_TEST(TestDecimalToRealFloat, Precision) {
// 2**63 + 2**40 (exactly representable in a float's 24 bits of precision)
CheckDecimalToReal<TypeParam, float>("9223373136366403584", 0, 9.223373e+18f);
CheckDecimalToReal<TypeParam, float>("-9223373136366403584", 0, -9.223373e+18f);
// 2**64 + 2**41 (exactly representable in a float)
CheckDecimalToReal<TypeParam, float>("18446746272732807168", 0, 1.8446746e+19f);
CheckDecimalToReal<TypeParam, float>("-18446746272732807168", 0, -1.8446746e+19f);

// Integers are always exact
auto scale = TypeParam::kMaxScale - 1;
std::string seven = "7.";
seven.append(scale, '0'); // pad with trailing zeros
CheckDecimalToReal<TypeParam, float>(seven, scale, 7.0f);
CheckDecimalToReal<TypeParam, float>("-" + seven, scale, -7.0f);

CheckDecimalToReal<TypeParam, float>("99999999999999999999.0000000000000000", 16,
99999999999999999999.0f);
CheckDecimalToReal<TypeParam, float>("-99999999999999999999.0000000000000000", 16,
-99999999999999999999.0f);

// Small fractions are within one ULP
CheckDecimalToRealWithinOneULP<TypeParam, float>("9999999.9", 1, 9999999.9f);
CheckDecimalToRealWithinOneULP<TypeParam, float>("-9999999.9", 1, -9999999.9f);
CheckDecimalToRealWithinOneULP<TypeParam, float>("9999999.999999", 6, 9999999.999999f);
CheckDecimalToRealWithinOneULP<TypeParam, float>("-9999999.999999", 6,
-9999999.999999f);

// Large fractions are within 2^-23
constexpr float epsilon = 1.1920928955078125e-07f; // 2^-23
CheckDecimalToRealWithinEpsilon<TypeParam, float>(
"112334829348925.99070703983306884765625", 23, epsilon,
112334829348925.99070703983306884765625f);
CheckDecimalToRealWithinEpsilon<TypeParam, float>(
"1.987748987892758765582589910934859345", 36, epsilon,
1.987748987892758765582589910934859345f);
}

// ToReal<double> tests are disabled on MinGW because of precision issues in results
#ifndef __MINGW32__

// Custom test for Decimal128::ToReal<double>
// Custom test for Decimal::ToReal<double>
template <typename DecimalType>
class TestDecimalToRealDouble : public TestDecimalToReal<std::pair<DecimalType, double>> {
};
Expand Down Expand Up @@ -1209,6 +1247,34 @@ TYPED_TEST(TestDecimalToRealDouble, Precision) {
9.999999999999998e+47);
CheckDecimalToReal<TypeParam, double>("-99999999999999978859343891977453174784", -10,
-9.999999999999998e+47);
// Integers are always exact
auto scale = TypeParam::kMaxScale - 1;
std::string seven = "7.";
seven.append(scale, '0');
CheckDecimalToReal<TypeParam, double>(seven, scale, 7.0);
CheckDecimalToReal<TypeParam, double>("-" + seven, scale, -7.0);

CheckDecimalToReal<TypeParam, double>("99999999999999999999.0000000000000000", 16,
99999999999999999999.0);
CheckDecimalToReal<TypeParam, double>("-99999999999999999999.0000000000000000", 16,
-99999999999999999999.0);

// Small fractions are within one ULP
CheckDecimalToRealWithinOneULP<TypeParam, double>("9999999.9", 1, 9999999.9);
CheckDecimalToRealWithinOneULP<TypeParam, double>("-9999999.9", 1, -9999999.9);
CheckDecimalToRealWithinOneULP<TypeParam, double>("9999999.999999999999999", 15,
9999999.999999999999999);
CheckDecimalToRealWithinOneULP<TypeParam, double>("-9999999.999999999999999", 15,
-9999999.999999999999999);

// Large fractions are within 2^-52
constexpr double epsilon = 2.220446049250313080847263336181640625e-16; // 2^-52
CheckDecimalToRealWithinEpsilon<TypeParam, double>(
"112334829348925.99070703983306884765625", 23, epsilon,
112334829348925.99070703983306884765625);
CheckDecimalToRealWithinEpsilon<TypeParam, double>(
"1.987748987892758765582589910934859345", 36, epsilon,
1.987748987892758765582589910934859345);
}

#endif // __MINGW32__
Expand Down
Loading