diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index 083a85eb346c5..1db06a762544b 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -1025,7 +1025,8 @@ TEST(Cast, DecimalToFloating) { } } - // Edge cases are tested for Decimal128::ToReal() and Decimal256::ToReal() + // Edge cases are tested for Decimal128::ToReal() and Decimal256::ToReal() in + // decimal_test.cc } TEST(Cast, DecimalToString) { diff --git a/cpp/src/arrow/util/basic_decimal.cc b/cpp/src/arrow/util/basic_decimal.cc index f2fd39d6f37ad..0835ab9074a48 100644 --- a/cpp/src/arrow/util/basic_decimal.cc +++ b/cpp/src/arrow/util/basic_decimal.cc @@ -969,6 +969,16 @@ bool BasicDecimal256::FitsInPrecision(int32_t precision) const { return BasicDecimal256::Abs(*this) < kDecimal256PowersOfTen[precision]; } +void BasicDecimal256::GetWholeAndFraction(int scale, BasicDecimal256* whole, + BasicDecimal256* fraction) const { + DCHECK_GE(scale, 0); + DCHECK_LE(scale, 76); + + BasicDecimal256 multiplier(kDecimal256PowersOfTen[scale]); + auto s = Divide(multiplier, whole, fraction); + DCHECK_EQ(s, DecimalStatus::kSuccess); +} + const BasicDecimal256& BasicDecimal256::GetScaleMultiplier(int32_t scale) { DCHECK_GE(scale, 0); DCHECK_LE(scale, 76); diff --git a/cpp/src/arrow/util/basic_decimal.h b/cpp/src/arrow/util/basic_decimal.h index b263bb234a795..d8a91ea76b390 100644 --- a/cpp/src/arrow/util/basic_decimal.h +++ b/cpp/src/arrow/util/basic_decimal.h @@ -366,6 +366,10 @@ class ARROW_EXPORT BasicDecimal256 : public GenericBasicDecimal - static Real ToRealPositive(const Decimal128& decimal, int32_t scale) { + static Real ToRealPositiveNoSplit(const Decimal128& decimal, int32_t scale) { Real x = RealTraits::two_to_64(static_cast(decimal.high_bits())); x += static_cast(decimal.low_bits()); x *= LargePowerOfTen(-scale); return x; } + + /// An approximate conversion from Decimal128 to Real 
that guarantees: + /// 1. If the decimal is an integer, the conversion is exact. + /// 2. If the number of fractional digits is <= RealTraits::kMantissaDigits (e.g. + /// 8 for float and 16 for double), the conversion is within 1 ULP of the exact + /// value. + /// 3. Otherwise, the conversion is within 2^(-RealTraits::kMantissaBits+1) + /// (e.g. 2^-23 for float and 2^-52 for double) of the exact value. + /// Here "exact value" means the closest representable value by Real. + template + static Real ToRealPositive(const Decimal128& decimal, int32_t scale) { + if (scale <= 0 || (decimal.high_bits() == 0 && + decimal.low_bits() <= RealTraits::kMaxPreciseInteger)) { + // No need to split the decimal if it is already an integer (scale <= 0) or if it + // can be precisely represented by Real + return ToRealPositiveNoSplit(decimal, scale); + } + + // Split decimal into whole and fractional parts to avoid precision loss + BasicDecimal128 whole_decimal, fraction_decimal; + decimal.GetWholeAndFraction(scale, &whole_decimal, &fraction_decimal); + + Real whole = ToRealPositiveNoSplit(whole_decimal, 0); + Real fraction = ToRealPositiveNoSplit(fraction_decimal, scale); + + return whole + fraction; + } }; } // namespace @@ -967,7 +994,7 @@ struct Decimal256RealConversion } template - static Real ToRealPositive(const Decimal256& decimal, int32_t scale) { + static Real ToRealPositiveNoSplit(const Decimal256& decimal, int32_t scale) { DCHECK_GE(decimal, 0); Real x = 0; const auto parts_le = bit_util::little_endian::Make(decimal.native_endian_array()); @@ -978,6 +1005,33 @@ struct Decimal256RealConversion x *= LargePowerOfTen(-scale); return x; } + + /// An approximate conversion from Decimal256 to Real that guarantees: + /// 1. If the decimal is an integer, the conversion is exact. + /// 2. If the number of fractional digits is <= RealTraits::kMantissaDigits (e.g. + /// 8 for float and 16 for double), the conversion is within 1 ULP of the exact + /// value. + /// 3. 
Otherwise, the conversion is within 2^(-RealTraits::kMantissaBits+1) + /// (e.g. 2^-23 for float and 2^-52 for double) of the exact value. + /// Here "exact value" means the closest representable value by Real. + template + static Real ToRealPositive(const Decimal256& decimal, int32_t scale) { + const auto parts_le = decimal.little_endian_array(); + if (scale <= 0 || (parts_le[3] == 0 && parts_le[2] == 0 && parts_le[1] == 0 && + parts_le[0] < RealTraits::kMaxPreciseInteger)) { + // No need to split the decimal if it is already an integer (scale <= 0) or if it + // can be precisely represented by Real. NOTE(review): the Decimal128 path compares with <= kMaxPreciseInteger; confirm that < is intended here + return ToRealPositiveNoSplit(decimal, scale); + } + + // Split the decimal into whole and fractional parts to avoid precision loss + BasicDecimal256 whole_decimal, fraction_decimal; + decimal.GetWholeAndFraction(scale, &whole_decimal, &fraction_decimal); + + Real whole = ToRealPositiveNoSplit(whole_decimal, 0); + Real fraction = ToRealPositiveNoSplit(fraction_decimal, scale); + return whole + fraction; + } }; } // namespace diff --git a/cpp/src/arrow/util/decimal_internal.h b/cpp/src/arrow/util/decimal_internal.h index 041aac4ef860d..51a7229ab6678 100644 --- a/cpp/src/arrow/util/decimal_internal.h +++ b/cpp/src/arrow/util/decimal_internal.h @@ -451,6 +451,8 @@ struct RealTraits { static constexpr int kMantissaBits = 24; // ceil(log10(2 ^ kMantissaBits)) static constexpr int kMantissaDigits = 8; + // Integers between zero and kMaxPreciseInteger can be precisely represented + static constexpr uint64_t kMaxPreciseInteger = (1ULL << kMantissaBits) - 1; }; template <> @@ -464,6 +466,8 @@ struct RealTraits { static constexpr int kMantissaBits = 53; // ceil(log10(2 ^ kMantissaBits)) static constexpr int kMantissaDigits = 16; + // Integers between zero and kMaxPreciseInteger can be precisely represented + static constexpr uint64_t kMaxPreciseInteger = (1ULL << 
kMantissaBits) - 1; }; template diff --git a/cpp/src/arrow/util/decimal_test.cc 
b/cpp/src/arrow/util/decimal_test.cc index 1401750ce76d6..6376a9545a0f8 100644 --- a/cpp/src/arrow/util/decimal_test.cc +++ b/cpp/src/arrow/util/decimal_test.cc @@ -1050,6 +1050,24 @@ void CheckDecimalToReal(const std::string& decimal_value, int32_t scale, Real ex << "Decimal value: " << decimal_value << " Scale: " << scale; } +template +void CheckDecimalToRealWithinOneULP(const std::string& decimal_value, int32_t scale, + Real expected) { + Decimal dec(decimal_value); + auto result = dec.template ToReal(scale); + ASSERT_TRUE(result == expected || result == std::nextafter(expected, expected + 1) || + result == std::nextafter(expected, expected - 1)) + << "Decimal value: " << decimal_value << " Scale: " << scale; +} + +template +void CheckDecimalToRealWithinEpsilon(const std::string& decimal_value, int32_t scale, + Real epsilon, Real expected) { + Decimal dec(decimal_value); + ASSERT_TRUE(std::abs(dec.template ToReal(scale) - expected) <= epsilon) + << "Decimal value: " << decimal_value << " Scale: " << scale; +} + template void CheckDecimalToRealApprox(const std::string& decimal_value, int32_t scale, float expected) { @@ -1110,59 +1128,79 @@ class TestDecimalToReal : public ::testing::Test { } } } +}; - // Test precision of conversions to float values - void TestPrecision() { - // 2**63 + 2**40 (exactly representable in a float's 24 bits of precision) - CheckDecimalToReal("9223373136366403584", 0, 9.223373e+18f); - CheckDecimalToReal("-9223373136366403584", 0, -9.223373e+18f); - // 2**64 + 2**41 (exactly representable in a float) - CheckDecimalToReal("18446746272732807168", 0, 1.8446746e+19f); - CheckDecimalToReal("-18446746272732807168", 0, -1.8446746e+19f); - } +TYPED_TEST_SUITE(TestDecimalToReal, RealTypes); +TYPED_TEST(TestDecimalToReal, TestSuccess) { this->TestSuccess(); } + +// Custom test for Decimal::ToReal +template +class TestDecimalToRealFloat : public TestDecimalToReal> {}; +TYPED_TEST_SUITE(TestDecimalToRealFloat, DecimalTypes); - // Test conversions 
with a range of scales - void TestLargeValues(int32_t max_scale) { - // Note that exact comparisons would succeed on some platforms (Linux, macOS). - // Nevertheless, power-of-ten factors are not all exactly representable - // in binary floating point. - for (int32_t scale = -max_scale; scale <= max_scale; scale++) { +TYPED_TEST(TestDecimalToRealFloat, LargeValues) { + auto max_scale = TypeParam::kMaxScale; + // Note that exact comparisons would succeed on some platforms (Linux, macOS). + // Nevertheless, power-of-ten factors are not all exactly representable + // in binary floating point. + for (int32_t scale = -max_scale; scale <= max_scale; scale++) { #ifdef _WIN32 - // MSVC gives pow(10.f, -45.f) == 0 even though 1e-45f is nonzero - if (scale == 45) continue; + // MSVC gives pow(10.f, -45.f) == 0 even though 1e-45f is nonzero + if (scale == 45) continue; #endif - CheckDecimalToRealApprox("1", scale, Pow10(-scale)); - } - for (int32_t scale = -max_scale; scale <= max_scale - 2; scale++) { + CheckDecimalToRealApprox("1", scale, this->Pow10(-scale)); + } + for (int32_t scale = -max_scale; scale <= max_scale - 2; scale++) { #ifdef _WIN32 - // MSVC gives pow(10.f, -45.f) == 0 even though 1e-45f is nonzero - if (scale == 45) continue; + // MSVC gives pow(10.f, -45.f) == 0 even though 1e-45f is nonzero + if (scale == 45) continue; #endif - const Real factor = static_cast(123); - CheckDecimalToRealApprox("123", scale, factor * Pow10(-scale)); - } + const auto factor = static_cast(123); + CheckDecimalToRealApprox("123", scale, factor * this->Pow10(-scale)); } -}; - -TYPED_TEST_SUITE(TestDecimalToReal, RealTypes); - -TYPED_TEST(TestDecimalToReal, TestSuccess) { this->TestSuccess(); } +} -// Custom test for Decimal128::ToReal -class TestDecimal128ToRealFloat : public TestDecimalToReal> { -}; -TEST_F(TestDecimal128ToRealFloat, LargeValues) { TestLargeValues(/*max_scale=*/38); } -TEST_F(TestDecimal128ToRealFloat, Precision) { this->TestPrecision(); } -// Custom test for 
Decimal256::ToReal -class TestDecimal256ToRealFloat : public TestDecimalToReal> { -}; -TEST_F(TestDecimal256ToRealFloat, LargeValues) { TestLargeValues(/*max_scale=*/76); } -TEST_F(TestDecimal256ToRealFloat, Precision) { this->TestPrecision(); } +TYPED_TEST(TestDecimalToRealFloat, Precision) { + // 2**63 + 2**40 (exactly representable in a float's 24 bits of precision) + CheckDecimalToReal("9223373136366403584", 0, 9.223373e+18f); + CheckDecimalToReal("-9223373136366403584", 0, -9.223373e+18f); + // 2**64 + 2**41 (exactly representable in a float) + CheckDecimalToReal("18446746272732807168", 0, 1.8446746e+19f); + CheckDecimalToReal("-18446746272732807168", 0, -1.8446746e+19f); + + // Integers are always exact + auto scale = TypeParam::kMaxScale - 1; + std::string seven = "7."; + seven.append(scale, '0'); // pad with trailing zeros + CheckDecimalToReal(seven, scale, 7.0f); + CheckDecimalToReal("-" + seven, scale, -7.0f); + + CheckDecimalToReal("99999999999999999999.0000000000000000", 16, + 99999999999999999999.0f); + CheckDecimalToReal("-99999999999999999999.0000000000000000", 16, + -99999999999999999999.0f); + + // Small fractions are within one ULP + CheckDecimalToRealWithinOneULP("9999999.9", 1, 9999999.9f); + CheckDecimalToRealWithinOneULP("-9999999.9", 1, -9999999.9f); + CheckDecimalToRealWithinOneULP("9999999.999999", 6, 9999999.999999f); + CheckDecimalToRealWithinOneULP("-9999999.999999", 6, + -9999999.999999f); + + // Large fractions are within 2^-23 + constexpr float epsilon = 1.1920928955078125e-07f; // 2^-23 + CheckDecimalToRealWithinEpsilon( + "112334829348925.99070703983306884765625", 23, epsilon, + 112334829348925.99070703983306884765625f); + CheckDecimalToRealWithinEpsilon( + "1.987748987892758765582589910934859345", 36, epsilon, + 1.987748987892758765582589910934859345f); +} // ToReal tests are disabled on MinGW because of precision issues in results #ifndef __MINGW32__ -// Custom test for Decimal128::ToReal +// Custom test for Decimal::ToReal 
template class TestDecimalToRealDouble : public TestDecimalToReal> { }; @@ -1209,6 +1247,34 @@ TYPED_TEST(TestDecimalToRealDouble, Precision) { 9.999999999999998e+47); CheckDecimalToReal("-99999999999999978859343891977453174784", -10, -9.999999999999998e+47); + // Integers are always exact + auto scale = TypeParam::kMaxScale - 1; + std::string seven = "7."; + seven.append(scale, '0'); // pad with trailing zeros + CheckDecimalToReal(seven, scale, 7.0); + CheckDecimalToReal("-" + seven, scale, -7.0); + + CheckDecimalToReal("99999999999999999999.0000000000000000", 16, + 99999999999999999999.0); + CheckDecimalToReal("-99999999999999999999.0000000000000000", 16, + -99999999999999999999.0); + + // Small fractions are within one ULP + CheckDecimalToRealWithinOneULP("9999999.9", 1, 9999999.9); + CheckDecimalToRealWithinOneULP("-9999999.9", 1, -9999999.9); + CheckDecimalToRealWithinOneULP("9999999.999999999999999", 15, + 9999999.999999999999999); + CheckDecimalToRealWithinOneULP("-9999999.999999999999999", 15, + -9999999.999999999999999); + + // Large fractions are within 2^-52 + constexpr double epsilon = 2.220446049250313080847263336181640625e-16; // 2^-52 + CheckDecimalToRealWithinEpsilon( + "112334829348925.99070703983306884765625", 23, epsilon, + 112334829348925.99070703983306884765625); + CheckDecimalToRealWithinEpsilon( + "1.987748987892758765582589910934859345", 36, epsilon, + 1.987748987892758765582589910934859345); } #endif // __MINGW32__