From 2d05cb0ffdc9ee25c38f23ea4e5e37514735e218 Mon Sep 17 00:00:00 2001 From: Jin Shang Date: Thu, 13 Jul 2023 07:52:37 +0800 Subject: [PATCH 01/11] GH-35942: [C++] Improve Decimal ToReal accuracy --- .../arrow/compute/kernels/scalar_cast_test.cc | 3 +- cpp/src/arrow/util/basic_decimal.cc | 10 +++++ cpp/src/arrow/util/basic_decimal.h | 4 ++ cpp/src/arrow/util/decimal.cc | 43 ++++++++++++++++++- cpp/src/arrow/util/decimal_internal.h | 4 ++ cpp/src/arrow/util/decimal_test.cc | 28 ++++++++++++ 6 files changed, 89 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index 083a85eb346c5..1db06a762544b 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -1025,7 +1025,8 @@ TEST(Cast, DecimalToFloating) { } } - // Edge cases are tested for Decimal128::ToReal() and Decimal256::ToReal() + // Edge cases are tested for Decimal128::ToReal() and Decimal256::ToReal() in + // decimal_test.cc } TEST(Cast, DecimalToString) { diff --git a/cpp/src/arrow/util/basic_decimal.cc b/cpp/src/arrow/util/basic_decimal.cc index f2fd39d6f37ad..0835ab9074a48 100644 --- a/cpp/src/arrow/util/basic_decimal.cc +++ b/cpp/src/arrow/util/basic_decimal.cc @@ -969,6 +969,16 @@ bool BasicDecimal256::FitsInPrecision(int32_t precision) const { return BasicDecimal256::Abs(*this) < kDecimal256PowersOfTen[precision]; } +void BasicDecimal256::GetWholeAndFraction(int scale, BasicDecimal256* whole, + BasicDecimal256* fraction) const { + DCHECK_GE(scale, 0); + DCHECK_LE(scale, 76); + + BasicDecimal256 multiplier(kDecimal256PowersOfTen[scale]); + auto s = Divide(multiplier, whole, fraction); + DCHECK_EQ(s, DecimalStatus::kSuccess); +} + const BasicDecimal256& BasicDecimal256::GetScaleMultiplier(int32_t scale) { DCHECK_GE(scale, 0); DCHECK_LE(scale, 76); diff --git a/cpp/src/arrow/util/basic_decimal.h b/cpp/src/arrow/util/basic_decimal.h index b263bb234a795..d8a91ea76b390 100644 --- a/cpp/src/arrow/util/basic_decimal.h +++ b/cpp/src/arrow/util/basic_decimal.h @@ -366,6 +366,10 @@ class ARROW_EXPORT BasicDecimal256 : public GenericBasicDecimal - static Real ToRealPositive(const Decimal128& decimal, int32_t scale) { + static Real ToRealPositiveNoSplit(const Decimal128& decimal, int32_t scale) { Real x = RealTraits::two_to_64(static_cast(decimal.high_bits())); x += static_cast(decimal.low_bits()); x *= LargePowerOfTen(-scale); + return x; } + + template + static Real ToRealPositive(const Decimal128& decimal, int32_t scale) { + if (scale <= 0 || (decimal.high_bits() == 0 && + decimal.low_bits() <= RealTraits::kMaxPreciseInteger)) { + // No need to split the decimal if it is already an integer (scale <= 0) or if it + // can be precisely represented by Real + return ToRealPositiveNoSplit(decimal, scale); + } + + // Split decimal into whole and fractional parts to avoid precision loss + BasicDecimal128 whole_decimal, fraction_decimal; + decimal.GetWholeAndFraction(scale, &whole_decimal, &fraction_decimal); + + Real whole = ToRealPositiveNoSplit(whole_decimal, 0); + Real fraction = ToRealPositiveNoSplit(fraction_decimal, scale); + + return whole + fraction; + } }; } // namespace @@ -967,7 +987,7 @@ struct Decimal256RealConversion } template - static Real ToRealPositive(const Decimal256& decimal, int32_t scale) { + static Real ToRealPositiveNoSplit(const Decimal256& decimal, int32_t scale) { DCHECK_GE(decimal, 0); Real x = 0; const auto parts_le = bit_util::little_endian::Make(decimal.native_endian_array()); @@ -978,6 +998,25 @@ struct Decimal256RealConversion x *= LargePowerOfTen(-scale); return x; } + + template + static Real ToRealPositive(const Decimal256& decimal, int32_t scale) { + const auto parts_le = bit_util::little_endian::Make(decimal.native_endian_array()); + if (scale <= 0 || (parts_le[3] == 0 && parts_le[2] == 0 && parts_le[1] == 0 && + parts_le[0] < RealTraits::kMaxPreciseInteger)) { + // No need to split the decimal if it is already an integer (scale <= 0) or if it + // can be precisely represented by Real + return ToRealPositiveNoSplit(decimal, scale); + } + + // Split the decimal into whole and fractional parts to avoid precision loss + BasicDecimal256 whole_decimal, fraction_decimal; + decimal.GetWholeAndFraction(scale, &whole_decimal, &fraction_decimal); + + Real whole = ToRealPositiveNoSplit(whole_decimal, 0); + Real fraction = ToRealPositiveNoSplit(fraction_decimal, scale); + return whole + fraction; + } }; } // namespace diff --git a/cpp/src/arrow/util/decimal_internal.h b/cpp/src/arrow/util/decimal_internal.h index 041aac4ef860d..7003f14debf86 100644 --- a/cpp/src/arrow/util/decimal_internal.h +++ b/cpp/src/arrow/util/decimal_internal.h @@ -451,6 +451,8 @@ struct RealTraits { static constexpr int kMantissaBits = 24; // ceil(log10(2 ^ kMantissaBits)) static constexpr int kMantissaDigits = 8; + // Integers between zero and kMaxPreciseInteger can be precisely represented + static constexpr uint64_t kMaxPreciseInteger = 1ULL << kMantissaBits; }; template <> @@ -464,6 +466,8 @@ struct RealTraits { static constexpr int kMantissaBits = 53; // ceil(log10(2 ^ kMantissaBits)) static constexpr int kMantissaDigits = 16; + // Integers between zero and kMaxPreciseInteger can be precisely represented + static constexpr uint64_t kMaxPreciseInteger = 1ULL << kMantissaBits; }; template diff --git a/cpp/src/arrow/util/decimal_test.cc b/cpp/src/arrow/util/decimal_test.cc index 1401750ce76d6..8ee6cfd86a467 100644 --- a/cpp/src/arrow/util/decimal_test.cc +++ b/cpp/src/arrow/util/decimal_test.cc @@ -1119,6 +1119,23 @@ class TestDecimalToReal : public ::testing::Test { // 2**64 + 2**41 (exactly representable in a float) CheckDecimalToReal("18446746272732807168", 0, 1.8446746e+19f); CheckDecimalToReal("-18446746272732807168", 0, -1.8446746e+19f); + // Small number but large scale + auto scale = Decimal::kMaxScale - 1; + std::string seven = "7."; + for (int32_t i = 0; i < scale; ++i) { + seven += "0"; + } + CheckDecimalToReal(seven, scale, 7.0f); + CheckDecimalToReal("-" + seven, scale, -7.0f); + + // Decimal >= max mantisa integer + + /** The following cases still fail because 9999999 * 1e-1 != 999999.9f + CheckDecimalToReal("999999.9", 1, 999999.9); + CheckDecimalToReal("-999999.9", 1, -999999.9); + */ + CheckDecimalToReal("9999999.9", 1, 9999999.9); + CheckDecimalToReal("-9999999.9", 1, -9999999.9); } // Test conversions with a range of scales @@ -1209,6 +1226,17 @@ TYPED_TEST(TestDecimalToRealDouble, Precision) { 9.999999999999998e+47); CheckDecimalToReal("-99999999999999978859343891977453174784", -10, -9.999999999999998e+47); + // Small number but large scale + std::string seven = "7."; + for (int32_t i = 0; i < TypeParam::kMaxScale - 1; ++i) { + seven += "0"; + } + CheckDecimalToReal(seven, TypeParam::kMaxScale - 1, 7.0f); + CheckDecimalToReal("-" + seven, TypeParam::kMaxScale - 1, -7.0f); + + // Decimal >= max mantisa integer + CheckDecimalToReal("999999999999999.9", 1, 999999999999999.9); + CheckDecimalToReal("-999999999999999.9", 1, -999999999999999.9); } #endif // __MINGW32__ From 540b122849944c538b8750bcea2d396ca056d988 Mon Sep 17 00:00:00 2001 From: Jin Shang Date: Thu, 13 Jul 2023 09:13:11 +0800 Subject: [PATCH 02/11] remove useless test --- cpp/src/arrow/util/decimal_test.cc | 5 ----- 1 file changed, 5 deletions(-) diff --git a/cpp/src/arrow/util/decimal_test.cc b/cpp/src/arrow/util/decimal_test.cc index 8ee6cfd86a467..0898a9fd8dfee 100644 --- a/cpp/src/arrow/util/decimal_test.cc +++ b/cpp/src/arrow/util/decimal_test.cc @@ -1129,11 +1129,6 @@ class TestDecimalToReal : public ::testing::Test { CheckDecimalToReal("-" + seven, scale, -7.0f); // Decimal >= max mantisa integer - - /** The following cases still fail because 9999999 * 1e-1 != 999999.9f - CheckDecimalToReal("999999.9", 1, 999999.9); - CheckDecimalToReal("-999999.9", 1, -999999.9); - */ CheckDecimalToReal("9999999.9", 1, 9999999.9); CheckDecimalToReal("-9999999.9", 1, -9999999.9); } From 4043c06d34faae5ea31c832bae3cafd1281fa0a2 Mon Sep 17 00:00:00 2001 From: Jin Shang Date: Sun, 16 Jul 2023 18:33:34 +0800 Subject: [PATCH 03/11] update doc and test cases --- cpp/src/arrow/util/decimal.cc | 14 ++++++ cpp/src/arrow/util/decimal_test.cc | 79 +++++++++++++++++++++++++----- 2 files changed, 81 insertions(+), 12 deletions(-) diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc index 759e99e436780..2c2b336b1c128 100644 --- a/cpp/src/arrow/util/decimal.cc +++ b/cpp/src/arrow/util/decimal.cc @@ -313,6 +313,13 @@ struct Decimal128RealConversion return x; } + /// An appoximate conversion from Decimal128 to Real that guarantees: + /// 1. If the decimal is an integer, the conversion is exact. + /// 2. If the number of fractional digits is <= RealTraits::kMantissaDigits (e.g. + /// 8 for float and 16 for double), the conversion is within 1 ULP of the exact value + /// 3. Otherwise, the conversion is within 2^(-RealTraits::kMantissaDigits+1) + /// (e.g. 2^-23 for float and 2^-52 for double) of the exact value + /// Here "exact" means the closest representable value by Real. template static Real ToRealPositive(const Decimal128& decimal, int32_t scale) { if (scale <= 0 || (decimal.high_bits() == 0 && @@ -999,6 +1006,13 @@ struct Decimal256RealConversion return x; } + /// An appoximate conversion from Decimal128 to Real that guarantees: + /// 1. If the decimal is an integer, the conversion is exact. + /// 2. If the number of fractional digits is <= RealTraits::kMantissaDigits (e.g. + /// 8 for float and 16 for double), the conversion is within 1 ULP of the exact value + /// 3. Otherwise, the conversion is within 2^(-RealTraits::kMantissaDigits+1) + /// (e.g. 2^-23 for float and 2^-52 for double) of the exact value + /// Here "exact" means the closest representable value by Real. template static Real ToRealPositive(const Decimal256& decimal, int32_t scale) { const auto parts_le = bit_util::little_endian::Make(decimal.native_endian_array()); diff --git a/cpp/src/arrow/util/decimal_test.cc b/cpp/src/arrow/util/decimal_test.cc index 0898a9fd8dfee..7a6bbdf399002 100644 --- a/cpp/src/arrow/util/decimal_test.cc +++ b/cpp/src/arrow/util/decimal_test.cc @@ -1050,6 +1050,24 @@ void CheckDecimalToReal(const std::string& decimal_value, int32_t scale, Real ex << "Decimal value: " << decimal_value << " Scale: " << scale; } +template +void CheckDecimalToRealWithinByOneULP(const std::string& decimal_value, int32_t scale, + Real expected) { + Decimal dec(decimal_value); + auto result = dec.template ToReal(scale); + ASSERT_TRUE(result == expected || result == std::nextafter(expected, expected + 1) || + result == std::nextafter(expected, expected - 1)) + << "Decimal value: " << decimal_value << " Scale: " << scale; +} + +template +void CheckDecimalToRealWithinByEpsilon(const std::string& decimal_value, int32_t scale, + Real epsilon, Real expected) { + Decimal dec(decimal_value); + ASSERT_TRUE(std::abs(dec.template ToReal(scale) - expected) <= epsilon) + << "Decimal value: " << decimal_value << " Scale: " << scale; +} + template void CheckDecimalToRealApprox(const std::string& decimal_value, int32_t scale, float expected) { @@ -1119,7 +1137,8 @@ class TestDecimalToReal : public ::testing::Test { // 2**64 + 2**41 (exactly representable in a float) CheckDecimalToReal("18446746272732807168", 0, 1.8446746e+19f); CheckDecimalToReal("-18446746272732807168", 0, -1.8446746e+19f); - // Small number but large scale + + // Integers are always exact auto scale = Decimal::kMaxScale - 1; std::string seven = "7."; for (int32_t i = 0; i < scale; ++i) { @@ -1128,9 +1147,26 @@ class TestDecimalToReal : public ::testing::Test { CheckDecimalToReal(seven, scale, 7.0f); CheckDecimalToReal("-" + seven, scale, -7.0f); - // Decimal >= max mantisa integer - CheckDecimalToReal("9999999.9", 1, 9999999.9); - CheckDecimalToReal("-9999999.9", 1, -9999999.9); + CheckDecimalToReal("99999999999999999999.0000000000000000", 16, + 99999999999999999999.0f); + CheckDecimalToReal("-99999999999999999999.0000000000000000", 16, + -99999999999999999999.0f); + + // Small fractions are within one ULP + CheckDecimalToRealWithinByOneULP("9999999.9", 1, 9999999.9f); + CheckDecimalToRealWithinByOneULP("-9999999.9", 1, -9999999.9f); + CheckDecimalToRealWithinByOneULP("9999999.999999", 6, 9999999.999999f); + CheckDecimalToRealWithinByOneULP("-9999999.999999", 6, + -9999999.999999f); + + // Large fractions are within 2^-23 + constexpr Real epsilon = 1.1920928955078125e-07f; // 2^-23 + CheckDecimalToRealWithinByEpsilon( + "112334829348925.99070703983306884765625", 23, epsilon, + 112334829348925.99070703983306884765625f); + CheckDecimalToRealWithinByEpsilon( + "1.987748987892758765582589910934859345", 36, epsilon, + 1.987748987892758765582589910934859345f); } // Test conversions with a range of scales @@ -1221,17 +1257,36 @@ TYPED_TEST(TestDecimalToRealDouble, Precision) { 9.999999999999998e+47); CheckDecimalToReal("-99999999999999978859343891977453174784", -10, -9.999999999999998e+47); - // Small number but large scale + // Integers are always exact + auto scale = TypeParam::kMaxScale - 1; std::string seven = "7."; - for (int32_t i = 0; i < TypeParam::kMaxScale - 1; ++i) { + for (int32_t i = 0; i < scale; ++i) { seven += "0"; } - CheckDecimalToReal(seven, TypeParam::kMaxScale - 1, 7.0f); - CheckDecimalToReal("-" + seven, TypeParam::kMaxScale - 1, -7.0f); - - // Decimal >= max mantisa integer - CheckDecimalToReal("999999999999999.9", 1, 999999999999999.9); - CheckDecimalToReal("-999999999999999.9", 1, -999999999999999.9); + CheckDecimalToReal(seven, scale, 7.0); + CheckDecimalToReal("-" + seven, scale, -7.0); + + CheckDecimalToReal("99999999999999999999.0000000000000000", 16, + 99999999999999999999.0); + CheckDecimalToReal("-99999999999999999999.0000000000000000", 16, + -99999999999999999999.0); + + // Small fractions are within one ULP + CheckDecimalToRealWithinByOneULP("9999999.9", 1, 9999999.9); + CheckDecimalToRealWithinByOneULP("-9999999.9", 1, -9999999.9); + CheckDecimalToRealWithinByOneULP("9999999.999999999999999", 15, + 9999999.999999999999999); + CheckDecimalToRealWithinByOneULP("-9999999.999999999999999", 15, + -9999999.999999999999999); + + // Large fractions are within 2^-52 + constexpr double epsilon = 2.220446049250313080847263336181640625e-16; // 2^-52 + CheckDecimalToRealWithinByEpsilon( + "112334829348925.99070703983306884765625", 23, epsilon, + 112334829348925.99070703983306884765625); + CheckDecimalToRealWithinByEpsilon( + "1.987748987892758765582589910934859345", 36, epsilon, + 1.987748987892758765582589910934859345); } #endif // __MINGW32__ From 55b3cdd654db6bbd53c7fa886523147613822dc0 Mon Sep 17 00:00:00 2001 From: Jin Shang Date: Sun, 16 Jul 2023 18:38:33 +0800 Subject: [PATCH 04/11] fix doc --- cpp/src/arrow/util/decimal.cc | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc index 2c2b336b1c128..4d8c83b7526f4 100644 --- a/cpp/src/arrow/util/decimal.cc +++ b/cpp/src/arrow/util/decimal.cc @@ -316,10 +316,11 @@ struct Decimal128RealConversion /// An appoximate conversion from Decimal128 to Real that guarantees: /// 1. If the decimal is an integer, the conversion is exact. /// 2. If the number of fractional digits is <= RealTraits::kMantissaDigits (e.g. - /// 8 for float and 16 for double), the conversion is within 1 ULP of the exact value + /// 8 for float and 16 for double), the conversion is within 1 ULP of the exact + /// value. /// 3. Otherwise, the conversion is within 2^(-RealTraits::kMantissaDigits+1) - /// (e.g. 2^-23 for float and 2^-52 for double) of the exact value - /// Here "exact" means the closest representable value by Real. + /// (e.g. 2^-23 for float and 2^-52 for double) of the exact value. + /// Here "exact value" means the closest representable value by Real. template static Real ToRealPositive(const Decimal128& decimal, int32_t scale) { if (scale <= 0 || (decimal.high_bits() == 0 && @@ -1006,13 +1007,14 @@ struct Decimal256RealConversion return x; } - /// An appoximate conversion from Decimal128 to Real that guarantees: + /// An appoximate conversion from Decimal256 to Real that guarantees: /// 1. If the decimal is an integer, the conversion is exact. /// 2. If the number of fractional digits is <= RealTraits::kMantissaDigits (e.g. - /// 8 for float and 16 for double), the conversion is within 1 ULP of the exact value + /// 8 for float and 16 for double), the conversion is within 1 ULP of the exact + /// value. /// 3. Otherwise, the conversion is within 2^(-RealTraits::kMantissaDigits+1) - /// (e.g. 2^-23 for float and 2^-52 for double) of the exact value - /// Here "exact" means the closest representable value by Real. + /// (e.g. 2^-23 for float and 2^-52 for double) of the exact value. + /// Here "exact value" means the closest representable value by Real. template static Real ToRealPositive(const Decimal256& decimal, int32_t scale) { const auto parts_le = bit_util::little_endian::Make(decimal.native_endian_array()); From 27343c61c4bd40464e70c2f3386d497e42b04b67 Mon Sep 17 00:00:00 2001 From: Jin Shang Date: Sun, 16 Jul 2023 18:50:17 +0800 Subject: [PATCH 05/11] minor fix --- cpp/src/arrow/util/decimal.cc | 1 - cpp/src/arrow/util/decimal_test.cc | 37 +++++++++++++++--------------- 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc index 4d8c83b7526f4..d827d6318d905 100644 --- a/cpp/src/arrow/util/decimal.cc +++ b/cpp/src/arrow/util/decimal.cc @@ -309,7 +309,6 @@ struct Decimal128RealConversion Real x = RealTraits::two_to_64(static_cast(decimal.high_bits())); x += static_cast(decimal.low_bits()); x *= LargePowerOfTen(-scale); - return x; } diff --git a/cpp/src/arrow/util/decimal_test.cc b/cpp/src/arrow/util/decimal_test.cc index 7a6bbdf399002..fab2f8ae80140 100644 --- a/cpp/src/arrow/util/decimal_test.cc +++ b/cpp/src/arrow/util/decimal_test.cc @@ -1051,8 +1051,8 @@ void CheckDecimalToReal(const std::string& decimal_value, int32_t scale, Real ex } template -void CheckDecimalToRealWithinByOneULP(const std::string& decimal_value, int32_t scale, - Real expected) { +void CheckDecimalToRealWithinOneULP(const std::string& decimal_value, int32_t scale, + Real expected) { Decimal dec(decimal_value); auto result = dec.template ToReal(scale); ASSERT_TRUE(result == expected || result == std::nextafter(expected, expected + 1) || @@ -1061,8 +1061,8 @@ void CheckDecimalToRealWithinByOneULP(const std::string& decimal_value, int32_t } template -void CheckDecimalToRealWithinByEpsilon(const std::string& decimal_value, int32_t scale, - Real epsilon, Real expected) { +void CheckDecimalToRealWithinEpsilon(const std::string& decimal_value, int32_t scale, + Real epsilon, Real expected) { Decimal dec(decimal_value); ASSERT_TRUE(std::abs(dec.template ToReal(scale) - expected) <= epsilon) << "Decimal value: " << decimal_value << " Scale: " << scale; @@ -1153,18 +1153,17 @@ class TestDecimalToReal : public ::testing::Test { -99999999999999999999.0f); // Small fractions are within one ULP - CheckDecimalToRealWithinByOneULP("9999999.9", 1, 9999999.9f); - CheckDecimalToRealWithinByOneULP("-9999999.9", 1, -9999999.9f); - CheckDecimalToRealWithinByOneULP("9999999.999999", 6, 9999999.999999f); - CheckDecimalToRealWithinByOneULP("-9999999.999999", 6, - -9999999.999999f); + CheckDecimalToRealWithinOneULP("9999999.9", 1, 9999999.9f); + CheckDecimalToRealWithinOneULP("-9999999.9", 1, -9999999.9f); + CheckDecimalToRealWithinOneULP("9999999.999999", 6, 9999999.999999f); + CheckDecimalToRealWithinOneULP("-9999999.999999", 6, -9999999.999999f); // Large fractions are within 2^-23 constexpr Real epsilon = 1.1920928955078125e-07f; // 2^-23 - CheckDecimalToRealWithinByEpsilon( + CheckDecimalToRealWithinEpsilon( "112334829348925.99070703983306884765625", 23, epsilon, 112334829348925.99070703983306884765625f); - CheckDecimalToRealWithinByEpsilon( + CheckDecimalToRealWithinEpsilon( "1.987748987892758765582589910934859345", 36, epsilon, 1.987748987892758765582589910934859345f); } @@ -1272,19 +1271,19 @@ TYPED_TEST(TestDecimalToRealDouble, Precision) { -99999999999999999999.0); // Small fractions are within one ULP - CheckDecimalToRealWithinByOneULP("9999999.9", 1, 9999999.9); - CheckDecimalToRealWithinByOneULP("-9999999.9", 1, -9999999.9); - CheckDecimalToRealWithinByOneULP("9999999.999999999999999", 15, - 9999999.999999999999999); - CheckDecimalToRealWithinByOneULP("-9999999.999999999999999", 15, - -9999999.999999999999999); + CheckDecimalToRealWithinOneULP("9999999.9", 1, 9999999.9); + CheckDecimalToRealWithinOneULP("-9999999.9", 1, -9999999.9); + CheckDecimalToRealWithinOneULP("9999999.999999999999999", 15, + 9999999.999999999999999); + CheckDecimalToRealWithinOneULP("-9999999.999999999999999", 15, + -9999999.999999999999999); // Large fractions are within 2^-52 constexpr double epsilon = 2.220446049250313080847263336181640625e-16; // 2^-52 - CheckDecimalToRealWithinByEpsilon( + CheckDecimalToRealWithinEpsilon( "112334829348925.99070703983306884765625", 23, epsilon, 112334829348925.99070703983306884765625); - CheckDecimalToRealWithinByEpsilon( + CheckDecimalToRealWithinEpsilon( "1.987748987892758765582589910934859345", 36, epsilon, 1.987748987892758765582589910934859345); } From dbd7cdd6ddd475967f193a8a0640fe36da398da1 Mon Sep 17 00:00:00 2001 From: Jin Shang Date: Mon, 17 Jul 2023 22:55:09 +0800 Subject: [PATCH 06/11] Update cpp/src/arrow/util/decimal.cc Co-authored-by: Antoine Pitrou --- cpp/src/arrow/util/decimal.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc index d827d6318d905..704b6bb9d491d 100644 --- a/cpp/src/arrow/util/decimal.cc +++ b/cpp/src/arrow/util/decimal.cc @@ -1016,7 +1016,7 @@ struct Decimal256RealConversion /// Here "exact value" means the closest representable value by Real. template static Real ToRealPositive(const Decimal256& decimal, int32_t scale) { - const auto parts_le = bit_util::little_endian::Make(decimal.native_endian_array()); + const auto parts_le = decimal.little_endian_array(); if (scale <= 0 || (parts_le[3] == 0 && parts_le[2] == 0 && parts_le[1] == 0 && parts_le[0] < RealTraits::kMaxPreciseInteger)) { // No need to split the decimal if it is already an integer (scale <= 0) or if it From d101b54b263361f29bdf1430d18f4b474cc3ca0c Mon Sep 17 00:00:00 2001 From: Jin Shang Date: Mon, 17 Jul 2023 22:55:15 +0800 Subject: [PATCH 07/11] Update cpp/src/arrow/util/decimal_internal.h Co-authored-by: Antoine Pitrou --- cpp/src/arrow/util/decimal_internal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/util/decimal_internal.h b/cpp/src/arrow/util/decimal_internal.h index 7003f14debf86..cd5cb3ecb21c0 100644 --- a/cpp/src/arrow/util/decimal_internal.h +++ b/cpp/src/arrow/util/decimal_internal.h @@ -452,7 +452,7 @@ struct RealTraits { // ceil(log10(2 ^ kMantissaBits)) static constexpr int kMantissaDigits = 8; // Integers between zero and kMaxPreciseInteger can be precisely represented - static constexpr uint64_t kMaxPreciseInteger = 1ULL << kMantissaBits; + static constexpr uint64_t kMaxPreciseInteger = (1ULL << kMantissaBits) - 1; }; template <> From a3319659c066f898c5450a162df1de11ef7604f4 Mon Sep 17 00:00:00 2001 From: Jin Shang Date: Mon, 17 Jul 2023 22:55:25 +0800 Subject: [PATCH 08/11] Update cpp/src/arrow/util/decimal_test.cc Co-authored-by: Antoine Pitrou --- cpp/src/arrow/util/decimal_test.cc | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cpp/src/arrow/util/decimal_test.cc b/cpp/src/arrow/util/decimal_test.cc index fab2f8ae80140..fceeb5a1ba13f 100644 --- a/cpp/src/arrow/util/decimal_test.cc +++ b/cpp/src/arrow/util/decimal_test.cc @@ -1141,9 +1141,7 @@ class TestDecimalToReal : public ::testing::Test { // Integers are always exact auto scale = Decimal::kMaxScale - 1; std::string seven = "7."; - for (int32_t i = 0; i < scale; ++i) { - seven += "0"; - } + seven.append(scale, '0'); // pad with trailing zeros CheckDecimalToReal(seven, scale, 7.0f); CheckDecimalToReal("-" + seven, scale, -7.0f); From 2dce86c12bc8671a709e22a1a182981553f8048a Mon Sep 17 00:00:00 2001 From: Jin Shang Date: Mon, 17 Jul 2023 07:30:11 +0800 Subject: [PATCH 09/11] update test --- cpp/src/arrow/util/decimal_test.cc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/util/decimal_test.cc b/cpp/src/arrow/util/decimal_test.cc index fceeb5a1ba13f..451445b3093af 100644 --- a/cpp/src/arrow/util/decimal_test.cc +++ b/cpp/src/arrow/util/decimal_test.cc @@ -1131,6 +1131,9 @@ class TestDecimalToReal : public ::testing::Test { // Test precision of conversions to float values void TestPrecision() { + static_assert(std::is_same_v, + "double is tested in TestDecimalToRealDouble::Precision, not here"); + // 2**63 + 2**40 (exactly representable in a float's 24 bits of precision) CheckDecimalToReal("9223373136366403584", 0, 9.223373e+18f); CheckDecimalToReal("-9223373136366403584", 0, -9.223373e+18f); @@ -1257,9 +1260,7 @@ TYPED_TEST(TestDecimalToRealDouble, Precision) { // Integers are always exact auto scale = TypeParam::kMaxScale - 1; std::string seven = "7."; - for (int32_t i = 0; i < scale; ++i) { - seven += "0"; - } + seven.append(scale, '0'); CheckDecimalToReal(seven, scale, 7.0); CheckDecimalToReal("-" + seven, scale, -7.0); From d0c5ffaf9a4b722f7289f329a110e22dc2dbcecc Mon Sep 17 00:00:00 2001 From: Jin Shang Date: Mon, 17 Jul 2023 11:55:41 +0800 Subject: [PATCH 10/11] remove fake type param --- cpp/src/arrow/util/decimal_test.cc | 130 +++++++++++++---------------- 1 file changed, 60 insertions(+), 70 deletions(-) diff --git a/cpp/src/arrow/util/decimal_test.cc b/cpp/src/arrow/util/decimal_test.cc index 451445b3093af..6376a9545a0f8 100644 --- a/cpp/src/arrow/util/decimal_test.cc +++ b/cpp/src/arrow/util/decimal_test.cc @@ -1128,89 +1128,79 @@ class TestDecimalToReal : public ::testing::Test { } } } +}; + +TYPED_TEST_SUITE(TestDecimalToReal, RealTypes); +TYPED_TEST(TestDecimalToReal, TestSuccess) { this->TestSuccess(); } - // Test precision of conversions to float values - void TestPrecision() { - static_assert(std::is_same_v, - "double is tested in TestDecimalToRealDouble::Precision, not here"); - - // 2**63 + 2**40 (exactly representable in a float's 24 bits of precision) - CheckDecimalToReal("9223373136366403584", 0, 9.223373e+18f); - CheckDecimalToReal("-9223373136366403584", 0, -9.223373e+18f); - // 2**64 + 2**41 (exactly representable in a float) - CheckDecimalToReal("18446746272732807168", 0, 1.8446746e+19f); - CheckDecimalToReal("-18446746272732807168", 0, -1.8446746e+19f); - - // Integers are always exact - auto scale = Decimal::kMaxScale - 1; - std::string seven = "7."; - seven.append(scale, '0'); // pad with trailing zeros - CheckDecimalToReal(seven, scale, 7.0f); - CheckDecimalToReal("-" + seven, scale, -7.0f); - - CheckDecimalToReal("99999999999999999999.0000000000000000", 16, - 99999999999999999999.0f); - CheckDecimalToReal("-99999999999999999999.0000000000000000", 16, - -99999999999999999999.0f); - - // Small fractions are within one ULP - CheckDecimalToRealWithinOneULP("9999999.9", 1, 9999999.9f); - CheckDecimalToRealWithinOneULP("-9999999.9", 1, -9999999.9f); - CheckDecimalToRealWithinOneULP("9999999.999999", 6, 9999999.999999f); - CheckDecimalToRealWithinOneULP("-9999999.999999", 6, -9999999.999999f); - - // Large fractions are within 2^-23 - constexpr Real epsilon = 1.1920928955078125e-07f; // 2^-23 - CheckDecimalToRealWithinEpsilon( - "112334829348925.99070703983306884765625", 23, epsilon, - 112334829348925.99070703983306884765625f); - CheckDecimalToRealWithinEpsilon( - "1.987748987892758765582589910934859345", 36, epsilon, - 1.987748987892758765582589910934859345f); - } - - // Test conversions with a range of scales - void TestLargeValues(int32_t max_scale) { - // Note that exact comparisons would succeed on some platforms (Linux, macOS). - // Nevertheless, power-of-ten factors are not all exactly representable - // in binary floating point. - for (int32_t scale = -max_scale; scale <= max_scale; scale++) { +// Custom test for Decimal::ToReal +template +class TestDecimalToRealFloat : public TestDecimalToReal> {}; +TYPED_TEST_SUITE(TestDecimalToRealFloat, DecimalTypes); + +TYPED_TEST(TestDecimalToRealFloat, LargeValues) { + auto max_scale = TypeParam::kMaxScale; + // Note that exact comparisons would succeed on some platforms (Linux, macOS). + // Nevertheless, power-of-ten factors are not all exactly representable + // in binary floating point. + for (int32_t scale = -max_scale; scale <= max_scale; scale++) { #ifdef _WIN32 - // MSVC gives pow(10.f, -45.f) == 0 even though 1e-45f is nonzero - if (scale == 45) continue; + // MSVC gives pow(10.f, -45.f) == 0 even though 1e-45f is nonzero + if (scale == 45) continue; #endif - CheckDecimalToRealApprox("1", scale, Pow10(-scale)); - } - for (int32_t scale = -max_scale; scale <= max_scale - 2; scale++) { + CheckDecimalToRealApprox("1", scale, this->Pow10(-scale)); + } + for (int32_t scale = -max_scale; scale <= max_scale - 2; scale++) { #ifdef _WIN32 - // MSVC gives pow(10.f, -45.f) == 0 even though 1e-45f is nonzero - if (scale == 45) continue; + // MSVC gives pow(10.f, -45.f) == 0 even though 1e-45f is nonzero + if (scale == 45) continue; #endif - const Real factor = static_cast(123); - CheckDecimalToRealApprox("123", scale, factor * Pow10(-scale)); - } + const auto factor = static_cast(123); + CheckDecimalToRealApprox("123", scale, factor * this->Pow10(-scale)); } -}; +} -TYPED_TEST_SUITE(TestDecimalToReal, RealTypes); +TYPED_TEST(TestDecimalToRealFloat, Precision) { + // 2**63 + 2**40 (exactly representable in a float's 24 bits of precision) + CheckDecimalToReal("9223373136366403584", 0, 9.223373e+18f); + CheckDecimalToReal("-9223373136366403584", 0, -9.223373e+18f); + // 2**64 + 2**41 (exactly representable in a float) + CheckDecimalToReal("18446746272732807168", 0, 1.8446746e+19f); + CheckDecimalToReal("-18446746272732807168", 0, -1.8446746e+19f); -TYPED_TEST(TestDecimalToReal, TestSuccess) { this->TestSuccess(); } + // Integers are always exact + auto scale = TypeParam::kMaxScale - 1; + std::string seven = "7."; + seven.append(scale, '0'); // pad with trailing zeros + CheckDecimalToReal(seven, scale, 7.0f); + CheckDecimalToReal("-" + seven, scale, -7.0f); -// Custom test for Decimal128::ToReal -class TestDecimal128ToRealFloat : public TestDecimalToReal> { -}; -TEST_F(TestDecimal128ToRealFloat, LargeValues) { TestLargeValues(/*max_scale=*/38); } -TEST_F(TestDecimal128ToRealFloat, Precision) { this->TestPrecision(); } -// Custom test for Decimal256::ToReal -class TestDecimal256ToRealFloat : public TestDecimalToReal> { -}; -TEST_F(TestDecimal256ToRealFloat, LargeValues) { TestLargeValues(/*max_scale=*/76); } -TEST_F(TestDecimal256ToRealFloat, Precision) { this->TestPrecision(); } + CheckDecimalToReal("99999999999999999999.0000000000000000", 16, + 99999999999999999999.0f); + CheckDecimalToReal("-99999999999999999999.0000000000000000", 16, + -99999999999999999999.0f); + + // Small fractions are within one ULP + CheckDecimalToRealWithinOneULP("9999999.9", 1, 9999999.9f); + CheckDecimalToRealWithinOneULP("-9999999.9", 1, -9999999.9f); + CheckDecimalToRealWithinOneULP("9999999.999999", 6, 9999999.999999f); + CheckDecimalToRealWithinOneULP("-9999999.999999", 6, + -9999999.999999f); + + // Large fractions are within 2^-23 + constexpr float epsilon = 1.1920928955078125e-07f; // 2^-23 + CheckDecimalToRealWithinEpsilon( + "112334829348925.99070703983306884765625", 23, epsilon, + 112334829348925.99070703983306884765625f); + CheckDecimalToRealWithinEpsilon( + "1.987748987892758765582589910934859345", 36, epsilon, + 1.987748987892758765582589910934859345f); +} // ToReal tests are disabled on MinGW because of precision issues in results #ifndef __MINGW32__ -// Custom test for Decimal128::ToReal +// Custom test for Decimal::ToReal template class TestDecimalToRealDouble : public TestDecimalToReal> { }; From 227e260d824951f5e7cf9f6847493075bf096d57 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Tue, 18 Jul 2023 14:58:45 +0200 Subject: [PATCH 11/11] Fix off by one error --- cpp/src/arrow/util/decimal_internal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/util/decimal_internal.h b/cpp/src/arrow/util/decimal_internal.h index cd5cb3ecb21c0..51a7229ab6678 100644 --- a/cpp/src/arrow/util/decimal_internal.h +++ b/cpp/src/arrow/util/decimal_internal.h @@ -467,7 +467,7 @@ struct RealTraits { // ceil(log10(2 ^ kMantissaBits)) static constexpr int kMantissaDigits = 16; // Integers between zero and kMaxPreciseInteger can be precisely represented - static constexpr uint64_t kMaxPreciseInteger = 1ULL << kMantissaBits; + static constexpr uint64_t kMaxPreciseInteger = (1ULL << kMantissaBits) - 1; }; template