apache · pitrou · Jul 18, 2023 · Jul 12, 2023 · Jul 13, 2023 · Jul 16, 2023
diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
@@ -1025,7 +1025,8 @@ TEST(Cast, DecimalToFloating) {
     }
   }
 
-  // Edge cases are tested for Decimal128::ToReal() and Decimal256::ToReal()
+  // Edge cases are tested for Decimal128::ToReal() and Decimal256::ToReal() in
+  // decimal_test.cc
 }
 
 TEST(Cast, DecimalToString) {

diff --git a/cpp/src/arrow/util/basic_decimal.cc b/cpp/src/arrow/util/basic_decimal.cc
@@ -969,6 +969,16 @@ bool BasicDecimal256::FitsInPrecision(int32_t precision) const {
   return BasicDecimal256::Abs(*this) < kDecimal256PowersOfTen[precision];
 }
 
+void BasicDecimal256::GetWholeAndFraction(int scale, BasicDecimal256* whole,
+                                          BasicDecimal256* fraction) const {
+  DCHECK_GE(scale, 0);
+  DCHECK_LE(scale, 76);
+
+  BasicDecimal256 multiplier(kDecimal256PowersOfTen[scale]);
+  auto s = Divide(multiplier, whole, fraction);
+  DCHECK_EQ(s, DecimalStatus::kSuccess);
+}
+
 const BasicDecimal256& BasicDecimal256::GetScaleMultiplier(int32_t scale) {
   DCHECK_GE(scale, 0);
   DCHECK_LE(scale, 76);

diff --git a/cpp/src/arrow/util/basic_decimal.h b/cpp/src/arrow/util/basic_decimal.h
@@ -366,6 +366,10 @@ class ARROW_EXPORT BasicDecimal256 : public GenericBasicDecimal<BasicDecimal256,
   /// \brief Get the lowest bits of the two's complement representation of the number.
   uint64_t low_bits() const { return bit_util::little_endian::Make(array_)[0]; }
 
+  /// \brief separate the integer and fractional parts for the given scale.
+  void GetWholeAndFraction(int32_t scale, BasicDecimal256* whole,
+                           BasicDecimal256* fraction) const;
+
   /// \brief Scale multiplier for given scale value.
   static const BasicDecimal256& GetScaleMultiplier(int32_t scale);
   /// \brief Half-scale multiplier for given scale value.

diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc
@@ -305,12 +305,39 @@ struct Decimal128RealConversion
   }
 
   template <typename Real>
-  static Real ToRealPositive(const Decimal128& decimal, int32_t scale) {
+  static Real ToRealPositiveNoSplit(const Decimal128& decimal, int32_t scale) {
     Real x = RealTraits<Real>::two_to_64(static_cast<Real>(decimal.high_bits()));
     x += static_cast<Real>(decimal.low_bits());
     x *= LargePowerOfTen<Real>(-scale);
     return x;
   }
+
+  /// An appoximate conversion from Decimal128 to Real that guarantees:
+  /// 1. If the decimal is an integer, the conversion is exact.
+  /// 2. If the number of fractional digits is <= RealTraits<Real>::kMantissaDigits (e.g.
+  ///    8 for float and 16 for double), the conversion is within 1 ULP of the exact
+  ///    value.
+  /// 3. Otherwise, the conversion is within 2^(-RealTraits<Real>::kMantissaDigits+1)
+  ///    (e.g. 2^-23 for float and 2^-52 for double) of the exact value.
+  /// Here "exact value" means the closest representable value by Real.
+  template <typename Real>
+  static Real ToRealPositive(const Decimal128& decimal, int32_t scale) {
+    if (scale <= 0 || (decimal.high_bits() == 0 &&
+                       decimal.low_bits() <= RealTraits<Real>::kMaxPreciseInteger)) {
+      // No need to split the decimal if it is already an integer (scale <= 0) or if it
+      // can be precisely represented by Real
+      return ToRealPositiveNoSplit<Real>(decimal, scale);
+    }
+
+    // Split decimal into whole and fractional parts to avoid precision loss
+    BasicDecimal128 whole_decimal, fraction_decimal;
+    decimal.GetWholeAndFraction(scale, &whole_decimal, &fraction_decimal);
+
+    Real whole = ToRealPositiveNoSplit<Real>(whole_decimal, 0);
+    Real fraction = ToRealPositiveNoSplit<Real>(fraction_decimal, scale);
+
+    return whole + fraction;
+  }
 };
 
 }  // namespace
@@ -967,7 +994,7 @@ struct Decimal256RealConversion
   }
 
   template <typename Real>
-  static Real ToRealPositive(const Decimal256& decimal, int32_t scale) {
+  static Real ToRealPositiveNoSplit(const Decimal256& decimal, int32_t scale) {
     DCHECK_GE(decimal, 0);
     Real x = 0;
     const auto parts_le = bit_util::little_endian::Make(decimal.native_endian_array());
@@ -978,6 +1005,33 @@ struct Decimal256RealConversion
     x *= LargePowerOfTen<Real>(-scale);
     return x;
   }
+
+  /// An appoximate conversion from Decimal256 to Real that guarantees:
+  /// 1. If the decimal is an integer, the conversion is exact.
+  /// 2. If the number of fractional digits is <= RealTraits<Real>::kMantissaDigits (e.g.
+  ///    8 for float and 16 for double), the conversion is within 1 ULP of the exact
+  ///    value.
+  /// 3. Otherwise, the conversion is within 2^(-RealTraits<Real>::kMantissaDigits+1)
+  ///    (e.g. 2^-23 for float and 2^-52 for double) of the exact value.
+  /// Here "exact value" means the closest representable value by Real.
+  template <typename Real>
+  static Real ToRealPositive(const Decimal256& decimal, int32_t scale) {
+    const auto parts_le = decimal.little_endian_array();
+    if (scale <= 0 || (parts_le[3] == 0 && parts_le[2] == 0 && parts_le[1] == 0 &&
+                       parts_le[0] < RealTraits<Real>::kMaxPreciseInteger)) {
+      // No need to split the decimal if it is already an integer (scale <= 0) or if it
+      // can be precisely represented by Real
+      return ToRealPositiveNoSplit<Real>(decimal, scale);
+    }
+
+    // Split the decimal into whole and fractional parts to avoid precision loss
+    BasicDecimal256 whole_decimal, fraction_decimal;
+    decimal.GetWholeAndFraction(scale, &whole_decimal, &fraction_decimal);
+
+    Real whole = ToRealPositiveNoSplit<Real>(whole_decimal, 0);
+    Real fraction = ToRealPositiveNoSplit<Real>(fraction_decimal, scale);
+    return whole + fraction;
+  }
 };
 
 }  // namespace

diff --git a/cpp/src/arrow/util/decimal_internal.h b/cpp/src/arrow/util/decimal_internal.h
@@ -451,6 +451,8 @@ struct RealTraits<float> {
   static constexpr int kMantissaBits = 24;
   // ceil(log10(2 ^ kMantissaBits))
   static constexpr int kMantissaDigits = 8;
+  // Integers between zero and kMaxPreciseInteger can be precisely represented
+  static constexpr uint64_t kMaxPreciseInteger = (1ULL << kMantissaBits) - 1;
 };
 
 template <>
@@ -464,6 +466,8 @@ struct RealTraits<double> {
   static constexpr int kMantissaBits = 53;
   // ceil(log10(2 ^ kMantissaBits))
   static constexpr int kMantissaDigits = 16;
+  // Integers between zero and kMaxPreciseInteger can be precisely represented
+  static constexpr uint64_t kMaxPreciseInteger = 1ULL << kMantissaBits;
 };
 
 template <typename DecimalType>

diff --git a/cpp/src/arrow/util/decimal_test.cc b/cpp/src/arrow/util/decimal_test.cc
@@ -1050,6 +1050,24 @@ void CheckDecimalToReal(const std::string& decimal_value, int32_t scale, Real ex
       << "Decimal value: " << decimal_value << " Scale: " << scale;
 }
 
+template <typename Decimal, typename Real>
+void CheckDecimalToRealWithinOneULP(const std::string& decimal_value, int32_t scale,
+                                    Real expected) {
+  Decimal dec(decimal_value);
+  auto result = dec.template ToReal<Real>(scale);
+  ASSERT_TRUE(result == expected || result == std::nextafter(expected, expected + 1) ||
+              result == std::nextafter(expected, expected - 1))
+      << "Decimal value: " << decimal_value << " Scale: " << scale;
+}
+
+template <typename Decimal, typename Real>
+void CheckDecimalToRealWithinEpsilon(const std::string& decimal_value, int32_t scale,
+                                     Real epsilon, Real expected) {
+  Decimal dec(decimal_value);
+  ASSERT_TRUE(std::abs(dec.template ToReal<Real>(scale) - expected) <= epsilon)
+      << "Decimal value: " << decimal_value << " Scale: " << scale;
+}
+
 template <typename Decimal>
 void CheckDecimalToRealApprox(const std::string& decimal_value, int32_t scale,
                               float expected) {
@@ -1110,59 +1128,79 @@ class TestDecimalToReal : public ::testing::Test {
       }
     }
   }
+};
 
-  // Test precision of conversions to float values
-  void TestPrecision() {
-    // 2**63 + 2**40 (exactly representable in a float's 24 bits of precision)
-    CheckDecimalToReal<Decimal, Real>("9223373136366403584", 0, 9.223373e+18f);
-    CheckDecimalToReal<Decimal, Real>("-9223373136366403584", 0, -9.223373e+18f);
-    // 2**64 + 2**41 (exactly representable in a float)
-    CheckDecimalToReal<Decimal, Real>("18446746272732807168", 0, 1.8446746e+19f);
-    CheckDecimalToReal<Decimal, Real>("-18446746272732807168", 0, -1.8446746e+19f);
-  }
+TYPED_TEST_SUITE(TestDecimalToReal, RealTypes);
+TYPED_TEST(TestDecimalToReal, TestSuccess) { this->TestSuccess(); }
+
+// Custom test for Decimal::ToReal<float>
+template <typename DecimalType>
+class TestDecimalToRealFloat : public TestDecimalToReal<std::pair<DecimalType, float>> {};
+TYPED_TEST_SUITE(TestDecimalToRealFloat, DecimalTypes);
 
-  // Test conversions with a range of scales
-  void TestLargeValues(int32_t max_scale) {
-    // Note that exact comparisons would succeed on some platforms (Linux, macOS).
-    // Nevertheless, power-of-ten factors are not all exactly representable
-    // in binary floating point.
-    for (int32_t scale = -max_scale; scale <= max_scale; scale++) {
+TYPED_TEST(TestDecimalToRealFloat, LargeValues) {
+  auto max_scale = TypeParam::kMaxScale;
+  // Note that exact comparisons would succeed on some platforms (Linux, macOS).
+  // Nevertheless, power-of-ten factors are not all exactly representable
+  // in binary floating point.
+  for (int32_t scale = -max_scale; scale <= max_scale; scale++) {
 #ifdef _WIN32
-      // MSVC gives pow(10.f, -45.f) == 0 even though 1e-45f is nonzero
-      if (scale == 45) continue;
+    // MSVC gives pow(10.f, -45.f) == 0 even though 1e-45f is nonzero
+    if (scale == 45) continue;
 #endif
-      CheckDecimalToRealApprox<Decimal>("1", scale, Pow10(-scale));
-    }
-    for (int32_t scale = -max_scale; scale <= max_scale - 2; scale++) {
+    CheckDecimalToRealApprox<TypeParam>("1", scale, this->Pow10(-scale));
+  }
+  for (int32_t scale = -max_scale; scale <= max_scale - 2; scale++) {
 #ifdef _WIN32
-      // MSVC gives pow(10.f, -45.f) == 0 even though 1e-45f is nonzero
-      if (scale == 45) continue;
+    // MSVC gives pow(10.f, -45.f) == 0 even though 1e-45f is nonzero
+    if (scale == 45) continue;
 #endif
-      const Real factor = static_cast<Real>(123);
-      CheckDecimalToRealApprox<Decimal>("123", scale, factor * Pow10(-scale));
-    }
+    const auto factor = static_cast<float>(123);
+    CheckDecimalToRealApprox<TypeParam>("123", scale, factor * this->Pow10(-scale));
   }
-};
-
-TYPED_TEST_SUITE(TestDecimalToReal, RealTypes);
-
-TYPED_TEST(TestDecimalToReal, TestSuccess) { this->TestSuccess(); }
+}
 
-// Custom test for Decimal128::ToReal<float>
-class TestDecimal128ToRealFloat : public TestDecimalToReal<std::pair<Decimal128, float>> {
-};
-TEST_F(TestDecimal128ToRealFloat, LargeValues) { TestLargeValues(/*max_scale=*/38); }
-TEST_F(TestDecimal128ToRealFloat, Precision) { this->TestPrecision(); }
-// Custom test for Decimal256::ToReal<float>
-class TestDecimal256ToRealFloat : public TestDecimalToReal<std::pair<Decimal256, float>> {
-};
-TEST_F(TestDecimal256ToRealFloat, LargeValues) { TestLargeValues(/*max_scale=*/76); }
-TEST_F(TestDecimal256ToRealFloat, Precision) { this->TestPrecision(); }
+TYPED_TEST(TestDecimalToRealFloat, Precision) {
+  // 2**63 + 2**40 (exactly representable in a float's 24 bits of precision)
+  CheckDecimalToReal<TypeParam, float>("9223373136366403584", 0, 9.223373e+18f);
+  CheckDecimalToReal<TypeParam, float>("-9223373136366403584", 0, -9.223373e+18f);
+  // 2**64 + 2**41 (exactly representable in a float)
+  CheckDecimalToReal<TypeParam, float>("18446746272732807168", 0, 1.8446746e+19f);
+  CheckDecimalToReal<TypeParam, float>("-18446746272732807168", 0, -1.8446746e+19f);
+
+  // Integers are always exact
+  auto scale = TypeParam::kMaxScale - 1;
+  std::string seven = "7.";
+  seven.append(scale, '0');  // pad with trailing zeros
+  CheckDecimalToReal<TypeParam, float>(seven, scale, 7.0f);
+  CheckDecimalToReal<TypeParam, float>("-" + seven, scale, -7.0f);
+
+  CheckDecimalToReal<TypeParam, float>("99999999999999999999.0000000000000000", 16,
+                                       99999999999999999999.0f);
+  CheckDecimalToReal<TypeParam, float>("-99999999999999999999.0000000000000000", 16,
+                                       -99999999999999999999.0f);
+
+  // Small fractions are within one ULP
+  CheckDecimalToRealWithinOneULP<TypeParam, float>("9999999.9", 1, 9999999.9f);
+  CheckDecimalToRealWithinOneULP<TypeParam, float>("-9999999.9", 1, -9999999.9f);
+  CheckDecimalToRealWithinOneULP<TypeParam, float>("9999999.999999", 6, 9999999.999999f);
+  CheckDecimalToRealWithinOneULP<TypeParam, float>("-9999999.999999", 6,
+                                                   -9999999.999999f);
+
+  // Large fractions are within 2^-23
+  constexpr float epsilon = 1.1920928955078125e-07f;  // 2^-23
+  CheckDecimalToRealWithinEpsilon<TypeParam, float>(
+      "112334829348925.99070703983306884765625", 23, epsilon,
+      112334829348925.99070703983306884765625f);
+  CheckDecimalToRealWithinEpsilon<TypeParam, float>(
+      "1.987748987892758765582589910934859345", 36, epsilon,
+      1.987748987892758765582589910934859345f);
+}
 
 // ToReal<double> tests are disabled on MinGW because of precision issues in results
 #ifndef __MINGW32__
 
-// Custom test for Decimal128::ToReal<double>
+// Custom test for Decimal::ToReal<double>
 template <typename DecimalType>
 class TestDecimalToRealDouble : public TestDecimalToReal<std::pair<DecimalType, double>> {
 };
@@ -1209,6 +1247,34 @@ TYPED_TEST(TestDecimalToRealDouble, Precision) {
                                         9.999999999999998e+47);
   CheckDecimalToReal<TypeParam, double>("-99999999999999978859343891977453174784", -10,
                                         -9.999999999999998e+47);
+  // Integers are always exact
+  auto scale = TypeParam::kMaxScale - 1;
+  std::string seven = "7.";
+  seven.append(scale, '0');
+  CheckDecimalToReal<TypeParam, double>(seven, scale, 7.0);
+  CheckDecimalToReal<TypeParam, double>("-" + seven, scale, -7.0);
+
+  CheckDecimalToReal<TypeParam, double>("99999999999999999999.0000000000000000", 16,
+                                        99999999999999999999.0);
+  CheckDecimalToReal<TypeParam, double>("-99999999999999999999.0000000000000000", 16,
+                                        -99999999999999999999.0);
+
+  // Small fractions are within one ULP
+  CheckDecimalToRealWithinOneULP<TypeParam, double>("9999999.9", 1, 9999999.9);
+  CheckDecimalToRealWithinOneULP<TypeParam, double>("-9999999.9", 1, -9999999.9);
+  CheckDecimalToRealWithinOneULP<TypeParam, double>("9999999.999999999999999", 15,
+                                                    9999999.999999999999999);
+  CheckDecimalToRealWithinOneULP<TypeParam, double>("-9999999.999999999999999", 15,
+                                                    -9999999.999999999999999);
+
+  // Large fractions are within 2^-52
+  constexpr double epsilon = 2.220446049250313080847263336181640625e-16;  // 2^-52
+  CheckDecimalToRealWithinEpsilon<TypeParam, double>(
+      "112334829348925.99070703983306884765625", 23, epsilon,
+      112334829348925.99070703983306884765625);
+  CheckDecimalToRealWithinEpsilon<TypeParam, double>(
+      "1.987748987892758765582589910934859345", 36, epsilon,
+      1.987748987892758765582589910934859345);
 }
 
 #endif  // __MINGW32__