From a726cc3f8ff684edaee2ecb086c4e44fbdd4e313 Mon Sep 17 00:00:00 2001
From: Daniel Lemire <daniel@lemire.me>
Date: Tue, 24 Jun 2025 14:28:01 -0400
Subject: [PATCH 1/2] design a backward algorithm to produce the shortest
 string

---
 benchmarks/algorithms.h  | 131 +++++++++++++++++++++++++++++++++++++++
 benchmarks/benchmark.cpp |   8 ++-
 2 files changed, 138 insertions(+), 1 deletion(-)
diff --git a/benchmarks/algorithms.h b/benchmarks/algorithms.h
index 3480c6f..aef27fe 100644
--- a/benchmarks/algorithms.h
+++ b/benchmarks/algorithms.h
@@ -63,6 +63,126 @@ struct BenchArgs {
 
 namespace BenchmarkShortest {
 
+
+/**
+ * We have that std::to_chars does not produce the shortest
+ * representation for numbers in scientific notation, so we
+ * optimize the string representation to be shorter.
+ */
+inline std::string optimize_number_string(const std::string &input) {
+  // Check if input contains 'E' or 'e' for scientific notation
+  auto e_pos = input.find_first_of("Ee");
+  if (e_pos != std::string::npos) {
+    // Handle scientific notation
+    std::string mantissa = input.substr(0, e_pos);
+    std::string exponent = input.substr(e_pos + 1);
+
+    // Remove leading zeros in exponent, preserving sign
+    bool negative = exponent[0] == '-';
+    exponent.erase(0, negative ? 1 : 0);
+    exponent.erase(0, exponent.find_first_not_of('0'));
+    if (exponent.empty())
+      exponent = "0";
+    if (negative && exponent != "0")
+      exponent = "-" + exponent;
+
+    // Reconstruct the number
+    return mantissa + "E" + exponent;
+  }
+
+  // Handle non-scientific notation
+  if (input == "0" || input == "-0")
+    return input;
+
+  // Determine sign
+  bool is_negative = input[0] == '-';
+  std::string num = is_negative ? input.substr(1) : input;
+
+  // Find first and last significant digits
+  std::string digits = num;
+  size_t decimal_pos = digits.find('.');
+  if (decimal_pos != std::string::npos) {
+    digits.erase(decimal_pos, 1); // Remove decimal point
+  }
+  size_t first_non_zero = digits.find_first_not_of('0');
+  size_t last_non_zero = digits.find_last_not_of('0');
+  digits = digits.substr(first_non_zero, last_non_zero - first_non_zero + 1);
+  // Count significant digits
+  size_t num_digits = digits.length();
+  if (num_digits == 0)
+    return input;
+  // Calculate exponent
+  size_t input_decimal_pos = input.find('.');
+  size_t input_first_non_zero = input.find_first_not_of('0');
+  size_t input_last_non_zero = input.find_last_not_of('0');
+
+  int exponent = 0;
+  if (input_decimal_pos == std::string::npos) {
+    // we have 123232900000
+    exponent = (input_last_non_zero - input_first_non_zero);
+  } else if (input_last_non_zero < input_decimal_pos) {
+    // Number like 123.456 or 0.456
+    exponent = (input_decimal_pos - input_first_non_zero - 1);
+  } else {
+    // Number like 0.000123
+    exponent =
+        -static_cast<int>(input.find_first_not_of('0', input_decimal_pos + 1) -
+                          input_decimal_pos);
+  }
+  // Calculate scientific notation length
+  size_t mantissa_len =
+      num_digits + (num_digits > 1 ? 1 : 0); // Digits + optional decimal
+  size_t exponent_len = (exponent == 0) ? 1
+                                        : (exponent < 0 ? 1 : 0) +
+                                              (std::abs(exponent) < 10    ? 1
+                                               : std::abs(exponent) < 100 ? 2
+                                                                          : 3);
+  size_t sci_len = mantissa_len + 1 + exponent_len +
+                   (is_negative ? 1 : 0); // Mantissa + E + exponent + sign
+
+  // Compare lengths
+  if (sci_len >= input.length())
+    return input;
+
+  // Construct scientific notation
+  std::string result;
+  if (is_negative)
+    result += "-";
+  result += digits[0];
+  if (num_digits > 1) {
+    result += ".";
+    result += digits.substr(1);
+  }
+  result += "e";
+  result += std::to_string(exponent);
+
+  return result;
+}
+
+/**
+  * This is a special version of std::to_chars that produces the shortest
+  * representation for numbers. It should not be used for benchmarking.
+ */
+template<arithmetic_float T>
+int std_to_chars_shorter(T d, std::span<char>& buffer) {
+#if TO_CHARS_SUPPORTED
+  const auto [p, ec]
+      = std::to_chars(buffer.data(), buffer.data() + buffer.size(), d);
+  if (ec != std::errc()) {
+    std::cerr << "problem with " << d << std::endl;
+    std::abort();
+  }
+  // This is ridiculous, optimize:
+  std::string result(buffer.data(), p - buffer.data());
+  result = optimize_number_string(result);
+  std::memcpy(buffer.data(), result.data(), result.size());
+  return result.size();
+#else
+  std::cerr << "std::to_chars not supported" << std::endl;
+  std::abort();
+#endif
+}
+
 template<arithmetic_float T>
 int dragon4(T d, std::span<char>& buffer) {
   if constexpr (std::is_same_v<T, float>)
@@ -432,6 +552,7 @@ int std_to_chars(T d, std::span<char>& buffer) {
 #endif
 }
 
+
 }  // namespace BenchmarksShortest
 
 template <typename T>
@@ -441,6 +562,16 @@ auto wrap(int (*fn)(T, std::span<char>&)) {
   };
 }
 
+// Experimental: shorter representation for std::to_chars
+// This is not a benchmark, but a utility function to produce the shortest
+// representation of a floating-point number using std::to_chars.
+// It is not used in the benchmarks, but can be useful for other purposes.
+// It is not optimized for performance, but for producing the shortest string.
+template <arithmetic_float T>
+BenchArgs<T> get_std_to_chars_shorter() {
+  return BenchArgs<T>("std_to_chars_short", wrap(BenchmarkShortest::std_to_chars_shorter<T>), TO_CHARS_SUPPORTED);
+}
+
 template <arithmetic_float T>
 std::vector<BenchArgs<T>> initArgs(bool use_errol = false, size_t repeat = 0, size_t fixed_size = 0) {
   std::vector<BenchArgs<T>> args;
diff --git a/benchmarks/benchmark.cpp b/benchmarks/benchmark.cpp
index 9319852..8362727 100644
--- a/benchmarks/benchmark.cpp
+++ b/benchmarks/benchmark.cpp
@@ -154,8 +154,14 @@ bool is_exact_integer(float_type x) {
 // New template version of describe
 template <typename T>
 void describe(const std::variant<std::vector<TestCase<float>>, std::vector<TestCase<double>>> &numbers,
-             const std::vector<BenchArgs<T>> &args,
+             std::vector<BenchArgs<T>> args,
              const std::vector<std::string> &algo_filter) {
+              if constexpr (std::is_same_v<T, float>) {
+                args.push_back(get_std_to_chars_shorter<float>());
+              } else if constexpr (std::is_same_v<T, double>) {
+                args.push_back(get_std_to_chars_shorter<double>());
+              }
+
   std::visit([&args, &algo_filter](const auto &lines) {
     size_t integers64 = 0;
     size_t integers32 = 0;

From 0490d8b44cab857b9cd42de1d3f24122bf442edf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ja=C3=ABl=20Champagne=20Gareau?= <gareau_jael@hotmail.com>
Date: Fri, 27 Jun 2025 00:47:37 -0400
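Subject: [PATCH 2/2] fix missing case in shortest string generation

An explicit '+' sign in the exponent is now stripped together with the
leading zeros: "1.23e+004" now becomes "1.23e4" instead of keeping its
original length.

The rewritten exponent marker is now emitted as a lowercase "e" rather
than "E". The remaining changes are const-qualification and formatting
cleanups.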
---
 benchmarks/algorithms.h  | 67 ++++++++++++++++++++--------------------
 benchmarks/benchmark.cpp | 37 +++++++++++-----------
 2 files changed, 53 insertions(+), 51 deletions(-)

diff --git a/benchmarks/algorithms.h b/benchmarks/algorithms.h
index aef27fe..cb66f24 100644
--- a/benchmarks/algorithms.h
+++ b/benchmarks/algorithms.h
@@ -44,7 +44,6 @@
 #define YY_DOUBLE_SUPPORTED 0
 #endif
 
-
 template<arithmetic_float T>
 struct BenchArgs {
   using Type = T;
@@ -63,7 +62,6 @@ struct BenchArgs {
 
 namespace BenchmarkShortest {
 
-
 /**
  * We have that std::to_chars does not produce the shortest
  * representation for numbers in scientific notation, so we
@@ -71,15 +69,16 @@ namespace BenchmarkShortest {
  */
 inline std::string optimize_number_string(const std::string &input) {
   // Check if input contains 'E' or 'e' for scientific notation
-  auto e_pos = input.find_first_of("Ee");
-  if (e_pos != std::string::npos) {
+  if (const auto e_pos = input.find_first_of("Ee");
+      e_pos != std::string::npos) {
     // Handle scientific notation
-    std::string mantissa = input.substr(0, e_pos);
+    const std::string mantissa = input.substr(0, e_pos);
     std::string exponent = input.substr(e_pos + 1);
 
     // Remove leading zeros in exponent, preserving sign
-    bool negative = exponent[0] == '-';
-    exponent.erase(0, negative ? 1 : 0);
+    const bool negative = exponent[0] == '-';
+    const bool positive = exponent[0] == '+';
+    exponent.erase(0, (negative || positive) ? 1 : 0);
     exponent.erase(0, exponent.find_first_not_of('0'));
     if (exponent.empty())
       exponent = "0";
@@ -87,7 +86,7 @@ inline std::string optimize_number_string(const std::string &input) {
       exponent = "-" + exponent;
 
     // Reconstruct the number
-    return mantissa + "E" + exponent;
+    return mantissa + "e" + exponent;
   }
 
   // Handle non-scientific notation
@@ -95,28 +94,29 @@ inline std::string optimize_number_string(const std::string &input) {
     return input;
 
   // Determine sign
-  bool is_negative = input[0] == '-';
-  std::string num = is_negative ? input.substr(1) : input;
+  const bool is_negative = input[0] == '-';
 
   // Find first and last significant digits
-  std::string digits = num;
-  size_t decimal_pos = digits.find('.');
-  if (decimal_pos != std::string::npos) {
-    digits.erase(decimal_pos, 1); // Remove decimal point
+  std::string digits = is_negative ? input.substr(1) : input;
+  if (const size_t decimal_pos = digits.find('.');
+      decimal_pos != std::string::npos) {
+    digits.erase(decimal_pos, 1);  // Remove decimal point
   }
-  size_t first_non_zero = digits.find_first_not_of('0');
-  size_t last_non_zero = digits.find_last_not_of('0');
+  const size_t first_non_zero = digits.find_first_not_of('0');
+  const size_t last_non_zero = digits.find_last_not_of('0');
   digits = digits.substr(first_non_zero, last_non_zero - first_non_zero + 1);
+
   // Count significant digits
-  size_t num_digits = digits.length();
+  const size_t num_digits = digits.length();
   if (num_digits == 0)
     return input;
+
   // Calculate exponent
-  size_t input_decimal_pos = input.find('.');
-  size_t input_first_non_zero = input.find_first_not_of('0');
-  size_t input_last_non_zero = input.find_last_not_of('0');
+  const size_t input_decimal_pos = input.find('.');
+  const size_t input_first_non_zero = input.find_first_not_of('0');
+  const size_t input_last_non_zero = input.find_last_not_of('0');
 
-  int exponent = 0;
+  int exponent;
   if (input_decimal_pos == std::string::npos) {
     // we have 123232900000
     exponent = (input_last_non_zero - input_first_non_zero);
@@ -126,19 +126,21 @@ inline std::string optimize_number_string(const std::string &input) {
   } else {
     // Number like 0.000123
     exponent =
-        -static_cast<int>(input.find_first_not_of('0', input_decimal_pos + 1) -
-                          input_decimal_pos);
+      -static_cast<int>(input.find_first_not_of('0', input_decimal_pos + 1)
+                        - input_decimal_pos);
   }
   // Calculate scientific notation length
-  size_t mantissa_len =
-      num_digits + (num_digits > 1 ? 1 : 0); // Digits + optional decimal
-  size_t exponent_len = (exponent == 0) ? 1
-                                        : (exponent < 0 ? 1 : 0) +
-                                              (std::abs(exponent) < 10    ? 1
-                                               : std::abs(exponent) < 100 ? 2
-                                                                          : 3);
-  size_t sci_len = mantissa_len + 1 + exponent_len +
-                   (is_negative ? 1 : 0); // Mantissa + E + exponent + sign
+  const size_t mantissa_len =
+    num_digits + (num_digits > 1 ? 1 : 0);  // Digits + optional decimal
+  const size_t exponent_len = (exponent == 0)
+                                  ? 1
+                                  : (exponent < 0 ? 1 : 0)
+                                        + (std::abs(exponent) < 10    ? 1
+                                           : std::abs(exponent) < 100 ? 2
+                                                                      : 3);
+  const size_t sci_len =
+    mantissa_len + 1 + exponent_len
+        + (is_negative ? 1 : 0);  // Mantissa + E + exponent + sign
 
   // Compare lengths
   if (sci_len >= input.length())
@@ -552,7 +554,6 @@ int std_to_chars(T d, std::span<char>& buffer) {
 #endif
 }
 
-
 }  // namespace BenchmarksShortest
 
 template <typename T>
diff --git a/benchmarks/benchmark.cpp b/benchmarks/benchmark.cpp
index 8362727..5a40765 100644
--- a/benchmarks/benchmark.cpp
+++ b/benchmarks/benchmark.cpp
@@ -37,11 +37,11 @@ void evaluateProperties(const std::vector<TestCase<T>> &lines,
 }
 
 struct diy_float_t {
-    diy_float_t(uint64_t significand, int exponent, bool is_negative)
-      : significand(significand), exponent(exponent), is_negative(is_negative) {}
-		uint64_t	      significand;
-		int							exponent;
-		bool						is_negative;
+  diy_float_t(uint64_t significand, int exponent, bool is_negative)
+    : significand(significand), exponent(exponent), is_negative(is_negative) {}
+  uint64_t significand;
+  int exponent;
+  bool is_negative;
 };
 
 template <arithmetic_float T>
@@ -144,23 +144,24 @@ std::vector<TestCase<T>> get_random_numbers(size_t howmany,
 // Checks if a floating-point number is exactly representable as the specified integer type
 template <std::integral int_type, std::floating_point float_type>
 bool is_exact_integer(float_type x) {
-    if (!std::isfinite(x)) {
-        return false;
-    }
-    int_type i = static_cast<int_type>(x);
-    return static_cast<float_type>(i) == x;
+  if (!std::isfinite(x)) {
+    return false;
+  }
+  int_type i = static_cast<int_type>(x);
+  return static_cast<float_type>(i) == x;
 }
 
 // New template version of describe
 template <typename T>
-void describe(const std::variant<std::vector<TestCase<float>>, std::vector<TestCase<double>>> &numbers,
-             std::vector<BenchArgs<T>> args,
-             const std::vector<std::string> &algo_filter) {
-              if constexpr (std::is_same_v<T, float>) {
-                args.push_back(get_std_to_chars_shorter<float>());
-              } else if constexpr (std::is_same_v<T, double>) {
-                args.push_back(get_std_to_chars_shorter<double>());
-              }
+void describe(const std::variant<std::vector<TestCase<float>>,
+              std::vector<TestCase<double>>> &numbers,
+              std::vector<BenchArgs<T>> args,
+              const std::vector<std::string> &algo_filter) {
+  if constexpr (std::is_same_v<T, float>) {
+    args.push_back(get_std_to_chars_shorter<float>());
+  } else if constexpr (std::is_same_v<T, double>) {
+    args.push_back(get_std_to_chars_shorter<double>());
+  }
 
   std::visit([&args, &algo_filter](const auto &lines) {
     size_t integers64 = 0;