Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
134 changes: 133 additions & 1 deletion benchmarks/algorithms.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@
#define YY_DOUBLE_SUPPORTED 0
#endif


template<arithmetic_float T>
struct BenchArgs {
using Type = T;
Expand All @@ -63,6 +62,129 @@ struct BenchArgs {

namespace BenchmarkShortest {

/**
 * Rewrites a decimal number string (such as the output of std::to_chars)
 * into an equivalent string that is never longer than the input.
 *
 * std::to_chars does not always produce the shortest textual form: it pads
 * exponents ("1e+05") and prefers fixed notation on ties ("100000").
 *
 *  - Input already in scientific notation: the mantissa is kept verbatim and
 *    the exponent is normalised (no '+', no leading zeros, lowercase 'e').
 *  - Input in fixed notation: converted to "d[.ddd]e<exp>" when, and only
 *    when, that form is strictly shorter than the input.
 *  - Zero, empty strings and non-numeric text ("inf", "nan", ...) are returned
 *    unchanged.
 *
 * @param input textual number, optionally prefixed with '-'
 * @return the shortest equivalent representation found (possibly `input`)
 */
inline std::string optimize_number_string(const std::string &input) {
  // --- Scientific notation: only the exponent can be shortened. ---
  if (const auto e_pos = input.find_first_of("Ee");
      e_pos != std::string::npos) {
    const std::string mantissa = input.substr(0, e_pos);
    std::string exponent = input.substr(e_pos + 1);

    // Strip the sign, then the leading zeros; re-attach '-' only when the
    // remaining exponent is non-zero (so "e-00" becomes "e0").
    const bool negative = !exponent.empty() && exponent[0] == '-';
    const bool positive = !exponent.empty() && exponent[0] == '+';
    if (negative || positive)
      exponent.erase(0, 1);
    exponent.erase(0, exponent.find_first_not_of('0'));
    if (exponent.empty())
      exponent = "0";
    else if (negative)
      exponent = "-" + exponent;

    return mantissa + "e" + exponent;
  }

  // --- Fixed notation. ---
  const bool is_negative = !input.empty() && input[0] == '-';
  const std::string body = is_negative ? input.substr(1) : input;

  // Anything that is not plain digits with at most one decimal point
  // (e.g. "inf", "nan") is passed through untouched.
  if (body.find_first_not_of("0123456789.") != std::string::npos)
    return input;
  const size_t decimal_pos = body.find('.');
  if (decimal_pos != std::string::npos
      && body.find('.', decimal_pos + 1) != std::string::npos)
    return input;

  // Number of characters in front of the decimal point.
  const size_t integer_len =
      (decimal_pos == std::string::npos) ? body.size() : decimal_pos;

  // All digits of the number with the decimal point removed.
  std::string digits = body;
  if (decimal_pos != std::string::npos)
    digits.erase(decimal_pos, 1);

  // Zero ("0", "-0", "0.0", ...) and empty input have no significant digit;
  // checking before substr() avoids substr(npos, ...) throwing.
  const size_t first_non_zero = digits.find_first_not_of('0');
  if (first_non_zero == std::string::npos)
    return input;
  const size_t last_non_zero = digits.find_last_not_of('0');

  // Decimal exponent of the first significant digit. Examples:
  //   "123000"   -> integer_len 6, first_non_zero 0 ->  5
  //   "0.000123" -> integer_len 1, first_non_zero 4 -> -4
  const int exponent = static_cast<int>(integer_len) - 1
                       - static_cast<int>(first_non_zero);

  // Keep only the significant digits (no leading/trailing zeros).
  digits = digits.substr(first_non_zero, last_non_zero - first_non_zero + 1);

  // Build the scientific candidate: [-]d[.ddd]e<exp>
  std::string result;
  if (is_negative)
    result += '-';
  result += digits[0];
  if (digits.size() > 1) {
    result += '.';
    result.append(digits, 1, std::string::npos);
  }
  result += 'e';
  result += std::to_string(exponent);

  // Only switch notation when it actually saves characters.
  return result.size() < input.size() ? result : input;
}

/**
 * std::to_chars followed by optimize_number_string: formats `d` into
 * `buffer`, then overwrites the buffer with the shortened text.
 *
 * The extra std::string round trip makes this unsuitable for timing; it is
 * meant for comparing output lengths only.
 *
 * @param d      value to format
 * @param buffer destination storage; the text is written at its start
 * @return number of characters written to `buffer`
 *
 * Aborts the process if std::to_chars reports an error or is unavailable.
 */
template<arithmetic_float T>
int std_to_chars_shorter(T d, std::span<char>& buffer) {
#if TO_CHARS_SUPPORTED
  char *const first = buffer.data();
  char *const last = first + buffer.size();

  const auto conversion = std::to_chars(first, last, d);
  if (conversion.ec != std::errc()) {
    std::cerr << "problem with " << d << std::endl;
    std::abort();
  }

  // Round-trip through std::string so the text can be shortened, then copy
  // the shortened text back over the start of the buffer.
  const std::string raw(first, conversion.ptr - first);
  const std::string shortened = optimize_number_string(raw);
  std::memcpy(first, shortened.data(), shortened.size());
  return shortened.size();
#else
  std::cerr << "std::to_chars not supported" << std::endl;
  std::abort();
#endif
}

template<arithmetic_float T>
int dragon4(T d, std::span<char>& buffer) {
if constexpr (std::is_same_v<T, float>)
Expand Down Expand Up @@ -441,6 +563,16 @@ auto wrap(int (*fn)(T, std::span<char>&)) {
};
}

// Experimental: shorter representation for std::to_chars.
// Returns the BenchArgs<T> entry named "std_to_chars_short", whose callable is
// wrap(BenchmarkShortest::std_to_chars_shorter<T>) and whose last constructor
// argument is TO_CHARS_SUPPORTED (presumably the "is this algorithm
// available" flag -- confirm against the BenchArgs constructor).
// The wrapped function post-processes the std::to_chars output to make it
// shorter; it is written for output length, not speed, so it is kept separate
// from the list built by initArgs.
// NOTE(review): callers decide where this entry is used; verify before
// assuming it never appears in benchmark output.
template <arithmetic_float T>
BenchArgs<T> get_std_to_chars_shorter() {
return BenchArgs<T>("std_to_chars_short", wrap(BenchmarkShortest::std_to_chars_shorter<T>), TO_CHARS_SUPPORTED);
}

template <arithmetic_float T>
std::vector<BenchArgs<T>> initArgs(bool use_errol = false, size_t repeat = 0, size_t fixed_size = 0) {
std::vector<BenchArgs<T>> args;
Expand Down
33 changes: 20 additions & 13 deletions benchmarks/benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,11 @@ void evaluateProperties(const std::vector<TestCase<T>> &lines,
}

// Plain record holding a floating-point value split into three parts:
// an unsigned 64-bit significand, an integer exponent and a sign flag.
// The constructor stores its three arguments in the members of the same name.
// NOTE(review): the base of `exponent` (binary vs decimal) is not visible
// here -- confirm at the use sites.
// NOTE(review): the constructor and members appear twice below; this looks
// like the old and new side of a whitespace-only diff hunk, not real code.
struct diy_float_t {
diy_float_t(uint64_t significand, int exponent, bool is_negative)
: significand(significand), exponent(exponent), is_negative(is_negative) {}
uint64_t significand;
int exponent;
bool is_negative;
diy_float_t(uint64_t significand, int exponent, bool is_negative)
: significand(significand), exponent(exponent), is_negative(is_negative) {}
uint64_t significand;
int exponent;
bool is_negative;
};

template <arithmetic_float T>
Expand Down Expand Up @@ -144,18 +144,25 @@ std::vector<TestCase<T>> get_random_numbers(size_t howmany,
// Checks if a floating-point number is exactly representable as the specified integer type.
// Returns false for NaN and infinities; otherwise converts x to int_type and
// back to float_type and reports whether the round trip reproduces x.
// NOTE(review): static_cast<int_type>(x) is undefined behaviour in C++ when
// the truncated value of x does not fit in int_type; no range check is
// visible here -- confirm callers only pass in-range values or add one.
// NOTE(review): the statements appear twice below; this looks like the old
// and new side of a whitespace-only diff hunk, not real code.
template <std::integral int_type, std::floating_point float_type>
bool is_exact_integer(float_type x) {
if (!std::isfinite(x)) {
return false;
}
int_type i = static_cast<int_type>(x);
return static_cast<float_type>(i) == x;
if (!std::isfinite(x)) {
return false;
}
int_type i = static_cast<int_type>(x);
return static_cast<float_type>(i) == x;
}

// New template version of describe
template <typename T>
void describe(const std::variant<std::vector<TestCase<float>>, std::vector<TestCase<double>>> &numbers,
const std::vector<BenchArgs<T>> &args,
const std::vector<std::string> &algo_filter) {
void describe(const std::variant<std::vector<TestCase<float>>,
std::vector<TestCase<double>>> &numbers,
std::vector<BenchArgs<T>> args,
const std::vector<std::string> &algo_filter) {
if constexpr (std::is_same_v<T, float>) {
args.push_back(get_std_to_chars_shorter<float>());
} else if constexpr (std::is_same_v<T, double>) {
args.push_back(get_std_to_chars_shorter<double>());
}

std::visit([&args, &algo_filter](const auto &lines) {
size_t integers64 = 0;
size_t integers32 = 0;
Expand Down