From 933d43b5caefe47097debc86cd6a644242a5398c Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Wed, 4 Nov 2020 20:38:43 -0500 Subject: [PATCH 1/3] This is an experimental branch that might lead to some faster performance. It is currently unusable. --- include/fast_float/ascii_number.h | 88 +++++++++++++++++++------------ 1 file changed, 53 insertions(+), 35 deletions(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index b372367e..55507d4d 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -67,6 +67,7 @@ fastfloat_really_inline parsed_number_string parse_number_string(const char *p, const char *pend, chars_format fmt) noexcept { parsed_number_string answer; answer.valid = false; + answer.too_many_digits = false; answer.negative = (*p == '-'); if ((*p == '-') || (*p == '+')) { ++p; @@ -78,43 +79,80 @@ parsed_number_string parse_number_string(const char *p, const char *pend, chars_ } } const char *const start_digits = p; + // skip leading zeroes + while ((p != pend) && (*p == '0')) { p++; } + + // We can go forward up to 19 characters without overflow for sure, we might even go 20 characters + // or more if we have a decimal separator. We will adjust accordingly. + const char *pend_overflow_free = p + 19 > pend ? pend : p + 19; uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad) - while ((p != pend) && is_integer(*p)) { + while ((p != pend_overflow_free) && is_integer(*p)) { // a multiplication by 10 is cheaper than an arbitrary integer // multiplication i = 10 * i + - (*p - '0'); // might overflow, we will handle the overflow later + (*p - '0'); ++p; } int64_t exponent = 0; - if ((p != pend) && (*p == '.')) { + if ((p != pend_overflow_free) && (*p == '.')) { ++p; const char *first_after_period = p; - if ((p + 8 <= pend) && is_made_of_eight_digits_fast(p)) { - i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok + if (i == 0) { + // Keep on skipping leading zeroes avec the decimal separator. + while ((p != pend) && (*p == '0')) { p++; } + // reset the ending point + pend_overflow_free = p + 19 > pend ? pend : p + 19; + } else if(pend_overflow_free < pend) { + pend_overflow_free++; // go one further thanks to '.' + } + if ((p + 8 <= pend_overflow_free) && is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); p += 8; - if ((p + 8 <= pend) && is_made_of_eight_digits_fast(p)) { - i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok + if ((p + 8 <= pend_overflow_free) && is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); p += 8; } } - while ((p != pend) && is_integer(*p)) { + while ((p != pend_overflow_free) && is_integer(*p)) { uint8_t digit = uint8_t(*p - '0'); ++p; - i = i * 10 + digit; // in rare cases, this will overflow, but that's ok + i = i * 10 + digit; } exponent = first_after_period - p; } // we must have encountered at least one integer! - if ((start_digits == p) || ((start_digits == p - 1) && (*start_digits == '.') )) { - return answer; + // We only need this check if i == 0 which is preditably unlikely. + if(i == 0) { + if ((start_digits == p) || ((start_digits == p - 1) && (*start_digits == '.') )) { + return answer; + } + } + if((p == pend_overflow_free) && (pend_overflow_free < pend)) { // We possibly have an overflow! + bool found_non_zero{false}; + if((exponent == 0) && (*(p-1) != '.')) { + // We have not yet encountered the '.' + // We do the pre-decimal part first. + while ((p != pend) && is_integer(*p)) { + found_non_zero |= (*p != '0'); + p++; + exponent += 1; + } + if ((p != pend) && (*p == '.')) { p++; } + while ((p != pend) && is_integer(*p)) { + found_non_zero |= (*p != '0'); + p++; + } + } else { + // This is the easy case, we just have to skip all of the digits! + while ((p != pend) && is_integer(*p)) { + found_non_zero |= (*p != '0'); + p++; + } + } + answer.too_many_digits = found_non_zero; } - - int32_t digit_count = - int32_t(p - start_digits - 1); // used later to guard against overflows - if ((p != pend) && (('e' == *p) || ('E' == *p))) { if((fmt & chars_format::fixed) && !(fmt & chars_format::scientific)) { return answer; } int64_t exp_number = 0; // exponential part @@ -142,26 +180,6 @@ parsed_number_string parse_number_string(const char *p, const char *pend, chars_ } answer.lastmatch = p; answer.valid = true; - - // If we frequently had to deal with long strings of digits, - // we could extend our code by using a 128-bit integer instead - // of a 64-bit integer. However, this is uncommon. - if (((digit_count >= 19))) { // this is uncommon - // It is possible that the integer had an overflow. - // We have to handle the case where we have 0.0000somenumber. - const char *start = start_digits; - while (*start == '0' || (*start == '.')) { - start++; - } - // we over-decrement by one when there is a decimal separator - digit_count -= int(start - start_digits); - if (digit_count >= 19) { - answer.mantissa = 0xFFFFFFFFFFFFFFFF; // important: we don't want the mantissa to be used in a fast path uninitialized. - answer.too_many_digits = true; - return answer; - } - } - answer.too_many_digits = false; answer.exponent = exponent; answer.mantissa = i; return answer; From f0495faad1d85f408bbf25d0982f4a8539c3c4f6 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Tue, 5 Jan 2021 14:34:18 -0500 Subject: [PATCH 2/3] Fixing the code. --- include/fast_float/ascii_number.h | 133 ++++++++++-------- include/fast_float/float_common.h | 28 ---- include/fast_float/parse_number.h | 51 +++++-- .../fast_float/simple_decimal_conversion.h | 17 --- 4 files changed, 116 insertions(+), 113 deletions(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index 55507d4d..7033d2d8 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -61,6 +61,7 @@ struct parsed_number_string { }; + // Assuming that you use no more than 19 digits, this will // parse an ASCII string. fastfloat_really_inline @@ -79,82 +80,91 @@ parsed_number_string parse_number_string(const char *p, const char *pend, chars_ } } const char *const start_digits = p; - // skip leading zeroes - while ((p != pend) && (*p == '0')) { p++; } - // We can go forward up to 19 characters without overflow for sure, we might even go 20 characters // or more if we have a decimal separator. We will adjust accordingly. const char *pend_overflow_free = p + 19 > pend ? pend : p + 19; - uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad) while ((p != pend_overflow_free) && is_integer(*p)) { // a multiplication by 10 is cheaper than an arbitrary integer // multiplication i = 10 * i + - (*p - '0'); + uint64_t(*p - '0'); ++p; } int64_t exponent = 0; - if ((p != pend_overflow_free) && (*p == '.')) { + constexpr uint64_t minimal_nineteen_digit_integer{1000000000000000000}; + if (p == pend_overflow_free) { // uncommon path! + // We enter this branch if we hit p == pend_overflow_free + // before the decimal separator so we have a big integer. + // E.g., 23123123232132...3232321 + // This is very uncommon. + while((i < minimal_nineteen_digit_integer) && (p != pend) && is_integer(*p)) { + i = i * 10 + uint64_t(*p - '0'); + ++p; + } + const char *truncated_integer_part = p; + while ((p != pend) && is_integer(*p)) { p++; } + answer.too_many_digits = (p != truncated_integer_part); + if (i > minimal_nineteen_digit_integer) { + exponent = p - truncated_integer_part; + } + if((p != pend) && (*p == '.')) { + p++; // skip the '.' + const char *first_after_period = p; + while((i < minimal_nineteen_digit_integer) && (p != pend) && is_integer(*p)) { + i = i * 10 + uint64_t(*p - '0'); + ++p; + } + const char *truncated_point = p; + if(p > first_after_period) { + exponent = first_after_period - p; + } + // next we truncate: + while ((p != pend) && is_integer(*p)) { p++; } + answer.too_many_digits |= (p != truncated_point); + } + } else if (*p == '.') { + pend_overflow_free++; // go one further thanks to '.' ++p; const char *first_after_period = p; - if (i == 0) { - // Keep on skipping leading zeroes avec the decimal separator. - while ((p != pend) && (*p == '0')) { p++; } - // reset the ending point - pend_overflow_free = p + 19 > pend ? pend : p + 19; - } else if(pend_overflow_free < pend) { - pend_overflow_free++; // go one further thanks to '.' - } +#if FASTFLOAT_IS_BIG_ENDIAN == 0 + // Fast approach only tested under little endian systems if ((p + 8 <= pend_overflow_free) && is_made_of_eight_digits_fast(p)) { - i = i * 100000000 + parse_eight_digits_unrolled(p); + i = i * 100000000 + parse_eight_digits_unrolled(p); p += 8; if ((p + 8 <= pend_overflow_free) && is_made_of_eight_digits_fast(p)) { - i = i * 100000000 + parse_eight_digits_unrolled(p); + i = i * 100000000 + parse_eight_digits_unrolled(p); p += 8; } } +#endif while ((p != pend_overflow_free) && is_integer(*p)) { uint8_t digit = uint8_t(*p - '0'); ++p; - i = i * 10 + digit; + i = i * 10 + digit; + } + if(p == pend_overflow_free) { // uncommon path! + // We might have leading zeros as in 0.000001232132... + // As long as i < minimal_nineteen_digit_integer then we know that we + // did not hit 19 digits (omitting leading zeroes). + while((i < minimal_nineteen_digit_integer) && (p != pend) && is_integer(*p)) { + i = i * 10 + uint64_t(*p - '0'); + ++p; + } } exponent = first_after_period - p; - } - // we must have encountered at least one integer! - // We only need this check if i == 0 which is preditably unlikely. - if(i == 0) { - if ((start_digits == p) || ((start_digits == p - 1) && (*start_digits == '.') )) { - return answer; + if((p != pend) && is_integer(*p)) { // uncommon path + answer.too_many_digits = true; + do { p++; } while ((p != pend) && is_integer(*p)); } } - if((p == pend_overflow_free) && (pend_overflow_free < pend)) { // We possibly have an overflow! - bool found_non_zero{false}; - if((exponent == 0) && (*(p-1) != '.')) { - // We have not yet encountered the '.' - // We do the pre-decimal part first. - while ((p != pend) && is_integer(*p)) { - found_non_zero |= (*p != '0'); - p++; - exponent += 1; - } - if ((p != pend) && (*p == '.')) { p++; } - while ((p != pend) && is_integer(*p)) { - found_non_zero |= (*p != '0'); - p++; - } - } else { - // This is the easy case, we just have to skip all of the digits! - while ((p != pend) && is_integer(*p)) { - found_non_zero |= (*p != '0'); - p++; - } - } - answer.too_many_digits = found_non_zero; + // we must have encountered at least one integer! + if ((start_digits == p) || ((start_digits == p - 1) && (*start_digits == '.') )) { + return answer; } - if ((p != pend) && (('e' == *p) || ('E' == *p))) { - if((fmt & chars_format::fixed) && !(fmt & chars_format::scientific)) { return answer; } + if ((fmt & chars_format::scientific) && (p != pend) && (('e' == *p) || ('E' == *p))) { + const char * location_of_e = p; int64_t exp_number = 0; // exponential part ++p; bool neg_exp = false; @@ -165,27 +175,34 @@ parsed_number_string parse_number_string(const char *p, const char *pend, chars_ ++p; } if ((p == pend) || !is_integer(*p)) { - return answer; - } - while ((p != pend) && is_integer(*p)) { - uint8_t digit = uint8_t(*p - '0'); - if (exp_number < 0x10000) { - exp_number = 10 * exp_number + digit; + if(!(fmt & chars_format::fixed)) { + // We are in error. + return answer; } - ++p; + // Otherwise, we will be ignoring the 'e'. + p = location_of_e; + } else { + while ((p != pend) && is_integer(*p)) { + uint8_t digit = uint8_t(*p - '0'); + if (exp_number < 0x10000) { + exp_number = 10 * exp_number + digit; + } + ++p; + } + exponent += (neg_exp ? -exp_number : exp_number); } - exponent += (neg_exp ? -exp_number : exp_number); } else { - if((fmt & chars_format::scientific) && !(fmt & chars_format::fixed)) { return answer; } + // If it scientific and not fixed, we have to bail out. + if((fmt & chars_format::scientific) && !(fmt & chars_format::fixed)) { return answer; } } answer.lastmatch = p; answer.valid = true; + answer.exponent = exponent; answer.mantissa = i; return answer; } - // This should always succeed since it follows a call to parse_number_string // This function could be optimized. In particular, we could stop after 19 digits // and try to bail out. Furthermore, we should be able to recover the computed diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h index f09d29b3..bb38108f 100644 --- a/include/fast_float/float_common.h +++ b/include/fast_float/float_common.h @@ -149,34 +149,6 @@ struct decimal { // Moves are allowed: decimal(decimal &&) = default; decimal &operator=(decimal &&other) = default; - // Generates a mantissa by truncating to 19 digits; this function assumes - // that num_digits >= 19 (the caller is responsible for the check). - // This function should be reasonably fast. - inline uint64_t to_truncated_mantissa() { - uint64_t val; - // 8 first digits - ::memcpy(&val, digits, sizeof(uint64_t)); - val = val * 2561 >> 8; - val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; - uint64_t mantissa = - uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); - // 8 more digits for a total of 16 - ::memcpy(&val, digits + sizeof(uint64_t), sizeof(uint64_t)); - val = val * 2561 >> 8; - val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; - uint32_t eight_digits_value = - uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); - mantissa = 100000000 * mantissa + eight_digits_value; - for (uint32_t i = 2 * sizeof(uint64_t); i < max_digit_without_overflow; - i++) { - mantissa = mantissa * 10 + digits[i]; // can be accelerated - } - return mantissa; - } - // Generate san exponent matching to_truncated_mantissa() - inline int32_t to_truncated_exponent() { - return decimal_point - max_digit_without_overflow; - } }; constexpr static double powers_of_ten_double[] = { diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h index 9b2d5b5a..d2d52e78 100644 --- a/include/fast_float/parse_number.h +++ b/include/fast_float/parse_number.h @@ -91,17 +91,48 @@ from_chars_result from_chars(const char *first, const char *last, answer.ec = std::errc(); // be optimistic answer.ptr = pns.lastmatch; - if (binary_format::min_exponent_fast_path() <= pns.exponent && pns.exponent <= binary_format::max_exponent_fast_path() && pns.mantissa <=binary_format::max_mantissa_fast_path()) { - value = T(pns.mantissa); - if (pns.exponent < 0) { value = value / binary_format::exact_power_of_ten(-pns.exponent); } - else { value = value * binary_format::exact_power_of_ten(pns.exponent); } - if (pns.negative) { value = -value; } - return answer; + answer.ec = std::errc(); // be optimistic + answer.ptr = pns.lastmatch; + adjusted_mantissa am; + // Most times, we have pns.too_many_digits = false. + if(pns.too_many_digits) { + // Uncommon path where we have too many digits. + // + // credit: R. Oudompheng who first implemented this fast path. + // It does the job of accelerating the slow path since most + // long streams of digits are determined after 19 digits. + // Note that mantissa+1 cannot overflow since mantissa < 10**19 and so + // mantissa+1 <= 10**19 < 2**64. + adjusted_mantissa am1 = compute_float>(pns.exponent, pns.mantissa); + adjusted_mantissa am2 = compute_float>(pns.exponent, pns.mantissa+1); + // They must both agree and be both a successful result. + if(( am1 == am2 ) && (am1.power2 >= 0)) { + am = am1; + } else { + // long way! (uncommon) + decimal d = parse_decimal(first, last); + am = compute_float>(d); + } + } else { + // We are entering the common path where the number of digits is no more than 19. + // + // Next is Clinger's fast path. + if (binary_format::min_exponent_fast_path() <= pns.exponent && pns.exponent <= binary_format::max_exponent_fast_path() && pns.mantissa <=binary_format::max_mantissa_fast_path()) { + value = T(pns.mantissa); + if (pns.exponent < 0) { value = value / binary_format::exact_power_of_ten(-pns.exponent); } + else { value = value * binary_format::exact_power_of_ten(pns.exponent); } + if (pns.negative) { value = -value; } + return answer; + } + // Then we have our main routine. + am = compute_float>(pns.exponent, pns.mantissa); + // If we called compute_float>(pns.exponent, pns.mantissa) and we have an invalid power (am.power2 < 0), + // then we need to go the long way around again. This is very uncommon. + if(am.power2 < 0) { // long way! (uncommon) + decimal d = parse_decimal(first, last); + am = compute_float>(d); + } } - adjusted_mantissa am = pns.too_many_digits ? parse_long_mantissa>(first,last) : compute_float>(pns.exponent, pns.mantissa); - // If we called compute_float>(pns.exponent, pns.mantissa) and we have an invalid power (am.power2 < 0), - // then we need to go the long way around again. This is very uncommon. - if(am.power2 < 0) { am = parse_long_mantissa>(first,last); } uint64_t word = am.mantissa; word |= uint64_t(am.power2) << binary_format::mantissa_explicit_bits(); word = pns.negative diff --git a/include/fast_float/simple_decimal_conversion.h b/include/fast_float/simple_decimal_conversion.h index 8ca0cb6b..e27c4c14 100644 --- a/include/fast_float/simple_decimal_conversion.h +++ b/include/fast_float/simple_decimal_conversion.h @@ -351,22 +351,5 @@ adjusted_mantissa compute_float(decimal &d) { return answer; } -template -adjusted_mantissa parse_long_mantissa(const char *first, const char* last) { - decimal d = parse_decimal(first, last); - // In some cases we can get lucky and looking at only the first 19 digits is enough. - // Let us try that. - const uint64_t mantissa = d.to_truncated_mantissa(); - const int64_t exponent = d.to_truncated_exponent(); - // credit: R. Oudompheng who first implemented this fast path (to my knowledge). - // It is rough, but it does the job of accelerating the slow path since most - // long streams of digits are determined after 19 digits. - adjusted_mantissa am1 = compute_float(exponent, mantissa); - adjusted_mantissa am2 = compute_float(exponent, mantissa+1); - // They must both agree and be both a successful result. - if(( am1 == am2 ) && (am1.power2 >= 0)) { return am1; } - return compute_float(d); -} - } // namespace fast_float #endif From de54bdbdc92389452744064e6e814aa4997488e8 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Tue, 5 Jan 2021 15:15:03 -0500 Subject: [PATCH 3/3] Adding a few tests. --- include/fast_float/ascii_number.h | 2 +- tests/basictest.cpp | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h index cbed1d14..13c90543 100644 --- a/include/fast_float/ascii_number.h +++ b/include/fast_float/ascii_number.h @@ -99,7 +99,7 @@ parsed_number_string parse_number_string(const char *p, const char *pend, chars_ const char *truncated_integer_part = p; while ((p != pend) && is_integer(*p)) { p++; } answer.too_many_digits = (p != truncated_integer_part); - if (i > minimal_nineteen_digit_integer) { + if (i >= minimal_nineteen_digit_integer) { exponent = p - truncated_integer_part; } if((p != pend) && (*p == '.')) { diff --git a/tests/basictest.cpp b/tests/basictest.cpp index 2df1b603..abeb4c04 100644 --- a/tests/basictest.cpp +++ b/tests/basictest.cpp @@ -288,6 +288,9 @@ TEST_CASE("64bit.inf") { } TEST_CASE("64bit.general") { + verify("10000000000000000000", 0x1.158e460913dp+63); + verify("10000000000000000000000000000001000000000000", 0x1.cb2d6f618c879p+142); + verify("10000000000000000000000000000000000000000001", 0x1.cb2d6f618c879p+142); verify("1.1920928955078125e-07", 1.1920928955078125e-07); verify("9355950000000000000.00000000000000000000000000000000001844674407370955161600000184467440737095516161844674407370955161407370955161618446744073709551616000184467440737095516166000001844674407370955161618446744073709551614073709551616184467440737095516160001844674407370955161601844674407370955674451616184467440737095516140737095516161844674407370955161600018446744073709551616018446744073709551611616000184467440737095001844674407370955161600184467440737095516160018446744073709551168164467440737095516160001844073709551616018446744073709551616184467440737095516160001844674407536910751601611616000184467440737095001844674407370955161600184467440737095516160018446744073709551616184467440737095516160001844955161618446744073709551616000184467440753691075160018446744073709",0x1.03ae05e8fca1cp+63); verify("-0",-0.0); @@ -341,6 +344,7 @@ TEST_CASE("64bit.general") { TEST_CASE("32bit.inf") { + verify("100000000000000000026609864708367276537402401181200809098131977453489758916313088.0", std::numeric_limits::infinity()); verify("INF", std::numeric_limits::infinity()); verify("-INF", -std::numeric_limits::infinity()); verify("INFINITY", std::numeric_limits::infinity()); @@ -360,6 +364,7 @@ TEST_CASE("32bit.general") { verify32(1.1754942107e-38f); verify32(1.1754943508e-45f); verify("-0", -0.0f); + verify("10000000000000000000", 0x1.158e46p+63f); verify("1090544144181609348835077142190", 0x1.b877ap+99f); verify("1.1754943508e-38", 1.1754943508e-38f); verify("30219.0830078125", 30219.0830078125f);