Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 59 additions & 40 deletions include/fast_float/ascii_number.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,14 @@ struct parsed_number_string {
};



// Assuming that you use no more than 19 digits, this will
// parse an ASCII string.
fastfloat_really_inline
parsed_number_string parse_number_string(const char *p, const char *pend, chars_format fmt) noexcept {
parsed_number_string answer;
answer.valid = false;
answer.too_many_digits = false;
answer.negative = (*p == '-');
if ((*p == '-') || (*p == '+')) {
++p;
Expand All @@ -71,46 +73,89 @@ parsed_number_string parse_number_string(const char *p, const char *pend, chars_
}
}
const char *const start_digits = p;

// We can go forward up to 19 characters without overflow for sure, we might even go 20 characters
// or more if we have a decimal separator. We will adjust accordingly.
const char *pend_overflow_free = p + 19 > pend ? pend : p + 19;
uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad)

while ((p != pend) && is_integer(*p)) {
while ((p != pend_overflow_free) && is_integer(*p)) {
// a multiplication by 10 is cheaper than an arbitrary integer
// multiplication
i = 10 * i +
uint64_t(*p - '0'); // might overflow, we will handle the overflow later
uint64_t(*p - '0');
++p;
}
int64_t exponent = 0;
if ((p != pend) && (*p == '.')) {
constexpr uint64_t minimal_nineteen_digit_integer{1000000000000000000};
if (p == pend_overflow_free) { // uncommon path!
// We enter this branch if we hit p == pend_overflow_free
// before the decimal separator so we have a big integer.
// E.g., 23123123232132...3232321
// This is very uncommon.
while((i < minimal_nineteen_digit_integer) && (p != pend) && is_integer(*p)) {
i = i * 10 + uint64_t(*p - '0');
++p;
}
const char *truncated_integer_part = p;
while ((p != pend) && is_integer(*p)) { p++; }
answer.too_many_digits = (p != truncated_integer_part);
if (i >= minimal_nineteen_digit_integer) {
exponent = p - truncated_integer_part;
}
if((p != pend) && (*p == '.')) {
p++; // skip the '.'
const char *first_after_period = p;
while((i < minimal_nineteen_digit_integer) && (p != pend) && is_integer(*p)) {
i = i * 10 + uint64_t(*p - '0');
++p;
}
const char *truncated_point = p;
if(p > first_after_period) {
exponent = first_after_period - p;
}
// next we truncate:
while ((p != pend) && is_integer(*p)) { p++; }
answer.too_many_digits |= (p != truncated_point);
}
} else if (*p == '.') {
pend_overflow_free++; // go one further thanks to '.'
++p;
const char *first_after_period = p;
#if FASTFLOAT_IS_BIG_ENDIAN == 0
// Fast approach only tested under little endian systems
if ((p + 8 <= pend) && is_made_of_eight_digits_fast(p)) {
i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok
if ((p + 8 <= pend_overflow_free) && is_made_of_eight_digits_fast(p)) {
i = i * 100000000 + parse_eight_digits_unrolled(p);
p += 8;
if ((p + 8 <= pend) && is_made_of_eight_digits_fast(p)) {
i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok
if ((p + 8 <= pend_overflow_free) && is_made_of_eight_digits_fast(p)) {
i = i * 100000000 + parse_eight_digits_unrolled(p);
p += 8;
}
}
#endif
while ((p != pend) && is_integer(*p)) {
while ((p != pend_overflow_free) && is_integer(*p)) {
uint8_t digit = uint8_t(*p - '0');
++p;
i = i * 10 + digit; // in rare cases, this will overflow, but that's ok
i = i * 10 + digit;
}
if(p == pend_overflow_free) { // uncommon path!
// We might have leading zeros as in 0.000001232132...
// As long as i < minimal_nineteen_digit_integer then we know that we
// did not hit 19 digits (omitting leading zeroes).
while((i < minimal_nineteen_digit_integer) && (p != pend) && is_integer(*p)) {
i = i * 10 + uint64_t(*p - '0');
++p;
}
}
exponent = first_after_period - p;
if((p != pend) && is_integer(*p)) { // uncommon path
answer.too_many_digits = true;
do { p++; } while ((p != pend) && is_integer(*p));
}
}
// we must have encountered at least one integer!
if ((start_digits == p) || ((start_digits == p - 1) && (*start_digits == '.') )) {
return answer;
}
// digit_count is the exact number of digits.
int32_t digit_count =
int32_t(p - start_digits); // used later to guard against overflows
if(exponent > 0) {digit_count--;}
if ((fmt & chars_format::scientific) && (p != pend) && (('e' == *p) || ('E' == *p))) {
const char * location_of_e = p;
int64_t exp_number = 0; // exponential part
Expand Down Expand Up @@ -145,37 +190,11 @@ parsed_number_string parse_number_string(const char *p, const char *pend, chars_
}
answer.lastmatch = p;
answer.valid = true;

// If we frequently had to deal with long strings of digits,
// we could extend our code by using a 128-bit integer instead
// of a 64-bit integer. However, this is uncommon.
//
// We can deal with up to 19 digits.
if (((digit_count > 19))) { // this is uncommon
// It is possible that the integer had an overflow.
// We have to handle the case where we have 0.0000somenumber.
// We need to be mindful of the case where we only have zeroes...
// E.g., 0.000000000...000.
const char *start = start_digits;
while ((start != pend) && (*start == '0' || *start == '.')) {
if(*start == '.') { digit_count++; } // We will subtract it again later.
start++;
}
// We over-decrement by one when there is a decimal separator
digit_count -= int(start - start_digits);
if (digit_count > 19) {
answer.mantissa = 0xFFFFFFFFFFFFFFFF; // important: we don't want the mantissa to be used in a fast path uninitialized.
answer.too_many_digits = true;
return answer;
}
}
answer.too_many_digits = false;
answer.exponent = exponent;
answer.mantissa = i;
return answer;
}


// This should always succeed since it follows a call to parse_number_string
// This function could be optimized. In particular, we could stop after 19 digits
// and try to bail out. Furthermore, we should be able to recover the computed
Expand Down
38 changes: 0 additions & 38 deletions include/fast_float/float_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -201,44 +201,6 @@ struct decimal {
// Moves are allowed:
decimal(decimal &&) = default;
decimal &operator=(decimal &&other) = default;
// Generates a mantissa by truncating to 19 digits.
// This function should be reasonably fast.
// Note that the user is responsible to ensure that digits are
// initialized to zero when there are fewer than 19.
//
// Returns the leading max_digit_without_overflow entries of `digits`
// accumulated as one base-10 integer (first entry most significant).
// NOTE(review): max_digit_without_overflow is declared outside this view;
// the comment above implies it is 19 -- confirm.
// NOTE(review): both code paths assume each element of `digits` is one byte
// holding a decimal digit value 0-9 (not an ASCII character) -- confirm
// against the declaration of `digits`.
inline uint64_t to_truncated_mantissa() {
#if FASTFLOAT_IS_BIG_ENDIAN == 1
// Portable path: plain digit-by-digit accumulation.
uint64_t mantissa = 0;
for (uint32_t i = 0; i < max_digit_without_overflow;
i++) {
mantissa = mantissa * 10 + digits[i]; // can be accelerated
}
return mantissa;
#else
// Little-endian path: combine eight digits at a time with three
// multiply/shift steps instead of eight multiply/add steps.
uint64_t val;
// 8 first digits
::memcpy(&val, digits, sizeof(uint64_t));
// 2561 == 10 * 256 + 1: each byte pair (hi, lo) becomes hi * 10 + lo.
val = val * 2561 >> 8;
// 6553601 == 100 * 65536 + 1: pairs of 2-digit values become 4-digit values.
val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16;
// 42949672960001 == 10000 * 2^32 + 1: the two 4-digit halves become one
// 8-digit value, which is kept in the low 32 bits.
uint64_t mantissa =
uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32);
// 8 more digits for a total of 16
::memcpy(&val, digits + sizeof(uint64_t), sizeof(uint64_t));
val = val * 2561 >> 8;
val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16;
uint32_t eight_digits_value =
uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32);
// Shift the first eight digits up by 10^8 and append the next eight.
mantissa = 100000000 * mantissa + eight_digits_value;
// Remaining digits (index 16 up to max_digit_without_overflow) one at a time.
for (uint32_t i = 2 * sizeof(uint64_t); i < max_digit_without_overflow;
i++) {
mantissa = mantissa * 10 + digits[i]; // can be accelerated
}
return mantissa;
#endif
}
// Decimal exponent that pairs with to_truncated_mantissa(): the decimal point
// position lowered by the number of digits that the truncated mantissa keeps.
inline int32_t to_truncated_exponent() {
  const int32_t kept_digits = int32_t(max_digit_without_overflow);
  return decimal_point - kept_digits;
}
};

constexpr static double powers_of_ten_double[] = {
Expand Down
50 changes: 39 additions & 11 deletions include/fast_float/parse_number.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,18 +91,46 @@ from_chars_result from_chars(const char *first, const char *last,
}
answer.ec = std::errc(); // be optimistic
answer.ptr = pns.lastmatch;
// Next is Clinger's fast path.
if (binary_format<T>::min_exponent_fast_path() <= pns.exponent && pns.exponent <= binary_format<T>::max_exponent_fast_path() && pns.mantissa <=binary_format<T>::max_mantissa_fast_path()) {
value = T(pns.mantissa);
if (pns.exponent < 0) { value = value / binary_format<T>::exact_power_of_ten(-pns.exponent); }
else { value = value * binary_format<T>::exact_power_of_ten(pns.exponent); }
if (pns.negative) { value = -value; }
return answer;
adjusted_mantissa am;
// Most times, we have pns.too_many_digits = false.
if(pns.too_many_digits) {
// Uncommon path where we have too many digits.
//
// credit: R. Oudompheng who first implemented this fast path.
// It does the job of accelerating the slow path since most
// long streams of digits are determined after 19 digits.
// Note that mantissa+1 cannot overflow since mantissa < 10**19 and so
// mantissa+1 <= 10**19 < 2**64.
adjusted_mantissa am1 = compute_float<binary_format<T>>(pns.exponent, pns.mantissa);
adjusted_mantissa am2 = compute_float<binary_format<T>>(pns.exponent, pns.mantissa+1);
// They must both agree and be both a successful result.
if(( am1 == am2 ) && (am1.power2 >= 0)) {
am = am1;
} else {
// long way! (uncommon)
decimal d = parse_decimal(first, last);
am = compute_float<binary_format<T>>(d);
}
} else {
// We are entering the common path where the number of digits is no more than 19.
//
// Next is Clinger's fast path.
if (binary_format<T>::min_exponent_fast_path() <= pns.exponent && pns.exponent <= binary_format<T>::max_exponent_fast_path() && pns.mantissa <=binary_format<T>::max_mantissa_fast_path()) {
value = T(pns.mantissa);
if (pns.exponent < 0) { value = value / binary_format<T>::exact_power_of_ten(-pns.exponent); }
else { value = value * binary_format<T>::exact_power_of_ten(pns.exponent); }
if (pns.negative) { value = -value; }
return answer;
}
// Then we have our main routine.
am = compute_float<binary_format<T>>(pns.exponent, pns.mantissa);
// If we called compute_float<binary_format<T>>(pns.exponent, pns.mantissa) and we have an invalid power (am.power2 < 0),
// then we need to go the long way around again. This is very uncommon.
if(am.power2 < 0) { // long way! (uncommon)
decimal d = parse_decimal(first, last);
am = compute_float<binary_format<T>>(d);
}
}
adjusted_mantissa am = pns.too_many_digits ? parse_long_mantissa<binary_format<T>>(first,last) : compute_float<binary_format<T>>(pns.exponent, pns.mantissa);
// If we called compute_float<binary_format<T>>(pns.exponent, pns.mantissa) and we have an invalid power (am.power2 < 0),
// then we need to go the long way around again. This is very uncommon.
if(am.power2 < 0) { am = parse_long_mantissa<binary_format<T>>(first,last); }
uint64_t word = am.mantissa;
word |= uint64_t(am.power2) << binary_format<T>::mantissa_explicit_bits();
word = pns.negative
Expand Down
19 changes: 0 additions & 19 deletions include/fast_float/simple_decimal_conversion.h
Original file line number Diff line number Diff line change
Expand Up @@ -350,24 +350,5 @@ adjusted_mantissa compute_float(decimal &d) {
return answer;
}

// Converts a long digit string [first, last) to an adjusted_mantissa.
//
// The text is first parsed into a `decimal`. Before running the full decimal
// conversion, a shortcut is attempted: the truncated mantissa m and m + 1
// bracket the true value, so if both convert to the same valid result, the
// digits that were dropped cannot change the answer.
//
// credit: R. Oudompheng who first implemented this fast path (to my knowledge).
template <typename binary>
adjusted_mantissa parse_long_mantissa(const char *first, const char* last) {
  decimal d = parse_decimal(first, last);

  // Leading digits only, with the exponent that matches them.
  const uint64_t truncated_mantissa = d.to_truncated_mantissa();
  const int64_t truncated_exponent = d.to_truncated_exponent();

  // truncated_mantissa + 1 cannot overflow: the mantissa is below 10**19,
  // hence truncated_mantissa + 1 <= 10**19 < 2**64.
  adjusted_mantissa lower_bound = compute_float<binary>(truncated_exponent, truncated_mantissa);
  adjusted_mantissa upper_bound = compute_float<binary>(truncated_exponent, truncated_mantissa + 1);

  // Accept the shortcut only when both bounds agree and the result is valid.
  const bool bounds_agree = (lower_bound == upper_bound);
  const bool result_is_valid = (lower_bound.power2 >= 0);
  if (bounds_agree && result_is_valid) {
    return lower_bound;
  }

  // Otherwise fall back to the conversion that uses every digit.
  return compute_float<binary>(d);
}

} // namespace fast_float
#endif
5 changes: 5 additions & 0 deletions tests/basictest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,9 @@ TEST_CASE("64bit.inf") {
}

TEST_CASE("64bit.general") {
verify("10000000000000000000", 0x1.158e460913dp+63);
verify("10000000000000000000000000000001000000000000", 0x1.cb2d6f618c879p+142);
verify("10000000000000000000000000000000000000000001", 0x1.cb2d6f618c879p+142);
verify("1.1920928955078125e-07", 1.1920928955078125e-07);
verify("9355950000000000000.00000000000000000000000000000000001844674407370955161600000184467440737095516161844674407370955161407370955161618446744073709551616000184467440737095516166000001844674407370955161618446744073709551614073709551616184467440737095516160001844674407370955161601844674407370955674451616184467440737095516140737095516161844674407370955161600018446744073709551616018446744073709551611616000184467440737095001844674407370955161600184467440737095516160018446744073709551168164467440737095516160001844073709551616018446744073709551616184467440737095516160001844674407536910751601611616000184467440737095001844674407370955161600184467440737095516160018446744073709551616184467440737095516160001844955161618446744073709551616000184467440753691075160018446744073709",0x1.03ae05e8fca1cp+63);
verify("-0",-0.0);
Expand Down Expand Up @@ -341,6 +344,7 @@ TEST_CASE("64bit.general") {


TEST_CASE("32bit.inf") {
verify("100000000000000000026609864708367276537402401181200809098131977453489758916313088.0", std::numeric_limits<float>::infinity());
verify("INF", std::numeric_limits<float>::infinity());
verify("-INF", -std::numeric_limits<float>::infinity());
verify("INFINITY", std::numeric_limits<float>::infinity());
Expand All @@ -360,6 +364,7 @@ TEST_CASE("32bit.general") {
verify32(1.1754942107e-38f);
verify32(1.1754943508e-45f);
verify("-0", -0.0f);
verify("10000000000000000000", 0x1.158e46p+63f);
verify("1090544144181609348835077142190", 0x1.b877ap+99f);
verify("1.1754943508e-38", 1.1754943508e-38f);
verify("30219.0830078125", 30219.0830078125f);
Expand Down