Skip to content
Permalink
Browse files

Oops, miss these two files as well

  • Loading branch information...
Shuxin Yang
Shuxin Yang committed Jan 14, 2015
1 parent ab36d62 commit addc5e3dfeba64dc34184e119af7baab3de9bf28
Showing with 378 additions and 0 deletions.
  1. +323 −0 scan_fp_relax.c
  2. +55 −0 scan_fp_strict.c
@@ -0,0 +1,323 @@
/*****************************************************************************
*
* This file tries to parse floating point literal quickly. There are two
* implementations in this files:
*
* 1). The one very relaxed floating point mode (FP_RELAX >= 2), and
* 2). The one with *almost* restrict mode. (FP_RELAX == 1).
*
* In variant 1), we evaluate a floating point number, say 12.345E12 as following:
* a) let d1 = 12
* b) let d2 = 345 * (10**-2 * 10 **-1) (NOTE: reciprocal is very imprecise)
* c) let d3 = d1 + d2
* d) let result = d3 * (10**3 + 10**2)
*
* The reason and the only reason to keep the toy-grade variant-1 is to set
* a bar (in terms of parsing speed) for the future work).
*
* Variant-2) is *almost* restrict. It can efficiently parse a floating point
* literal if it's in the form of nnnn.mmm, and the integer part contains no
* more than 20 digits, fraction part contains than 16 digits (it the liternal
* does not satisfy this restrct, it would resort to expensive strtod() libc
* function call). Variant 2) evaluate a liternal, say 123.456 this way:
* a) let d1 = 123
* b) let d2 = 456/10**3
* c) let result = d1 + d2
*
* FIXME: {
* case 1: If interger part is 0 (d1 == 0), the "result" is precise
* unless the rounding mode we are using in the '/' operator
* is not what json expect (But does Json spec define which
* rounding mode should we go).
*
* case 2: If the interger-part > (1<<53), "result == d1" should hold.
* case 3: If the integer-part < (1<<53), the rounding in step b) could
* ripple to step c) and hence incur 1/(2**53) relative error.
* }
*
* TODO: Implement the algorithm depicted in
* http://www.exploringbinary.com/correct-decimal-to-floating-point-using-big-integers/,
* Make sure the common cases can be parsed as fast as the variant-1
* and variant-2.
*/
#include <stdint.h>
#include <stdlib.h> /* for str2od() */
#include "util.h"
#include "scan_fp.h"

#if FP_RELAX >= 2

/* HINT: max double = 1.797693E+308, min-double = 2.225074E-308 */
#define MAX_EXP_ABS 308

static double pos_pow10[22] = { 1, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17,
1e18, 1e19, 1e20, 1e21};

static double neg_pow10[22] = { 0.1, 1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7,
1e-8, 1e-9, 1e-10, 1e-11, 1e-12, 1e-13, 1e-14,
1e-15, 1e-16, 1e-17, 1e-18, 1e-19, 1e-20,
1e-21};

static double
mypow10(int exp, int negative) {
ASSERT(exp >= 0 && exp <= MAX_EXP_ABS);
if (exp < sizeof(pos_pow10)/sizeof(pos_pow10[0])) {
return negative ? neg_pow10[exp] : pos_pow10[exp];
}

static const double exp_fact1[9] = {
1e1, 1e2, 1e4, 1e8, 1e16, 1e32, 1e64, 1e128, 1e256
};

static const double exp_fact2[9] = {
1e-1, 1e-2, 1e-4, 1e-8, 1e-16, 1e-32, 1e-64, 1e-128, 1e-256
};

double val = 1;
const double* dbl_fact = negative ? exp_fact2 : exp_fact1;

int idx = 0;
while (exp) {
if (exp & 1) {
val *= dbl_fact[idx];
}
exp = exp >> 1;
idx++;
}

return val;
}

int
scan_fp(const char** scan_ptr, const char* str_end, int_db_union_t* result) {
const char* str, *p;
str = p = *scan_ptr;
int negative = 0;

if (*p == '-') {
negative = 1;
p++;
}

int int_len = 0;
int64_t int_val = 0;

/* step 1: Calculate the integer part */
while (p < str_end) {
char c = *p;
if (c >= '0' && c <= '9') {
int_val = c - '0' + int_val * 10;
p++;
} else {
break;
}
}

int_len = p - str;
if (unlikely(p >= str_end) || unlikely(int_len >= 20)) {
/*The "len < 20" condition is to guaranteed the value fit in int64_t.*/
goto too_nasty;
}

char c = *p;
if (c != '.' && ((c | 0x20) != 'e')) {
result->int_val = negative ? - int_val : int_val;
*scan_ptr = p;
return 1;
}

/* step 2: Calculate the fraction part */
double frac = 0.0;
int frac_len = 0;
if (c == '.') {
const char* frac_start = ++p;
while (p < str_end) {
char c = *p;
if (c >= '0' && c <= '9') {
frac = c - '0' + frac * 10;
p++;
} else {
break;
}
}

frac_len = p - frac_start;
if (frac_len > 20) {
goto too_nasty;
}
frac = frac * mypow10(frac_len, 1);
}

if (unlikely(p >= str_end)) {
/* The floating-point literal per se is nothing wrong. However, this
* condition implies that the literal is the last token of the json
* being processed, which is not correct.
*/
return 0;
}

/* step 3: Calculate the exponent part */
double dbl_result = (double)int_val + frac;
if (negative)
dbl_result = - dbl_result;

c = *p;
int exp = 0;
if ((c | 0x20) == 'e') {
if (int_len != 1)
goto too_nasty;

p++;
int neg_exp = 0;
if (p < str_end && *p == '-') {
neg_exp = 1;
p++;
}

while (p < str_end) {
char c = *p;
if (c >= '0' && c <= '9') {
exp = c - '0' + exp * 10;
/* HINT: max double = 1.797693E+308,
* min-double = 2.225074E-308
*/
if (exp >= 308)
goto too_nasty;
p++;
} else {
break;
}
}

dbl_result *= mypow10(exp, neg_exp);
}
result->db_val = dbl_result;
*scan_ptr = p;
return 2;

too_nasty:
{
fprintf(stderr, "too nasty %s!\n", str);
char* fp_end;
double d = strtod(str, &fp_end);
if (fp_end != str) {
result->db_val = d;
*scan_ptr = fp_end;
return 2;
}
}
return 0;
}
#endif

#if FP_RELAX == 1

/* If the fraction part can fit in 53-bit, it can be represented by a
* "double"-typed value exactly. The "((long long)1 << 53) - 1" evaluates to
* 9007199254740991 which has 16 digits. If the faction part has more than 16
* digit, we simply give up.
*/
#define MAX_FRAC_LEN 16

static double
mypow10(int exp) {
ASSERT(exp <= MAX_FRAC_LEN);
static double pos_pow10[16] = { 1, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
1e10, 1e11, 1e12, 1e13, 1e14, 1e15 };

return pos_pow10[exp];
}

int
scan_fp(const char** scan_ptr, const char* str_end, int_db_union_t* result) {
const char* str, *p;
str = p = *scan_ptr;
int negative = 0;

if (*p == '-') {
negative = 1;
p++;
}

int int_len = 0;
int64_t int_val = 0;

/* step 1: Calculate the integer part */
while (p < str_end) {
char c = *p;
if (c >= '0' && c <= '9') {
int_val = c - '0' + int_val * 10;
p++;
} else {
break;
}
}

int_len = p - str;
if (unlikely(p >= str_end) || unlikely(int_len >= 20)) {
/*The "len < 20" condition is to guaranteed the value fit in int64_t.*/
goto too_nasty;
}

char c = *p;
if (c != '.' && ((c | 0x20) != 'e')) {
result->int_val = negative ? - int_val : int_val;
*scan_ptr = p;
return 1;
}

/* step 2: Calculate the fraction part */
int64_t frac_int = 0;
int frac_len = 0;
if (c == '.') {
const char* frac_start = ++p;
while (p < str_end) {
char c = *p;
if (c >= '0' && c <= '9') {
frac_int = c - '0' + frac_int * 10;
p++;
} else {
break;
}
}

frac_len = p - frac_start;
if (frac_len >= (((int64_t)1) << 53) - 1) {
/* make sure frac_len can fit in 53 bit, such that it can be
* represented exactly by a double.
*/
goto too_nasty;
}
}

if (unlikely(p >= str_end)) {
/* The floating-point literal per se is nothing wrong. However, this
* condition implies that the literal is the last token of the json
* being processed, which is not correct.
*/
return 0;
}

/* step 3: give up if it's in scientific notation */
if ((*p | 0x20) == 'e') {
goto too_nasty;
}

result->db_val = int_val + (double)frac_int / mypow10(frac_len);
*scan_ptr = p;
return 2;

too_nasty:
{
char* fp_end;
double d = strtod(str, &fp_end);
if (fp_end != str) {
result->db_val = d;
*scan_ptr = fp_end;
return 2;
}
}
return 0;
}
#endif
@@ -0,0 +1,55 @@
#include <stdint.h>
#include <stdlib.h> /* for str2od() */
#include "util.h"
#include "scan_fp.h"

#if FP_RELAX == 0
/* i.e strict floating point mode */

int
scan_fp(const char** scan_str, const char* str_e, int_db_union_t* result) {
const char* str_save = *scan_str;
const char* str = *scan_str;

int is_negative = (*str == '-') ? 1 : 0;
str += is_negative;

/* More often than not, the number is of interger type that can fit in
* int64_t. So, we speculatively try to convert input string into
* an int64_t as we go along. In case it turns out to be a floating
* point number, or the interger is too big to fit in int64_t, we start
* over converting the string to "double"-typed value.
*/
int64_t int_val = 0;

while (str < str_e) {
char c = *str;
if (c >= '0' && c <= '9') {
int_val = int_val * 10 + (c - '0');
str++;
} else {
if (c != '.' && (c | 0x20) != 'e') {
if (str - str_save < 20) {
/* It's guaranteed to fit in int64_t */
if (!is_negative) {
result->int_val = int_val;
} else {
result->int_val = - int_val;
}
*scan_str = str;
return 1;
}
}

double d = strtod(str_save, (char**)scan_str);
if (*scan_str != str_save) {
result->db_val = d;
return 2;
}
return 0;
}
}
return 0;
}

#endif /* FP_RELAX == 0 */

0 comments on commit addc5e3

Please sign in to comment.
You can’t perform that action at this time.