-
Notifications
You must be signed in to change notification settings - Fork 4k
ARROW-9928: [C++] Speed up integer parsing slightly #8104
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
Thanks for opening a pull request! Could you open an issue for this pull request on JIRA? Then could you also rename pull request title in the following format? See also: |
|
Hi and thank you for posting a PR. I recommend you read a bit about the contribution guidelines here: Also, can you explain in which situation this marks an improvement? Any benchmark numbers perhaps? |
|
On my Windows machine, the test results are as follows. BM_ParseUnsignedBreak uses a break statement to exit the unrolled loop. (/O2 /Oi) On my Linux machines, (/O3) Below is my benchmark code.
#include <stdlib.h>
#include <time.h>
#include <iostream>
#include <string>
#include <vector>
#include "benchmark/benchmark.h"
// Compiler-hint macros with the ARROW_ prefix, defined locally so that this
// benchmark is self-contained.
//
//   ARROW_PREDICT_FALSE(x) / ARROW_PREDICT_TRUE(x): branch-probability hints;
//       they evaluate to the truth value of `x`.
//   ARROW_NORETURN / ARROW_NOINLINE: function attributes.
//   ARROW_PREFETCH(addr): prefetch hint, a no-op where unsupported.
//
// Every branch below defines the same set of macros, so code using any of
// them compiles regardless of which branch is selected.
#if defined(__GNUC__)
// Compilers that define __GNUC__: use the builtin hints and attributes.
#define ARROW_PREDICT_FALSE(x) (__builtin_expect(!!(x), 0))
#define ARROW_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
#define ARROW_NORETURN __attribute__((noreturn))
#define ARROW_NOINLINE __attribute__((noinline))
#define ARROW_PREFETCH(addr) __builtin_prefetch(addr)
#elif defined(_MSC_VER)
// MSVC: attributes are available, branch hints and prefetch are not used.
#define ARROW_NORETURN __declspec(noreturn)
#define ARROW_NOINLINE __declspec(noinline)
#define ARROW_PREDICT_FALSE(x) (x)
#define ARROW_PREDICT_TRUE(x) (x)
#define ARROW_PREFETCH(addr)
#else
// Any other compiler: every macro degrades to a no-op / pass-through.
#define ARROW_NORETURN
#define ARROW_NOINLINE
#define ARROW_PREDICT_FALSE(x) (x)
#define ARROW_PREDICT_TRUE(x) (x)
#define ARROW_PREFETCH(addr)
#endif
// One unrolled parsing step with early exit.
//
// While input remains (`length > 0`) it consumes one character from `s`,
// multiplies `result` by 10 and adds the decoded digit. Once the input is
// exhausted it executes `break`, so it must be expanded inside a breakable
// statement (the callers in this file use `do { ... } while (false)`).
//
// Expects `s`, `length` and `result` to be in scope at the expansion site and
// the enclosing function to return bool: a non-digit character makes that
// function `return false`.
//
// No overflow check is performed here; the caller must only use this step for
// digit positions that cannot overflow C_TYPE.
#define PARSE_UNSIGNED_ITERATION_BREAK(C_TYPE) \
if (length > 0) { \
uint8_t digit = ParseDecimalDigit(*s++); \
result = static_cast<C_TYPE>(result * 10U); \
length--; \
if (ARROW_PREDICT_FALSE(digit > 9U)) { \
/* Non-digit: reject the whole input */ \
return false; \
} \
result = static_cast<C_TYPE>(result + digit); \
} else { \
break; \
}
// One unrolled parsing step without early exit.
//
// While input remains (`length > 0`) it consumes one character from `s`,
// multiplies `result` by 10 and adds the decoded digit. When the input is
// exhausted the step does nothing and control falls through to whatever
// follows the expansion, so every later step still re-tests `length`.
//
// Expects `s`, `length` and `result` to be in scope at the expansion site and
// the enclosing function to return bool: a non-digit character makes that
// function `return false`.
//
// No overflow check is performed here; the caller must only use this step for
// digit positions that cannot overflow C_TYPE.
#define PARSE_UNSIGNED_ITERATION(C_TYPE) \
if (length > 0) { \
uint8_t digit = ParseDecimalDigit(*s++); \
result = static_cast<C_TYPE>(result * 10U); \
length--; \
if (ARROW_PREDICT_FALSE(digit > 9U)) { \
/* Non-digit: reject the whole input */ \
return false; \
} \
result = static_cast<C_TYPE>(result + digit); \
}
// Final unrolled parsing step, for the last digit position C_TYPE can hold.
//
// If input remains, it consumes one more character and makes the enclosing
// bool-returning function `return false` when any of these holds:
//   - `result` is already greater than max(C_TYPE) / 10, so multiplying by 10
//     would overflow;
//   - characters are still left after this one (too many digits);
//   - the character is not a decimal digit;
//   - adding the digit wrapped around (the sum is smaller than `result`).
// Otherwise `result` is updated with the new digit. When no input remains the
// step does nothing.
//
// Expects `s`, `length` and `result` to be in scope at the expansion site.
// Uses std::numeric_limits, so <limits> must be available in the translation
// unit.
#define PARSE_UNSIGNED_ITERATION_LAST(C_TYPE) \
if (length > 0) { \
if (ARROW_PREDICT_FALSE(result > \
std::numeric_limits<C_TYPE>::max() / 10U)) { \
/* Overflow: result * 10 would not fit in C_TYPE */ \
return false; \
} \
uint8_t digit = ParseDecimalDigit(*s++); \
result = static_cast<C_TYPE>(result * 10U); \
C_TYPE new_result = static_cast<C_TYPE>(result + digit); \
if (ARROW_PREDICT_FALSE(--length > 0)) { \
/* Too many digits */ \
return false; \
} \
if (ARROW_PREDICT_FALSE(digit > 9U)) { \
/* Non-digit */ \
return false; \
} \
if (ARROW_PREDICT_FALSE(new_result < result)) { \
/* Overflow: the addition wrapped around */ \
return false; \
} \
result = new_result; \
}
// Maps a character to its distance from '0', truncated to uint8_t.
// '0'..'9' yield 0..9; any other character yields a value greater than 9
// (characters below '0' wrap around in the narrowing cast), which callers
// treat as "not a digit".
inline uint8_t ParseDecimalDigit(char c) {
  const int offset_from_zero = c - '0';
  return static_cast<uint8_t>(offset_from_zero);
}
// Parses the first `length` characters of `s` as an unsigned decimal number
// into `*out`, leaving the unrolled sequence as soon as the input is consumed.
//
// The first 19 digit positions use the unchecked step (19 decimal digits
// always fit in uint64_t); the 20th uses the overflow-checked final step.
//
// Returns false, without writing `*out`, if a character is not a decimal
// digit, if there are more than 20 characters (leading zeros count), or if
// the value does not fit in uint64_t. An empty input (`length == 0`) returns
// true with `*out` set to 0.
inline bool ParseUnsignedBreak(const char* s, size_t length, uint64_t* out) {
uint64_t result = 0;
// do/while(false) exists only to give the BREAK steps something to break from.
do {
PARSE_UNSIGNED_ITERATION_BREAK(uint64_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint64_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint64_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint64_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint64_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint64_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint64_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint64_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint64_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint64_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint64_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint64_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint64_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint64_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint64_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint64_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint64_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint64_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint64_t);
// 20th digit: the only position where uint64_t can overflow.
PARSE_UNSIGNED_ITERATION_LAST(uint64_t);
} while (false);
*out = result;
return true;
}
// Parses the first `length` characters of `s` as an unsigned decimal number
// into `*out`, using a fully unrolled sequence with no early exit: once the
// input is consumed, each remaining step still tests `length > 0` and falls
// through.
//
// The first 19 digit positions use the unchecked step (19 decimal digits
// always fit in uint64_t); the 20th uses the overflow-checked final step.
//
// Returns false, without writing `*out`, if a character is not a decimal
// digit, if there are more than 20 characters (leading zeros count), or if
// the value does not fit in uint64_t. An empty input (`length == 0`) returns
// true with `*out` set to 0.
inline bool ParseUnsigned(const char* s, size_t length, uint64_t* out) {
uint64_t result = 0;
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
// 20th digit: the only position where uint64_t can overflow.
PARSE_UNSIGNED_ITERATION_LAST(uint64_t);
*out = result;
return true;
}
// 32-bit overload: parses the first `length` characters of `s` as an unsigned
// decimal number into `*out`, leaving the unrolled sequence as soon as the
// input is consumed.
//
// The first 9 digit positions use the unchecked step (9 decimal digits always
// fit in uint32_t); the 10th uses the overflow-checked final step.
//
// Returns false, without writing `*out`, if a character is not a decimal
// digit, if there are more than 10 characters (leading zeros count), or if
// the value does not fit in uint32_t. An empty input (`length == 0`) returns
// true with `*out` set to 0.
inline bool ParseUnsignedBreak(const char* s, size_t length, uint32_t* out) {
uint32_t result = 0;
// do/while(false) exists only to give the BREAK steps something to break from.
do {
PARSE_UNSIGNED_ITERATION_BREAK(uint32_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint32_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint32_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint32_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint32_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint32_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint32_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint32_t);
PARSE_UNSIGNED_ITERATION_BREAK(uint32_t);
// 10th digit: the only position where uint32_t can overflow.
PARSE_UNSIGNED_ITERATION_LAST(uint32_t);
} while (false);
*out = result;
return true;
}
// File-level accumulator: both benchmarks add every parsed value to it
// (presumably so that the parse results are not simply discarded).
static uint64_t x = 0;
// Benchmarks ParseUnsigned (fully unrolled, no early exit) on strings made of
// the character '1' with every length from 0 to 21 inclusive.
static void BM_ParseUnsigned(benchmark::State& state) {
  // Setup (not timed): sample i is a string of i '1' characters.
  std::vector<std::string> samples;
  for (size_t i = 0; i < 22; i++) {
    samples.emplace_back(i, '1');
  }
  for (auto _ : state) {
    // This code gets timed
    for (const auto& sample : samples) {
      // Zero-initialised because the parser does not write its output on
      // failure, and the 21-character sample is rejected as too long; without
      // this the addition below would read an indeterminate value.
      uint64_t val = 0;
      ParseUnsigned(sample.c_str(), sample.size(), &val);
      x += val;
    }
    // Make the accumulated result observable so the compiler cannot discard
    // the parsing work being measured.
    benchmark::DoNotOptimize(x);
  }
}
// Benchmarks ParseUnsignedBreak (unrolled, exits as soon as the input is
// consumed) on strings made of the character '1' with every length from 0 to
// 21 inclusive.
static void BM_ParseUnsignedBreak(benchmark::State& state) {
  // Setup (not timed): sample i is a string of i '1' characters.
  std::vector<std::string> samples;
  for (size_t i = 0; i < 22; i++) {
    samples.emplace_back(i, '1');
  }
  for (auto _ : state) {
    // This code gets timed
    for (const auto& sample : samples) {
      // Zero-initialised because the parser does not write its output on
      // failure, and the 21-character sample is rejected as too long; without
      // this the addition below would read an indeterminate value.
      uint64_t val = 0;
      ParseUnsignedBreak(sample.c_str(), sample.size(), &val);
      x += val;
    }
    // Make the accumulated result observable so the compiler cannot discard
    // the parsing work being measured.
    benchmark::DoNotOptimize(x);
  }
  // std::cout << x << std::endl;
}
// Register both parser variants as benchmarks (early-exit version first)
BENCHMARK(BM_ParseUnsignedBreak);
BENCHMARK(BM_ParseUnsigned);
// Run the registered benchmarks
BENCHMARK_MAIN();
|
Hi, I found that parsing unsigned digits is slow: the unrolled loop does not exit when the remaining length reaches zero.
|
Thank you. The speed up is relatively minor, but I can confirm it on Ubuntu 20.04 with clang 10. |
|
Integer parsing micro-benchmarks:
|
|
CSV conversion micro-benchmark:
|
By exiting early from the parsing routine when the input is exhausted, we can save a little bit of processing time.