Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ARROW-3060: [C++] Factor out string-to-X conversion routines #2433

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 10 additions & 110 deletions cpp/src/arrow/compute/kernels/cast.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,10 @@

#include "arrow/compute/kernels/cast.h"

#include <cerrno>
#include <cstdint>
#include <cstring>
#include <functional>
#include <limits>
#include <locale>
#include <memory>
#include <sstream>
#include <string>
Expand All @@ -40,6 +38,7 @@
#include "arrow/util/checked_cast.h"
#include "arrow/util/logging.h"
#include "arrow/util/macros.h"
#include "arrow/util/parsing.h"

#include "arrow/compute/context.h"
#include "arrow/compute/kernel.h"
Expand Down Expand Up @@ -732,72 +731,6 @@ struct CastFunctor<T, DictionaryType,
// ----------------------------------------------------------------------
// String to Number

// Cast a string to a number. Returns true on success, false on error.
// We rely on C++ istringstream for locale-independent parsing, which might
// not be the fastest option.

template <typename T>
typename std::enable_if<std::is_floating_point<T>::value,
bool>::type static CastStringToNumber(std::istringstream& ibuf,
T* out) {
ibuf >> *out;
return !ibuf.fail() && ibuf.eof();
}

// For integers, not all integer widths are handled by the C++ stdlib, so
// we check for limits outselves.

template <typename T>
typename std::enable_if<std::is_integral<T>::value && std::is_signed<T>::value,
bool>::type static CastStringToNumber(std::istringstream& ibuf,
T* out) {
static constexpr bool need_long_long = sizeof(T) > sizeof(long); // NOLINT
static constexpr T min_value = std::numeric_limits<T>::min();
static constexpr T max_value = std::numeric_limits<T>::max();

if (need_long_long) {
long long res; // NOLINT
ibuf >> res;
*out = static_cast<T>(res); // may downcast
if (res < min_value || res > max_value) {
return false;
}
} else {
long res; // NOLINT
ibuf >> res;
*out = static_cast<T>(res); // may downcast
if (res < min_value || res > max_value) {
return false;
}
}
return !ibuf.fail() && ibuf.eof();
}

template <typename T>
typename std::enable_if<std::is_integral<T>::value && std::is_unsigned<T>::value,
bool>::type static CastStringToNumber(std::istringstream& ibuf,
T* out) {
static constexpr bool need_long_long = sizeof(T) > sizeof(unsigned long); // NOLINT
static constexpr T max_value = std::numeric_limits<T>::max();

if (need_long_long) {
unsigned long long res; // NOLINT
ibuf >> res;
*out = static_cast<T>(res); // may downcast
if (res > max_value) {
return false;
}
} else {
unsigned long res; // NOLINT
ibuf >> res;
*out = static_cast<T>(res); // may downcast
if (res > max_value) {
return false;
}
}
return !ibuf.fail() && ibuf.eof();
}

template <typename O>
struct CastFunctor<O, StringType, enable_if_number<O>> {
void operator()(FunctionContext* ctx, const CastOptions& options,
Expand All @@ -806,19 +739,17 @@ struct CastFunctor<O, StringType, enable_if_number<O>> {

StringArray input_array(input.Copy());
auto out_data = GetMutableValues<out_type>(output, 1);
errno = 0;
// Instantiate the stringstream outside of the loop
std::istringstream ibuf;
ibuf.imbue(std::locale::classic());
internal::StringConverter<O> converter;

for (int64_t i = 0; i < input.length; ++i, ++out_data) {
if (input_array.IsNull(i)) {
continue;
}
auto str = input_array.GetString(i);
ibuf.clear();
ibuf.str(str);
if (!CastStringToNumber(ibuf, out_data)) {

int32_t length = -1;
auto str = input_array.GetValue(i, &length);
if (!converter(reinterpret_cast<const char*>(str), static_cast<size_t>(length),
out_data)) {
std::stringstream ss;
ss << "Failed to cast String '" << str << "' into " << output->type->ToString();
ctx->SetStatus(Status(StatusCode::Invalid, ss.str()));
Expand All @@ -831,38 +762,6 @@ struct CastFunctor<O, StringType, enable_if_number<O>> {
// ----------------------------------------------------------------------
// String to Boolean

// Helper function to cast a C string to a boolean. Returns true on success,
// false on error.

static bool CastStringtoBoolean(const char* s, size_t length, bool* out) {
if (length == 1) {
// "0" or "1"?
if (s[0] == '0') {
*out = false;
return true;
}
if (s[0] == '1') {
*out = true;
return true;
}
return false;
}
if (length == 4) {
// "true"?
*out = true;
return ((s[0] == 't' || s[0] == 'T') && (s[1] == 'r' || s[1] == 'R') &&
(s[2] == 'u' || s[2] == 'U') && (s[3] == 'e' || s[3] == 'E'));
}
if (length == 5) {
// "false"?
*out = false;
return ((s[0] == 'f' || s[0] == 'F') && (s[1] == 'a' || s[1] == 'A') &&
(s[2] == 'l' || s[2] == 'L') && (s[3] == 's' || s[3] == 'S') &&
(s[4] == 'e' || s[4] == 'E'));
}
return false;
}

template <typename O>
struct CastFunctor<O, StringType,
typename std::enable_if<std::is_same<BooleanType, O>::value>::type> {
Expand All @@ -871,6 +770,7 @@ struct CastFunctor<O, StringType,
StringArray input_array(input.Copy());
internal::FirstTimeBitmapWriter writer(output->buffers[1]->mutable_data(),
output->offset, input.length);
internal::StringConverter<O> converter;

for (int64_t i = 0; i < input.length; ++i) {
if (input_array.IsNull(i)) {
Expand All @@ -881,8 +781,8 @@ struct CastFunctor<O, StringType,
int32_t length = -1;
auto str = input_array.GetValue(i, &length);
bool value;
if (!CastStringtoBoolean(reinterpret_cast<const char*>(str),
static_cast<size_t>(length), &value)) {
if (!converter(reinterpret_cast<const char*>(str), static_cast<size_t>(length),
&value)) {
std::stringstream ss;
ss << "Failed to cast String '" << input_array.GetString(i) << "' into "
<< output->type->ToString();
Expand Down
10 changes: 10 additions & 0 deletions cpp/src/arrow/type_traits.h
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,16 @@ template <typename T>
using enable_if_integer =
typename std::enable_if<std::is_base_of<Integer, T>::value>::type;

template <typename T>
using enable_if_signed_integer =
typename std::enable_if<std::is_base_of<Integer, T>::value &&
std::is_signed<typename T::c_type>::value>::type;

template <typename T>
using enable_if_unsigned_integer =
typename std::enable_if<std::is_base_of<Integer, T>::value &&
std::is_unsigned<typename T::c_type>::value>::type;

template <typename T>
using enable_if_floating_point =
typename std::enable_if<std::is_base_of<FloatingPoint, T>::value>::type;
Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/util/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ ADD_ARROW_TEST(compression-test)
ADD_ARROW_TEST(decimal-test)
ADD_ARROW_TEST(key-value-metadata-test)
ADD_ARROW_TEST(rle-encoding-test)
ADD_ARROW_TEST(parsing-util-test)
ADD_ARROW_TEST(stl-util-test)
ADD_ARROW_TEST(thread-pool-test)
ADD_ARROW_TEST(lazy-test)
Expand Down
Loading