Skip to content

Commit

Permalink
ARROW-15699: [C++][Gandiva] Fix implementation of left and right functions to handle more cases
Browse files Browse the repository at this point in the history

Proper handling of multibyte characters

Added conditions to handle the following cases:
case where left('abcdef', -6) -> "" and left('abcdef', -7) -> ""
case where right('abcdef', -6) -> "" and right('abcdef', -7) -> ""
  • Loading branch information
Nivia committed Mar 17, 2022
1 parent 61eac98 commit 5ee9753
Showing 1 changed file with 5 additions and 19 deletions.
24 changes: 5 additions & 19 deletions cpp/src/gandiva/precompiled/string_ops.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2172,7 +2172,7 @@ const char* left_utf8_int32(gdv_int64 context, const char* text, gdv_int32 text_
}

// case where left('abcdef', -6) -> "" and left('abcdef', -7) -> ""
if(number < 0 && abs(number) >= char_count) {
if(number < 0 && -(number) >= char_count) {
*out_len = 0;
return "";
}
Expand Down Expand Up @@ -2223,13 +2223,12 @@ const char* right_utf8_int32(gdv_int64 context, const char* text, gdv_int32 text
}

//case where right('abcdef', -6) -> "" and right('abcdef', -7) -> ""
if(number < 0 && abs(number) >= char_count) {
if(number < 0 && -(number) >= char_count) {
*out_len = 0;
return "";
}

int32_t start_char_pos; // the char result start position (inclusive)
int32_t end_pos; // the char result end position (inclusive)

if (number > 0) {
// case where right('abc', 5) ==> 'abc' start_char_pos=1.
Expand All @@ -2238,26 +2237,13 @@ const char* right_utf8_int32(gdv_int64 context, const char* text, gdv_int32 text
start_char_pos = number * -1;
}

end_pos = char_count;

// calculate the start byte and end byte position
// calculate the start byte position
int32_t start_byte_pos = utf8_byte_pos(context, text, text_len, start_char_pos);
int32_t end_byte_pos = utf8_byte_pos(context, text, text_len, end_pos);

//calculate output length
*out_len = (end_byte_pos - start_byte_pos);
*out_len = (text_len - start_byte_pos);

// try to allocate memory for the response
char* ret =
reinterpret_cast<gdv_binary>(gdv_fn_context_arena_malloc(context, *out_len));
if (ret == nullptr) {
gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
*out_len = 0;
return "";
}

memcpy(ret, text + start_byte_pos, *out_len);
return ret;
return text + start_byte_pos;
}

FORCE_INLINE
Expand Down

0 comments on commit 5ee9753

Please sign in to comment.