Skip to content

Commit

Permalink
Added new binary functions
Browse files Browse the repository at this point in the history
  • Loading branch information
Johnnathanalmeida committed May 11, 2022
1 parent a8479e9 commit 365b452
Show file tree
Hide file tree
Showing 5 changed files with 191 additions and 0 deletions.
8 changes: 8 additions & 0 deletions cpp/src/gandiva/function_registry_string.cc
Original file line number Diff line number Diff line change
Expand Up @@ -439,6 +439,14 @@ std::vector<NativeFunction> GetStringFunctionRegistry() {
kResultNullIfNull, "right_utf8_int32",
NativeFunction::kNeedsContext),

NativeFunction("binary", {}, DataTypeVector{binary()}, binary(),
kResultNullIfNull, "castBINARY_binary",
NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors),

NativeFunction("binary", {}, DataTypeVector{utf8()}, binary(),
kResultNullIfNull, "castBINARY_utf8",
NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors),

NativeFunction("castVARBINARY", {}, DataTypeVector{binary(), int64()}, binary(),
kResultNullIfNull, "castVARBINARY_binary_int64",
NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors),
Expand Down
19 changes: 19 additions & 0 deletions cpp/src/gandiva/precompiled/string_ops.cc
Original file line number Diff line number Diff line change
Expand Up @@ -705,6 +705,25 @@ CAST_VARCHAR_FROM_VARLEN_TYPE(binary)
CAST_VARBINARY_FROM_STRING_AND_BINARY(utf8)
CAST_VARBINARY_FROM_STRING_AND_BINARY(binary)

// Generates castBINARY_<TYPE>(context, data, data_len, out_length).
//
// The generated function performs no copy: on success it returns the input
// pointer `data` unchanged and stores `data_len` in `*out_length`.
// If `data_len` is negative it records an error message on `context`, stores 0
// in `*out_length` and returns an empty string literal.
//
// NOTE: only /* */ comments are safe inside the macro body, because a //
// comment would swallow the line-continuation backslash.
#define CAST_BINARY_FROM_STRING_AND_BINARY(TYPE)                                     \
  GANDIVA_EXPORT                                                                     \
  const char* castBINARY_##TYPE(gdv_int64 context, const char* data,                 \
                                gdv_int32 data_len, int32_t* out_length) {           \
    int32_t len = static_cast<int32_t>(data_len);                                    \
    if (len < 0) {                                                                   \
      gdv_fn_context_set_error_msg(context, "Output buffer length can't be negative"); \
      *out_length = 0;                                                               \
      return "";                                                                     \
    }                                                                                \
                                                                                     \
    *out_length = len;                                                               \
                                                                                     \
    return data;                                                                     \
  }

CAST_BINARY_FROM_STRING_AND_BINARY(utf8)
CAST_BINARY_FROM_STRING_AND_BINARY(binary)

// Helper macros are local to this section; drop them so they do not leak into
// the rest of the translation unit.
#undef CAST_BINARY_FROM_STRING_AND_BINARY
#undef CAST_VARBINARY_FROM_STRING_AND_BINARY

#define IS_NULL(NAME, TYPE) \
Expand Down
42 changes: 42 additions & 0 deletions cpp/src/gandiva/precompiled/string_ops_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -879,6 +879,48 @@ TEST(TestGdvFnStubs, TestCastVarbinaryBinary) {
ctx.Reset();
}

// Exercises castBINARY_utf8 directly: full-length input, a shorter prefix
// length, and a negative length that must report an error on the context.
TEST(TestGdvFnStubs, TestCastBinaryUtf8) {
  gandiva::ExecutionContext ctx;
  int64_t ctx_ptr = reinterpret_cast<int64_t>(&ctx);

  const char* input = "abc";
  int32_t out_len = 0;

  // Whole input: the result must equal the input string.
  const char* out = castBINARY_utf8(ctx_ptr, input, 3, &out_len);
  const std::string whole(out, out_len);
  EXPECT_EQ(whole, input);

  // A shorter length yields only the leading bytes.
  out = castBINARY_utf8(ctx_ptr, input, 1, &out_len);
  const std::string prefix(out, out_len);
  EXPECT_EQ(prefix, "a");

  // Negative length: empty result and an error recorded on the context.
  out = castBINARY_utf8(ctx_ptr, input, -3, &out_len);
  const std::string on_error(out, out_len);
  EXPECT_EQ(on_error, "");
  EXPECT_THAT(ctx.get_error(),
              ::testing::HasSubstr("Output buffer length can't be negative"));
  ctx.Reset();
}

// Exercises castBINARY_binary directly: full-length input, a shorter prefix
// length, and a negative length that must report an error on the context.
TEST(TestGdvFnStubs, TestCastBinaryBinary) {
  gandiva::ExecutionContext ctx;
  int64_t ctx_ptr = reinterpret_cast<int64_t>(&ctx);

  // The backslashes are escaped, so this is the 12-character literal text
  // \x41\x42\x43 (four characters per group), not three raw bytes.
  const char* input = "\\x41\\x42\\x43";
  int32_t out_len = 0;

  // Whole input (12 bytes).
  const char* out = castBINARY_binary(ctx_ptr, input, 12, &out_len);
  const std::string whole(out, out_len);
  EXPECT_EQ(whole, input);

  // First 8 bytes, i.e. the first two groups.
  out = castBINARY_binary(ctx_ptr, input, 8, &out_len);
  const std::string prefix(out, out_len);
  EXPECT_EQ(prefix, "\\x41\\x42");

  // Negative length: empty result and an error recorded on the context.
  out = castBINARY_binary(ctx_ptr, input, -10, &out_len);
  const std::string on_error(out, out_len);
  EXPECT_EQ(on_error, "");
  EXPECT_THAT(ctx.get_error(),
              ::testing::HasSubstr("Output buffer length can't be negative"));
  ctx.Reset();
}

TEST(TestStringOps, TestConcat) {
gandiva::ExecutionContext ctx;
uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
Expand Down
6 changes: 6 additions & 0 deletions cpp/src/gandiva/precompiled/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -573,6 +573,12 @@ const char* castVARBINARY_binary_int64(gdv_int64 context, const char* data,
gdv_int32 data_len, int64_t out_len,
int32_t* out_length);

// Casts a utf8 value to binary without copying.
// Per the definition in string_ops.cc: returns `data` itself and stores
// `data_len` in `*out_length`; if `data_len` is negative, records an error on
// `context`, stores 0 in `*out_length` and returns an empty string.
const char* castBINARY_utf8(gdv_int64 context, const char* data,
gdv_int32 data_len, int32_t* out_length);

// Identity cast from binary to binary, without copying.
// Per the definition in string_ops.cc: returns `data` itself and stores
// `data_len` in `*out_length`; if `data_len` is negative, records an error on
// `context`, stores 0 in `*out_length` and returns an empty string.
const char* castBINARY_binary(gdv_int64 context, const char* data,
gdv_int32 data_len, int32_t* out_length);

gdv_int32 levenshtein(int64_t context, const char* in1, int32_t in1_len, const char* in2,
int32_t in2_len);

Expand Down
116 changes: 116 additions & 0 deletions cpp/src/gandiva/tests/projector_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2674,4 +2674,120 @@ TEST_F(TestProjector, TestRegexpExtract) {
EXPECT_ARROW_ARRAY_EQUALS(exp_extract, outputs.at(0));
}


// End-to-end check of castVARBINARY(utf8, int64) -> binary through a Projector.
TEST_F(TestProjector, TestCastVarbinary) {
  // Input schema: the string to cast and the requested output length.
  auto field0 = field("f0", arrow::utf8());
  auto field1 = field("f1", arrow::int64());
  auto schema = arrow::schema({field0, field1});

  // output fields
  auto res_out1 = field("res_out1", arrow::binary());

  // Build expression
  auto cast_expr_1 =
      TreeExprBuilder::MakeExpression("castVARBINARY", {field0, field1}, res_out1);

  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {cast_expr_1}, TestConfiguration(), &projector);
  // Fatal assertion: `projector` is dereferenced below, so continuing after a
  // failed Make would crash instead of reporting the real error.
  ASSERT_TRUE(status.ok()) << status.message();

  // Create a row-batch with some sample data
  int num_records = 2;
  auto array0 = MakeArrowArrayUtf8({"a", "abc"}, {true, true});
  auto array1 = MakeArrowArrayInt64({1, 3}, {true, true});
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});

  // Expected output: each string cast to binary at its full length.
  auto out_1 = MakeArrowArrayBinary({"a", "abc"}, {true, true});

  arrow::ArrayVector outputs;

  // Evaluate expression
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  // Fatal assertion: outputs.at(0) below requires a successful evaluation.
  ASSERT_TRUE(status.ok()) << status.message();

  EXPECT_ARROW_ARRAY_EQUALS(out_1, outputs.at(0));
}

// End-to-end check of binary(utf8) -> binary through a Projector.
TEST_F(TestProjector, TestCastBinaryUTF) {
  auto field0 = field("f0", arrow::utf8());
  auto schema = arrow::schema({field0});

  // output fields
  auto res_out1 = field("res_out1", arrow::binary());

  // Build expression
  auto cast_expr_1 = TreeExprBuilder::MakeExpression("binary", {field0}, res_out1);

  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {cast_expr_1}, TestConfiguration(), &projector);
  // Fatal assertion: `projector` is dereferenced below, so continuing after a
  // failed Make would crash instead of reporting the real error.
  ASSERT_TRUE(status.ok()) << status.message();

  // Create a row-batch with some sample data
  int num_records = 2;
  auto array0 = MakeArrowArrayUtf8({"a", "abc"}, {true, true});
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0});

  // Expected output: the same bytes, typed as binary.
  auto out_1 = MakeArrowArrayBinary({"a", "abc"}, {true, true});

  arrow::ArrayVector outputs;

  // Evaluate expression
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  // Fatal assertion: outputs.at(0) below requires a successful evaluation.
  ASSERT_TRUE(status.ok()) << status.message();

  EXPECT_ARROW_ARRAY_EQUALS(out_1, outputs.at(0));
}

// End-to-end check of binary(binary) -> binary through a Projector.
TEST_F(TestProjector, TestCastBinaryBinary) {
  auto field0 = field("f0", arrow::binary());
  auto schema = arrow::schema({field0});

  // output fields
  auto res_out1 = field("res_out1", arrow::binary());

  // Build expression
  auto cast_expr_1 = TreeExprBuilder::MakeExpression("binary", {field0}, res_out1);

  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {cast_expr_1}, TestConfiguration(), &projector);
  // Fatal assertion: `projector` is dereferenced below, so continuing after a
  // failed Make would crash instead of reporting the real error.
  ASSERT_TRUE(status.ok()) << status.message();

  // Create a row-batch with some sample data
  int num_records = 2;

  // The input column must be a binary array to match the schema's binary
  // field; building it as utf8 would put a mistyped column into the batch.
  auto array0 = MakeArrowArrayBinary({"\\x41\\x42\\x43", "\\x41\\x42"}, {true, true});
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0});

  // Expected output: the input bytes passed through unchanged.
  auto out_1 = MakeArrowArrayBinary({"\\x41\\x42\\x43", "\\x41\\x42"}, {true, true});

  arrow::ArrayVector outputs;

  // Evaluate expression
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  // Fatal assertion: outputs.at(0) below requires a successful evaluation.
  ASSERT_TRUE(status.ok()) << status.message();

  EXPECT_ARROW_ARRAY_EQUALS(out_1, outputs.at(0));
}
} // namespace gandiva

0 comments on commit 365b452

Please sign in to comment.