Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ARROW-14048: [C++][Gandiva] Cache only object code in memory instead of entire module #11193

Conversation

augustoasilva
Copy link
Contributor

@augustoasilva augustoasilva commented Sep 20, 2021

Make Gandiva cache only the object code instead of the entire LLVM module, reducing memory consumption.

To measure the optimization, the following test was written to compare the memory allocated by the Gandiva cache before and after this change:

// Memory-profiling scenario for the Gandiva projector cache.
//
// Builds ten projectors one after another (six over an int32 schema, two over
// a float64 schema, two over a utf8 schema), releasing each one before the
// next is built. A fixed pause separates every step so that the individual
// allocation phases can be told apart in an external heap profiler's timeline.
// The only assertion is that every Projector::Make call succeeds.
TEST_F(TestProjector, TestObjCache) {
  // Pause between steps, in microseconds (0.5 s). Kept as an integer so that
  // no floating-point value is implicitly converted at the usleep() call.
  constexpr unsigned int kPauseMicros = 500000;

  // schema for input fields
  auto field0 = field("f0", int32());
  auto field1 = field("f2", int32());
  auto field2 = field("f0", arrow::float64());
  auto field3 = field("f2", arrow::float64());
  auto field4 = field("f0", arrow::utf8());

  auto schema1 = arrow::schema({field0, field1});
  auto schema2 = arrow::schema({field2, field3});
  auto schema3 = arrow::schema({field4});

  // output fields
  auto field_sum = field("add", int32());
  auto field_sub = field("subtract", int32());
  auto field_mul = field("multiply", int32());
  auto field_div = field("divide", int32());
  auto field_eq = field("equal", arrow::boolean());
  auto field_lt = field("less_than", arrow::boolean());
  auto field_logb = arrow::field("logb", arrow::float64());
  auto field_power = arrow::field("power", arrow::float64());
  auto res_float8 = field("res_float8", arrow::float64());
  auto res_int4 = field("castBIGINT", arrow::int32());

  // Build expressions for schema1
  auto sum_expr = TreeExprBuilder::MakeExpression("add", {field0, field1}, field_sum);
  auto sub_expr =
      TreeExprBuilder::MakeExpression("subtract", {field0, field1}, field_sub);
  auto mul_expr =
      TreeExprBuilder::MakeExpression("multiply", {field0, field1}, field_mul);
  auto div_expr = TreeExprBuilder::MakeExpression("divide", {field0, field1}, field_div);
  auto eq_expr = TreeExprBuilder::MakeExpression("equal", {field0, field1}, field_eq);
  auto lt_expr = TreeExprBuilder::MakeExpression("less_than", {field0, field1}, field_lt);

  // Build expressions for schema2
  auto logb_expr = TreeExprBuilder::MakeExpression("log", {field2, field3}, field_logb);
  auto power_expr =
      TreeExprBuilder::MakeExpression("power", {field2, field3}, field_power);

  // Build expressions for schema3
  auto cast_expr_float8 =
      TreeExprBuilder::MakeExpression("castFLOAT8", {field4}, res_float8);
  auto cast_expr_int4 = TreeExprBuilder::MakeExpression("castINT", {field4}, res_int4);

  auto configuration = TestConfiguration();

  // One entry per projector to build, in build order: the schema it is built
  // against and the single-expression vector it evaluates.
  struct ProjectorCase {
    decltype(schema1) schema;
    ExpressionVector exprs;
  };
  const ProjectorCase cases[] = {
      // Uses field0, field1 and schema1
      {schema1, {sum_expr}},
      {schema1, {sub_expr}},
      {schema1, {mul_expr}},
      {schema1, {div_expr}},
      {schema1, {eq_expr}},
      {schema1, {lt_expr}},
      // Uses field2, field3 and schema2
      {schema2, {logb_expr}},
      {schema2, {power_expr}},
      // Uses field4 and schema3
      {schema3, {cast_expr_float8}},
      {schema3, {cast_expr_int4}},
  };

  usleep(kPauseMicros);

  for (const auto& test_case : cases) {
    std::shared_ptr<Projector> projector;
    auto status =
        Projector::Make(test_case.schema, test_case.exprs, configuration, &projector);
    ASSERT_OK(status);
    usleep(kPauseMicros);

    // When this message is logged, `projector` is the only shared_ptr owning
    // the instance, so the reset() below destroys it immediately.
    // use_count() == 1 replaces shared_ptr::unique(), which is deprecated in
    // C++17 and removed in C++20.
    if (projector.use_count() == 1) {
      ARROW_LOG(INFO) << "ONLY PTR TO THE PROJECTOR INSTANCE.";
    }

    // Release through the shared_ptr itself: calling `delete` on the
    // shared_ptr does not compile, and `delete projector.get()` causes a
    // double free when the shared_ptr later runs its own deleter.
    projector.reset();

    usleep(kPauseMicros);
  }
}

The results can be seen here:

Projector Module Cache Size (Bytes) Object Code Cache Size (Bytes) Optimization
Projector 01 1,640,843 97,920 94.03%
Projector 02 1,559,247 16,135 98.97%
Projector 03 1,559,247 16,135 98.97%
Projector 04 1,558,080 16,622 98.93%
Projector 05 1,593,485 65,399 95.90%
Projector 06 1,544,122 16,231 98.95%
Projector 07 1,859,869 17,691 99.05%
Projector 08 1,822,195 16,095 99.12%
Projector 09 1,552,585 16,208 98.96%
Projector 10 1,551,472 16,367 98.95%

Memory usage was tracked with the Heaptrack application.

@github-actions
Copy link

@github-actions
Copy link

⚠️ Ticket has not been started in JIRA, please click 'Start Progress'.

cpp/src/gandiva/base_cache_key.h Outdated Show resolved Hide resolved
cpp/src/gandiva/greedy_dual_size_cache.h Outdated Show resolved Hide resolved
cpp/src/gandiva/greedy_dual_size_cache.h Outdated Show resolved Hide resolved
cpp/src/gandiva/engine.cc Outdated Show resolved Hide resolved
cpp/src/gandiva/filter.cc Outdated Show resolved Hide resolved
cpp/src/gandiva/llvm_generator.cc Outdated Show resolved Hide resolved
cpp/src/gandiva/greedy_dual_size_cache.h Outdated Show resolved Hide resolved
cpp/src/gandiva/projector.h Outdated Show resolved Hide resolved
cpp/src/gandiva/llvm_generator.h Outdated Show resolved Hide resolved
cpp/src/gandiva/llvm_generator.h Outdated Show resolved Hide resolved
@projjal
Copy link
Contributor

projjal commented Sep 22, 2021

Implement Gandiva to cache object code instead the entire llvm module, improving the memory consumption and LLVM time perfomance.

This should not have any effect on llvm performance only the memory footprint. Can you also include the numbers in the description.
Also the wording in the description and pr title seems vague. change it to something like "Cache only ObjectCode in memory instead of entire module"

cpp/src/gandiva/cache.h Outdated Show resolved Hide resolved
@augustoasilva augustoasilva changed the title ARROW-14048: [C++][Gandiva] Implement Gandiva to Cache Object Code ARROW-14048: [C++][Gandiva] Implement Gandiva to Cache only ObjectCode in memory instead of entire module Sep 22, 2021
cpp/src/gandiva/engine.h Outdated Show resolved Hide resolved
@augustoasilva augustoasilva changed the title ARROW-14048: [C++][Gandiva] Implement Gandiva to Cache only ObjectCode in memory instead of entire module ARROW-14048: [C++][Gandiva] Make Gandiva to cache only object code in memory instead of entire module Sep 22, 2021
cpp/src/gandiva/cache.cc Outdated Show resolved Hide resolved
cpp/src/gandiva/cache.cc Outdated Show resolved Hide resolved
cpp/src/gandiva/engine.h Outdated Show resolved Hide resolved
cpp/src/gandiva/filter.cc Outdated Show resolved Hide resolved
@augustoasilva augustoasilva changed the title ARROW-14048: [C++][Gandiva] Make Gandiva to cache only object code in memory instead of entire module ARROW-14048: [C++][Gandiva] Cache only object code in memory instead of entire module Sep 23, 2021
@augustoasilva augustoasilva force-pushed the feature/implement-gandiva-to-cache-object-code branch 4 times, most recently from 8b12dc2 to 05e5200 Compare September 27, 2021 11:09
cpp/src/gandiva/cache.h Outdated Show resolved Hide resolved
cpp/src/gandiva/expression_cache_key.h Outdated Show resolved Hide resolved
cpp/src/gandiva/filter.cc Outdated Show resolved Hide resolved
cpp/src/gandiva/filter.cc Outdated Show resolved Hide resolved
cpp/src/gandiva/filter.cc Outdated Show resolved Hide resolved
cpp/src/gandiva/gandiva_object_cache.h Outdated Show resolved Hide resolved
cpp/src/gandiva/llvm_generator.h Outdated Show resolved Hide resolved
cpp/src/gandiva/llvm_generator.h Outdated Show resolved Hide resolved
cpp/src/gandiva/projector.cc Outdated Show resolved Hide resolved
cpp/src/gandiva/gandiva_object_cache.h Outdated Show resolved Hide resolved
@augustoasilva augustoasilva force-pushed the feature/implement-gandiva-to-cache-object-code branch from 547447b to 61b98c5 Compare November 16, 2021 11:08
cpp/src/gandiva/cache.h Outdated Show resolved Hide resolved
@pravindra pravindra closed this in 188c94a Dec 3, 2021
@ursabot
Copy link

ursabot commented Dec 3, 2021

Benchmark runs are scheduled for baseline = a6c81e6 and contender = 188c94a. 188c94a is a master commit associated with this PR. Results will be available as each benchmark for each run completes.
Conbench compare runs links:
[Finished ⬇️0.0% ⬆️0.0%] ec2-t3-xlarge-us-east-2
[Failed ⬇️0.0% ⬆️0.0%] ursa-i9-9960x
[Finished ⬇️0.22% ⬆️0.0%] ursa-thinkcentre-m75q
Supported benchmarks:
ursa-i9-9960x: langs = Python, R, JavaScript
ursa-thinkcentre-m75q: langs = C++, Java
ec2-t3-xlarge-us-east-2: cloud = True

@anthonylouisbsb anthonylouisbsb deleted the feature/implement-gandiva-to-cache-object-code branch December 4, 2021 00:12
Copy link

@recardozo recardozo left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The best pull request <3

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants