Support Arrow dictionary serialization:
- Put the serialized schema and dictionary together in system memory, due to the limited APIs exported by the Arrow serializer;
- Remove the duplicate schema from serialized records in video memory.
Showing 6 changed files with 293 additions and 108 deletions.
baf2945
@asuhan this seems like something that could be improved. Would it be possible for you to open a JIRA describing the use case and how you would ideally like the Arrow IPC API to work?
@wesm I didn't find any suitable API on RecordBatchStreamWriter, so I used ipc::WriteRecordBatch to serialize each record batch separately. There could be a minor refactoring here.
If you are writing record batches separately from the schema, that is the right way right now. The function / class documentation in arrow/ipc/writer.h and reader.h could be improved to make the intended usage clearer.
I left a note on https://issues.apache.org/jira/browse/ARROW-1226 so we can improve the documentation about this.
Yes, it's working, but the API that returns a FileBlock is kind of low-level, even if it's a dummy one. As a next step we want to support distributed results on multi-GPU; I will return an IPC buffer list to each MapD client.
OK, if you could describe a higher-level API that would work better for your use case, I would be happy to implement it. Let me know.
Actually this is minor, but a wrapper for it in RecordBatchStreamWriter would be nicer for serializing just a record batch, as well as the schema or perhaps separate dictionaries, so the caller can simply decide where to put them, separately or together, without needing any knowledge of the EOS marker or the padding in between on the deserializer side.
In 0.5.0 there is a new arrow::ipc::MessageReader abstract interface (https://github.com/apache/arrow/blob/master/cpp/src/arrow/ipc/reader.h#L74), so you can read from an arbitrary stream of messages (which need not be contiguous in an InputStream). It might be useful to have something similar when writing a sequence of record batches, or at least some APIs to write the stream components in a less monolithic way.
It would be great to make every section self-descriptive and easy to deserialize arbitrarily, so that every time I deserialize one or more sections from a buffer, I get object pointers with meta-information indicating whether they are schemata, dictionaries, or record batches (using, say, dynamic_cast), or maybe their memory types.
Agreed -- there is now a ReadRecordBatch that takes a Message instance: https://github.com/apache/arrow/blob/master/cpp/src/arrow/ipc/reader.h#L154
The Message is a new type that indicates the type of IPC message and contains buffers with the metadata and message body: https://github.com/apache/arrow/blob/master/cpp/src/arrow/ipc/metadata.h#L143

When writing a stream it's a bit trickier, since it may be more efficient to call OutputStream::Write for the metadata and then for the buffers and padding, but there could be a less efficient form where instances of Message are created in memory and then written to an output stream with Message::SerializeTo: https://github.com/apache/arrow/blob/master/cpp/src/arrow/ipc/metadata.h#L190
Thanks for pointing out everything I need. I'm also figuring out how to create an OutputStream on a preallocated buffer, like CPU shared memory, instead of a mmap'd file.
For this you want FixedSizeBufferWriter: https://github.com/apache/arrow/blob/master/cpp/src/arrow/io/memory.h#L88