From d234a2b06f9b6a2b8b9cccc3fdfe77337a89268f Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Fri, 22 Oct 2021 09:16:27 -0400 Subject: [PATCH 001/194] ARROW-14416: [R] Fix package installation on the Raspberry Pi I've added -latomic to PKG_LIBS, will need to test this though Closes #11506 from thisisnic/ARROW-14416_pi Authored-by: Nic Crane Signed-off-by: Neal Richardson --- r/configure | 1 + 1 file changed, 1 insertion(+) diff --git a/r/configure b/r/configure index cd2314949bf2d..13177c5875f35 100755 --- a/r/configure +++ b/r/configure @@ -204,6 +204,7 @@ fi # See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81358 for similar example if grep raspbian /etc/os-release >/dev/null 2>&1; then PKG_CFLAGS="$PKG_CFLAGS -DARROW_CXXFLAGS=-latomic" + PKG_LIBS="-latomic $PKG_LIBS" fi # If libarrow uses the old GLIBCXX ABI, so we have to use it too From 9ce3440670a9bbb0204e88af5466e39612b649d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dragos=20Moldovan-Gr=C3=BCnfeld?= Date: Fri, 22 Oct 2021 15:47:33 +0100 Subject: [PATCH 002/194] ARROW-13156 [R] bindings for str_count MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #11473 from dragosmg/ARROW-13156_str_count_bindings Lead-authored-by: Dragos Moldovan-Grünfeld Co-authored-by: Dragoș Moldovan-Grünfeld Signed-off-by: Nic Crane --- r/R/dplyr-functions.R | 13 +++++ r/tests/testthat/test-dplyr-funcs-string.R | 60 ++++++++++++++++++++++ 2 files changed, 73 insertions(+) diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R index dbb9d5f46f603..717cdae966275 100644 --- a/r/R/dplyr-functions.R +++ b/r/R/dplyr-functions.R @@ -645,6 +645,19 @@ nse_funcs$str_ends <- function(string, pattern, negate = FALSE) { out } +nse_funcs$str_count <- function(string, pattern) { + opts <- get_stringr_pattern_options(enexpr(pattern)) + if (!is.string(pattern)) { + arrow_not_supported("`pattern` must be a length 1 character vector; other values") + } + arrow_fun <- ifelse(opts$fixed, "count_substring", "count_substring_regex") + Expression$create( + arrow_fun, + string, + options = list(pattern = opts$pattern, ignore_case = opts$ignore_case) + ) +} + # String function helpers # format `pattern` as needed for case insensitivity and literal matching by RE2 diff --git a/r/tests/testthat/test-dplyr-funcs-string.R b/r/tests/testthat/test-dplyr-funcs-string.R index dd59b5ac55da5..333735be4f093 100644 --- a/r/tests/testthat/test-dplyr-funcs-string.R +++ b/r/tests/testthat/test-dplyr-funcs-string.R @@ -1336,3 +1336,63 @@ test_that("str_starts, str_ends, startsWith, endsWith", { df ) }) + +test_that("str_count", { + df <- tibble( + cities = c("Kolkata", "Dar es Salaam", "Tel Aviv", "San Antonio", "Cluj Napoca", "Bern", "Bogota"), + dots = c("a.", "...", ".a.a", "a..a.", "ab...", "dse....", ".f..d..") + ) + + expect_dplyr_equal( + input %>% + mutate(a_count = str_count(cities, pattern = "a")) %>% + collect(), + df + ) + + expect_dplyr_equal( + input %>% + mutate(p_count = str_count(cities, pattern = "d")) %>% + collect(), + df + ) + + expect_dplyr_equal( + input %>% + mutate(p_count = str_count(cities, + pattern = regex("d", ignore_case = TRUE) + )) %>% + collect(), + df + ) + + expect_dplyr_equal( + input %>% + mutate(e_count = str_count(cities, pattern = "u")) %>% + collect(), + df + ) + + # nse_funcs$str_count() is not vectorised over pattern + expect_dplyr_equal( + input %>% + mutate(let_count = str_count(cities, pattern = c("a", "b", "e", "g", "p", "n", "s"))) %>% + collect(), + df, + warning = TRUE + ) + + expect_dplyr_equal( + 
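+    # "." is a regex metacharacter here, so this counts every character of `dots`;
+    # the fixed(".") test below counts literal dots only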
input %>% + mutate(dots_count = str_count(dots, ".")) %>% + collect(), + df + ) + + expect_dplyr_equal( + input %>% + mutate(dots_count = str_count(dots, fixed("."))) %>% + collect(), + df + ) +}) From 176c1132e578ecf0b81429246f95742f250e9305 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Fri, 22 Oct 2021 12:06:44 -0400 Subject: [PATCH 003/194] ARROW-14350: [IR] Add filter expression to Source node This PR adds a `filter` expression to `Source` nodes to support consumers that implement predicate pushdown. Closes #11438 from cpcloud/ARROW-14350 Authored-by: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Signed-off-by: Benjamin Kietzman --- cpp/src/generated/Relation_generated.h | 21 ++++++++++++++++++++- experimental/computeir/Relation.fbs | 7 +++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/cpp/src/generated/Relation_generated.h b/cpp/src/generated/Relation_generated.h index 6c9d9bc927a95..2c58784e0c45b 100644 --- a/cpp/src/generated/Relation_generated.h +++ b/cpp/src/generated/Relation_generated.h @@ -1327,7 +1327,8 @@ struct Source FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { VT_BASE = 4, VT_NAME = 6, - VT_SCHEMA = 8 + VT_FILTER = 8, + VT_SCHEMA = 10 }; const org::apache::arrow::computeir::flatbuf::RelBase *base() const { return GetPointer(VT_BASE); @@ -1335,6 +1336,15 @@ struct Source FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { const flatbuffers::String *name() const { return GetPointer(VT_NAME); } + /// An optional expression used to filter out rows directly from the source. + /// + /// Useful for consumers that implement predicate pushdown. + /// + /// A missing filter value indicates no filter, i.e., all rows are + /// returned from the source. + const org::apache::arrow::computeir::flatbuf::Expression *filter() const { + return GetPointer(VT_FILTER); + } const org::apache::arrow::flatbuf::Schema *schema() const { return GetPointer(VT_SCHEMA); } @@ -1344,6 +1354,8 @@ struct Source FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { verifier.VerifyTable(base()) && VerifyOffsetRequired(verifier, VT_NAME) && verifier.VerifyString(name()) && + VerifyOffset(verifier, VT_FILTER) && + verifier.VerifyTable(filter()) && VerifyOffsetRequired(verifier, VT_SCHEMA) && verifier.VerifyTable(schema()) && verifier.EndTable(); @@ -1360,6 +1372,9 @@ struct SourceBuilder { void add_name(flatbuffers::Offset name) { fbb_.AddOffset(Source::VT_NAME, name); } + void add_filter(flatbuffers::Offset filter) { + fbb_.AddOffset(Source::VT_FILTER, filter); + } void add_schema(flatbuffers::Offset schema) { fbb_.AddOffset(Source::VT_SCHEMA, schema); } @@ -1382,9 +1397,11 @@ inline flatbuffers::Offset CreateSource( flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset base = 0, flatbuffers::Offset name = 0, + flatbuffers::Offset filter = 0, flatbuffers::Offset schema = 0) { SourceBuilder builder_(_fbb); builder_.add_schema(schema); + builder_.add_filter(filter); builder_.add_name(name); builder_.add_base(base); return builder_.Finish(); @@ -1394,12 +1411,14 @@ inline flatbuffers::Offset CreateSourceDirect( flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset base = 0, const char *name = nullptr, + flatbuffers::Offset filter = 0, flatbuffers::Offset schema = 0) { auto name__ = name ? 
_fbb.CreateString(name) : 0; return org::apache::arrow::computeir::flatbuf::CreateSource( _fbb, base, name__, + filter, schema); } diff --git a/experimental/computeir/Relation.fbs b/experimental/computeir/Relation.fbs index ab0156e0f1b05..3af159a033952 100644 --- a/experimental/computeir/Relation.fbs +++ b/experimental/computeir/Relation.fbs @@ -197,6 +197,13 @@ table LiteralRelation { table Source { base: RelBase (required); name: string (required); + /// An optional expression used to filter out rows directly from the source. + /// + /// Useful for consumers that implement predicate pushdown. + /// + /// A missing filter value indicates no filter, i.e., all rows are + /// returned from the source. + filter: Expression; schema: org.apache.arrow.flatbuf.Schema (required); } From ee18c08c6e65ac4968e7615cbae3558d85cf5e02 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Fri, 22 Oct 2021 12:11:33 -0400 Subject: [PATCH 004/194] ARROW-14349: [IR] Remove RelBase This PR removes `RelBase` and associated column (re)mapping types. Producers/consumers are expected to use projections for reordering for the time being. Closes #11435 from cpcloud/ARROW-14349 Authored-by: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Signed-off-by: Benjamin Kietzman --- cpp/src/generated/Relation_generated.h | 483 ++++++------------------- experimental/computeir/Relation.fbs | 74 ++-- 2 files changed, 138 insertions(+), 419 deletions(-) diff --git a/cpp/src/generated/Relation_generated.h b/cpp/src/generated/Relation_generated.h index 2c58784e0c45b..0dbbc86ed5cc7 100644 --- a/cpp/src/generated/Relation_generated.h +++ b/cpp/src/generated/Relation_generated.h @@ -16,18 +16,9 @@ namespace arrow { namespace computeir { namespace flatbuf { -struct Remap; -struct RemapBuilder; - -struct PassThrough; -struct PassThroughBuilder; - struct RelId; struct RelIdBuilder; -struct RelBase; -struct RelBaseBuilder; - struct Filter; struct FilterBuilder; @@ -64,55 +55,6 @@ struct SourceBuilder; struct Relation; struct RelationBuilder; -/// A union for the different colum remapping variants -enum class Emit : uint8_t { - NONE = 0, - Remap = 1, - PassThrough = 2, - MIN = NONE, - MAX = PassThrough -}; - -inline const Emit (&EnumValuesEmit())[3] { - static const Emit values[] = { - Emit::NONE, - Emit::Remap, - Emit::PassThrough - }; - return values; -} - -inline const char * const *EnumNamesEmit() { - static const char * const names[4] = { - "NONE", - "Remap", - "PassThrough", - nullptr - }; - return names; -} - -inline const char *EnumNameEmit(Emit e) { - if (flatbuffers::IsOutRange(e, Emit::NONE, Emit::PassThrough)) return ""; - const size_t index = static_cast(e); - return EnumNamesEmit()[index]; -} - -template struct EmitTraits { - static const Emit enum_value = Emit::NONE; -}; - -template<> struct EmitTraits { - static const Emit enum_value = Emit::Remap; -}; - -template<> struct EmitTraits { - static const Emit enum_value = Emit::PassThrough; -}; - -bool VerifyEmit(flatbuffers::Verifier &verifier, const void *obj, Emit type); -bool VerifyEmitVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector> *values, const flatbuffers::Vector *types); - enum class JoinKind : uint8_t { Anti = 0, Cross = 1, @@ -290,97 +232,6 @@ template<> struct RelationImplTraits> *values, const flatbuffers::Vector *types); -/// A data type indicating that a different mapping of columns -/// should occur in the output. 
-/// -/// For example: -/// -/// Given a query `SELECT b, a FROM t` where `t` has columns a, b, c -/// the mapping value for the projection would equal [1, 0]. -struct Remap FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef RemapBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_MAPPING = 4 - }; - const flatbuffers::Vector> *mapping() const { - return GetPointer> *>(VT_MAPPING); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyOffsetRequired(verifier, VT_MAPPING) && - verifier.VerifyVector(mapping()) && - verifier.VerifyVectorOfTables(mapping()) && - verifier.EndTable(); - } -}; - -struct RemapBuilder { - typedef Remap Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_mapping(flatbuffers::Offset>> mapping) { - fbb_.AddOffset(Remap::VT_MAPPING, mapping); - } - explicit RemapBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - RemapBuilder &operator=(const RemapBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - fbb_.Required(o, Remap::VT_MAPPING); - return o; - } -}; - -inline flatbuffers::Offset CreateRemap( - flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset>> mapping = 0) { - RemapBuilder builder_(_fbb); - builder_.add_mapping(mapping); - return builder_.Finish(); -} - -inline flatbuffers::Offset CreateRemapDirect( - flatbuffers::FlatBufferBuilder &_fbb, - const std::vector> *mapping = nullptr) { - auto mapping__ = mapping ? _fbb.CreateVector>(*mapping) : 0; - return org::apache::arrow::computeir::flatbuf::CreateRemap( - _fbb, - mapping__); -} - -struct PassThrough FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef PassThroughBuilder Builder; - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - verifier.EndTable(); - } -}; - -struct PassThroughBuilder { - typedef PassThrough Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - explicit PassThroughBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - PassThroughBuilder &operator=(const PassThroughBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - return o; - } -}; - -inline flatbuffers::Offset CreatePassThrough( - flatbuffers::FlatBufferBuilder &_fbb) { - PassThroughBuilder builder_(_fbb); - return builder_.Finish(); -} - /// An identifier for relations in a query. /// /// A table is used here to allow plan implementations optionality. 
@@ -426,101 +277,18 @@ inline flatbuffers::Offset CreateRelId( return builder_.Finish(); } -/// Fields common to every relational operator -struct RelBase FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { - typedef RelBaseBuilder Builder; - enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_OUTPUT_MAPPING_TYPE = 4, - VT_OUTPUT_MAPPING = 6, - VT_ID = 8 - }; - org::apache::arrow::computeir::flatbuf::Emit output_mapping_type() const { - return static_cast(GetField(VT_OUTPUT_MAPPING_TYPE, 0)); - } - /// Output remapping of ordinal columns for a given operation - const void *output_mapping() const { - return GetPointer(VT_OUTPUT_MAPPING); - } - template const T *output_mapping_as() const; - const org::apache::arrow::computeir::flatbuf::Remap *output_mapping_as_Remap() const { - return output_mapping_type() == org::apache::arrow::computeir::flatbuf::Emit::Remap ? static_cast(output_mapping()) : nullptr; - } - const org::apache::arrow::computeir::flatbuf::PassThrough *output_mapping_as_PassThrough() const { - return output_mapping_type() == org::apache::arrow::computeir::flatbuf::Emit::PassThrough ? static_cast(output_mapping()) : nullptr; - } - /// An identifiier for a relation. The identifier should be unique over the - /// entire plan. Optional. - const org::apache::arrow::computeir::flatbuf::RelId *id() const { - return GetPointer(VT_ID); - } - bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && - VerifyField(verifier, VT_OUTPUT_MAPPING_TYPE) && - VerifyOffsetRequired(verifier, VT_OUTPUT_MAPPING) && - VerifyEmit(verifier, output_mapping(), output_mapping_type()) && - VerifyOffset(verifier, VT_ID) && - verifier.VerifyTable(id()) && - verifier.EndTable(); - } -}; - -template<> inline const org::apache::arrow::computeir::flatbuf::Remap *RelBase::output_mapping_as() const { - return output_mapping_as_Remap(); -} - -template<> inline const org::apache::arrow::computeir::flatbuf::PassThrough *RelBase::output_mapping_as() const { - return output_mapping_as_PassThrough(); -} - -struct RelBaseBuilder { - typedef RelBase Table; - flatbuffers::FlatBufferBuilder &fbb_; - flatbuffers::uoffset_t start_; - void add_output_mapping_type(org::apache::arrow::computeir::flatbuf::Emit output_mapping_type) { - fbb_.AddElement(RelBase::VT_OUTPUT_MAPPING_TYPE, static_cast(output_mapping_type), 0); - } - void add_output_mapping(flatbuffers::Offset output_mapping) { - fbb_.AddOffset(RelBase::VT_OUTPUT_MAPPING, output_mapping); - } - void add_id(flatbuffers::Offset id) { - fbb_.AddOffset(RelBase::VT_ID, id); - } - explicit RelBaseBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { - start_ = fbb_.StartTable(); - } - RelBaseBuilder &operator=(const RelBaseBuilder &); - flatbuffers::Offset Finish() { - const auto end = fbb_.EndTable(start_); - auto o = flatbuffers::Offset(end); - fbb_.Required(o, RelBase::VT_OUTPUT_MAPPING); - return o; - } -}; - -inline flatbuffers::Offset CreateRelBase( - flatbuffers::FlatBufferBuilder &_fbb, - org::apache::arrow::computeir::flatbuf::Emit output_mapping_type = org::apache::arrow::computeir::flatbuf::Emit::NONE, - flatbuffers::Offset output_mapping = 0, - flatbuffers::Offset id = 0) { - RelBaseBuilder builder_(_fbb); - builder_.add_id(id); - builder_.add_output_mapping(output_mapping); - builder_.add_output_mapping_type(output_mapping_type); - return builder_.Finish(); -} - /// Filter operation struct Filter FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef FilterBuilder Builder; enum 
FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_BASE = 4, + VT_ID = 4, VT_REL = 6, VT_PREDICATE = 8 }; - /// Common options - const org::apache::arrow::computeir::flatbuf::RelBase *base() const { - return GetPointer(VT_BASE); + /// An identifiier for the relation. The identifier should be unique over the + /// entire plan. Optional. + const org::apache::arrow::computeir::flatbuf::RelId *id() const { + return GetPointer(VT_ID); } /// Child relation const org::apache::arrow::computeir::flatbuf::Relation *rel() const { @@ -534,8 +302,8 @@ struct Filter FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && - VerifyOffsetRequired(verifier, VT_BASE) && - verifier.VerifyTable(base()) && + VerifyOffset(verifier, VT_ID) && + verifier.VerifyTable(id()) && VerifyOffsetRequired(verifier, VT_REL) && verifier.VerifyTable(rel()) && VerifyOffsetRequired(verifier, VT_PREDICATE) && @@ -548,8 +316,8 @@ struct FilterBuilder { typedef Filter Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - void add_base(flatbuffers::Offset base) { - fbb_.AddOffset(Filter::VT_BASE, base); + void add_id(flatbuffers::Offset id) { + fbb_.AddOffset(Filter::VT_ID, id); } void add_rel(flatbuffers::Offset rel) { fbb_.AddOffset(Filter::VT_REL, rel); @@ -565,7 +333,6 @@ struct FilterBuilder { flatbuffers::Offset Finish() { const auto end = fbb_.EndTable(start_); auto o = flatbuffers::Offset(end); - fbb_.Required(o, Filter::VT_BASE); fbb_.Required(o, Filter::VT_REL); fbb_.Required(o, Filter::VT_PREDICATE); return o; @@ -574,13 +341,13 @@ struct FilterBuilder { inline flatbuffers::Offset CreateFilter( flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset base = 0, + flatbuffers::Offset id = 0, flatbuffers::Offset rel = 0, flatbuffers::Offset predicate = 0) { FilterBuilder builder_(_fbb); builder_.add_predicate(predicate); builder_.add_rel(rel); - builder_.add_base(base); + builder_.add_id(id); return builder_.Finish(); } @@ -588,13 +355,14 @@ inline flatbuffers::Offset CreateFilter( struct Project FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef ProjectBuilder Builder; enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_BASE = 4, + VT_ID = 4, VT_REL = 6, VT_EXPRESSIONS = 8 }; - /// Common options - const org::apache::arrow::computeir::flatbuf::RelBase *base() const { - return GetPointer(VT_BASE); + /// An identifiier for the relation. The identifier should be unique over the + /// entire plan. Optional. 
+ const org::apache::arrow::computeir::flatbuf::RelId *id() const { + return GetPointer(VT_ID); } /// Child relation const org::apache::arrow::computeir::flatbuf::Relation *rel() const { @@ -607,8 +375,8 @@ struct Project FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && - VerifyOffsetRequired(verifier, VT_BASE) && - verifier.VerifyTable(base()) && + VerifyOffset(verifier, VT_ID) && + verifier.VerifyTable(id()) && VerifyOffsetRequired(verifier, VT_REL) && verifier.VerifyTable(rel()) && VerifyOffsetRequired(verifier, VT_EXPRESSIONS) && @@ -622,8 +390,8 @@ struct ProjectBuilder { typedef Project Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - void add_base(flatbuffers::Offset base) { - fbb_.AddOffset(Project::VT_BASE, base); + void add_id(flatbuffers::Offset id) { + fbb_.AddOffset(Project::VT_ID, id); } void add_rel(flatbuffers::Offset rel) { fbb_.AddOffset(Project::VT_REL, rel); @@ -639,7 +407,6 @@ struct ProjectBuilder { flatbuffers::Offset Finish() { const auto end = fbb_.EndTable(start_); auto o = flatbuffers::Offset(end); - fbb_.Required(o, Project::VT_BASE); fbb_.Required(o, Project::VT_REL); fbb_.Required(o, Project::VT_EXPRESSIONS); return o; @@ -648,25 +415,25 @@ struct ProjectBuilder { inline flatbuffers::Offset CreateProject( flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset base = 0, + flatbuffers::Offset id = 0, flatbuffers::Offset rel = 0, flatbuffers::Offset>> expressions = 0) { ProjectBuilder builder_(_fbb); builder_.add_expressions(expressions); builder_.add_rel(rel); - builder_.add_base(base); + builder_.add_id(id); return builder_.Finish(); } inline flatbuffers::Offset CreateProjectDirect( flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset base = 0, + flatbuffers::Offset id = 0, flatbuffers::Offset rel = 0, const std::vector> *expressions = nullptr) { auto expressions__ = expressions ? _fbb.CreateVector>(*expressions) : 0; return org::apache::arrow::computeir::flatbuf::CreateProject( _fbb, - base, + id, rel, expressions__); } @@ -731,14 +498,15 @@ inline flatbuffers::Offset CreateGroupingDirect( struct Aggregate FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef AggregateBuilder Builder; enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_BASE = 4, + VT_ID = 4, VT_REL = 6, VT_MEASURES = 8, VT_GROUPINGS = 10 }; - /// Common options - const org::apache::arrow::computeir::flatbuf::RelBase *base() const { - return GetPointer(VT_BASE); + /// An identifiier for the relation. The identifier should be unique over the + /// entire plan. Optional. 
+ const org::apache::arrow::computeir::flatbuf::RelId *id() const { + return GetPointer(VT_ID); } /// Child relation const org::apache::arrow::computeir::flatbuf::Relation *rel() const { @@ -768,8 +536,8 @@ struct Aggregate FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && - VerifyOffsetRequired(verifier, VT_BASE) && - verifier.VerifyTable(base()) && + VerifyOffset(verifier, VT_ID) && + verifier.VerifyTable(id()) && VerifyOffsetRequired(verifier, VT_REL) && verifier.VerifyTable(rel()) && VerifyOffsetRequired(verifier, VT_MEASURES) && @@ -786,8 +554,8 @@ struct AggregateBuilder { typedef Aggregate Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - void add_base(flatbuffers::Offset base) { - fbb_.AddOffset(Aggregate::VT_BASE, base); + void add_id(flatbuffers::Offset id) { + fbb_.AddOffset(Aggregate::VT_ID, id); } void add_rel(flatbuffers::Offset rel) { fbb_.AddOffset(Aggregate::VT_REL, rel); @@ -806,7 +574,6 @@ struct AggregateBuilder { flatbuffers::Offset Finish() { const auto end = fbb_.EndTable(start_); auto o = flatbuffers::Offset(end); - fbb_.Required(o, Aggregate::VT_BASE); fbb_.Required(o, Aggregate::VT_REL); fbb_.Required(o, Aggregate::VT_MEASURES); fbb_.Required(o, Aggregate::VT_GROUPINGS); @@ -816,7 +583,7 @@ struct AggregateBuilder { inline flatbuffers::Offset CreateAggregate( flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset base = 0, + flatbuffers::Offset id = 0, flatbuffers::Offset rel = 0, flatbuffers::Offset>> measures = 0, flatbuffers::Offset>> groupings = 0) { @@ -824,13 +591,13 @@ inline flatbuffers::Offset CreateAggregate( builder_.add_groupings(groupings); builder_.add_measures(measures); builder_.add_rel(rel); - builder_.add_base(base); + builder_.add_id(id); return builder_.Finish(); } inline flatbuffers::Offset CreateAggregateDirect( flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset base = 0, + flatbuffers::Offset id = 0, flatbuffers::Offset rel = 0, const std::vector> *measures = nullptr, const std::vector> *groupings = nullptr) { @@ -838,7 +605,7 @@ inline flatbuffers::Offset CreateAggregateDirect( auto groupings__ = groupings ? _fbb.CreateVector>(*groupings) : 0; return org::apache::arrow::computeir::flatbuf::CreateAggregate( _fbb, - base, + id, rel, measures__, groupings__); @@ -848,15 +615,16 @@ inline flatbuffers::Offset CreateAggregateDirect( struct Join FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef JoinBuilder Builder; enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_BASE = 4, + VT_ID = 4, VT_LEFT = 6, VT_RIGHT = 8, VT_ON_EXPRESSION = 10, VT_JOIN_KIND = 12 }; - /// Common options - const org::apache::arrow::computeir::flatbuf::RelBase *base() const { - return GetPointer(VT_BASE); + /// An identifiier for the relation. The identifier should be unique over the + /// entire plan. Optional. 
+ const org::apache::arrow::computeir::flatbuf::RelId *id() const { + return GetPointer(VT_ID); } /// Left relation const org::apache::arrow::computeir::flatbuf::Relation *left() const { @@ -878,8 +646,8 @@ struct Join FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && - VerifyOffsetRequired(verifier, VT_BASE) && - verifier.VerifyTable(base()) && + VerifyOffset(verifier, VT_ID) && + verifier.VerifyTable(id()) && VerifyOffsetRequired(verifier, VT_LEFT) && verifier.VerifyTable(left()) && VerifyOffsetRequired(verifier, VT_RIGHT) && @@ -895,8 +663,8 @@ struct JoinBuilder { typedef Join Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - void add_base(flatbuffers::Offset base) { - fbb_.AddOffset(Join::VT_BASE, base); + void add_id(flatbuffers::Offset id) { + fbb_.AddOffset(Join::VT_ID, id); } void add_left(flatbuffers::Offset left) { fbb_.AddOffset(Join::VT_LEFT, left); @@ -918,7 +686,6 @@ struct JoinBuilder { flatbuffers::Offset Finish() { const auto end = fbb_.EndTable(start_); auto o = flatbuffers::Offset(end); - fbb_.Required(o, Join::VT_BASE); fbb_.Required(o, Join::VT_LEFT); fbb_.Required(o, Join::VT_RIGHT); fbb_.Required(o, Join::VT_ON_EXPRESSION); @@ -928,7 +695,7 @@ struct JoinBuilder { inline flatbuffers::Offset CreateJoin( flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset base = 0, + flatbuffers::Offset id = 0, flatbuffers::Offset left = 0, flatbuffers::Offset right = 0, flatbuffers::Offset on_expression = 0, @@ -937,7 +704,7 @@ inline flatbuffers::Offset CreateJoin( builder_.add_on_expression(on_expression); builder_.add_right(right); builder_.add_left(left); - builder_.add_base(base); + builder_.add_id(id); builder_.add_join_kind(join_kind); return builder_.Finish(); } @@ -946,13 +713,14 @@ inline flatbuffers::Offset CreateJoin( struct OrderBy FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef OrderByBuilder Builder; enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_BASE = 4, + VT_ID = 4, VT_REL = 6, VT_KEYS = 8 }; - /// Common options - const org::apache::arrow::computeir::flatbuf::RelBase *base() const { - return GetPointer(VT_BASE); + /// An identifiier for the relation. The identifier should be unique over the + /// entire plan. Optional. 
+ const org::apache::arrow::computeir::flatbuf::RelId *id() const { + return GetPointer(VT_ID); } /// Child relation const org::apache::arrow::computeir::flatbuf::Relation *rel() const { @@ -965,8 +733,8 @@ struct OrderBy FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && - VerifyOffsetRequired(verifier, VT_BASE) && - verifier.VerifyTable(base()) && + VerifyOffset(verifier, VT_ID) && + verifier.VerifyTable(id()) && VerifyOffsetRequired(verifier, VT_REL) && verifier.VerifyTable(rel()) && VerifyOffsetRequired(verifier, VT_KEYS) && @@ -980,8 +748,8 @@ struct OrderByBuilder { typedef OrderBy Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - void add_base(flatbuffers::Offset base) { - fbb_.AddOffset(OrderBy::VT_BASE, base); + void add_id(flatbuffers::Offset id) { + fbb_.AddOffset(OrderBy::VT_ID, id); } void add_rel(flatbuffers::Offset rel) { fbb_.AddOffset(OrderBy::VT_REL, rel); @@ -997,7 +765,6 @@ struct OrderByBuilder { flatbuffers::Offset Finish() { const auto end = fbb_.EndTable(start_); auto o = flatbuffers::Offset(end); - fbb_.Required(o, OrderBy::VT_BASE); fbb_.Required(o, OrderBy::VT_REL); fbb_.Required(o, OrderBy::VT_KEYS); return o; @@ -1006,25 +773,25 @@ struct OrderByBuilder { inline flatbuffers::Offset CreateOrderBy( flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset base = 0, + flatbuffers::Offset id = 0, flatbuffers::Offset rel = 0, flatbuffers::Offset>> keys = 0) { OrderByBuilder builder_(_fbb); builder_.add_keys(keys); builder_.add_rel(rel); - builder_.add_base(base); + builder_.add_id(id); return builder_.Finish(); } inline flatbuffers::Offset CreateOrderByDirect( flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset base = 0, + flatbuffers::Offset id = 0, flatbuffers::Offset rel = 0, const std::vector> *keys = nullptr) { auto keys__ = keys ? _fbb.CreateVector>(*keys) : 0; return org::apache::arrow::computeir::flatbuf::CreateOrderBy( _fbb, - base, + id, rel, keys__); } @@ -1033,14 +800,15 @@ inline flatbuffers::Offset CreateOrderByDirect( struct Limit FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef LimitBuilder Builder; enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_BASE = 4, + VT_ID = 4, VT_REL = 6, VT_OFFSET = 8, VT_COUNT = 10 }; - /// Common options - const org::apache::arrow::computeir::flatbuf::RelBase *base() const { - return GetPointer(VT_BASE); + /// An identifiier for the relation. The identifier should be unique over the + /// entire plan. Optional. 
+ const org::apache::arrow::computeir::flatbuf::RelId *id() const { + return GetPointer(VT_ID); } /// Child relation const org::apache::arrow::computeir::flatbuf::Relation *rel() const { @@ -1056,8 +824,8 @@ struct Limit FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && - VerifyOffsetRequired(verifier, VT_BASE) && - verifier.VerifyTable(base()) && + VerifyOffset(verifier, VT_ID) && + verifier.VerifyTable(id()) && VerifyOffsetRequired(verifier, VT_REL) && verifier.VerifyTable(rel()) && VerifyField(verifier, VT_OFFSET) && @@ -1070,8 +838,8 @@ struct LimitBuilder { typedef Limit Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - void add_base(flatbuffers::Offset base) { - fbb_.AddOffset(Limit::VT_BASE, base); + void add_id(flatbuffers::Offset id) { + fbb_.AddOffset(Limit::VT_ID, id); } void add_rel(flatbuffers::Offset rel) { fbb_.AddOffset(Limit::VT_REL, rel); @@ -1090,7 +858,6 @@ struct LimitBuilder { flatbuffers::Offset Finish() { const auto end = fbb_.EndTable(start_); auto o = flatbuffers::Offset(end); - fbb_.Required(o, Limit::VT_BASE); fbb_.Required(o, Limit::VT_REL); return o; } @@ -1098,7 +865,7 @@ struct LimitBuilder { inline flatbuffers::Offset CreateLimit( flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset base = 0, + flatbuffers::Offset id = 0, flatbuffers::Offset rel = 0, uint32_t offset = 0, uint32_t count = 0) { @@ -1106,7 +873,7 @@ inline flatbuffers::Offset CreateLimit( builder_.add_count(count); builder_.add_offset(offset); builder_.add_rel(rel); - builder_.add_base(base); + builder_.add_id(id); return builder_.Finish(); } @@ -1114,13 +881,14 @@ inline flatbuffers::Offset CreateLimit( struct SetOperation FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef SetOperationBuilder Builder; enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_BASE = 4, + VT_ID = 4, VT_RELS = 6, VT_SET_OP = 8 }; - /// Common options - const org::apache::arrow::computeir::flatbuf::RelBase *base() const { - return GetPointer(VT_BASE); + /// An identifiier for the relation. The identifier should be unique over the + /// entire plan. Optional. 
+ const org::apache::arrow::computeir::flatbuf::RelId *id() const { + return GetPointer(VT_ID); } /// Child relations const flatbuffers::Vector> *rels() const { @@ -1132,8 +900,8 @@ struct SetOperation FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && - VerifyOffsetRequired(verifier, VT_BASE) && - verifier.VerifyTable(base()) && + VerifyOffset(verifier, VT_ID) && + verifier.VerifyTable(id()) && VerifyOffsetRequired(verifier, VT_RELS) && verifier.VerifyVector(rels()) && verifier.VerifyVectorOfTables(rels()) && @@ -1146,8 +914,8 @@ struct SetOperationBuilder { typedef SetOperation Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - void add_base(flatbuffers::Offset base) { - fbb_.AddOffset(SetOperation::VT_BASE, base); + void add_id(flatbuffers::Offset id) { + fbb_.AddOffset(SetOperation::VT_ID, id); } void add_rels(flatbuffers::Offset>> rels) { fbb_.AddOffset(SetOperation::VT_RELS, rels); @@ -1163,7 +931,6 @@ struct SetOperationBuilder { flatbuffers::Offset Finish() { const auto end = fbb_.EndTable(start_); auto o = flatbuffers::Offset(end); - fbb_.Required(o, SetOperation::VT_BASE); fbb_.Required(o, SetOperation::VT_RELS); return o; } @@ -1171,25 +938,25 @@ struct SetOperationBuilder { inline flatbuffers::Offset CreateSetOperation( flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset base = 0, + flatbuffers::Offset id = 0, flatbuffers::Offset>> rels = 0, org::apache::arrow::computeir::flatbuf::SetOpKind set_op = org::apache::arrow::computeir::flatbuf::SetOpKind::Union) { SetOperationBuilder builder_(_fbb); builder_.add_rels(rels); - builder_.add_base(base); + builder_.add_id(id); builder_.add_set_op(set_op); return builder_.Finish(); } inline flatbuffers::Offset CreateSetOperationDirect( flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset base = 0, + flatbuffers::Offset id = 0, const std::vector> *rels = nullptr, org::apache::arrow::computeir::flatbuf::SetOpKind set_op = org::apache::arrow::computeir::flatbuf::SetOpKind::Union) { auto rels__ = rels ? _fbb.CreateVector>(*rels) : 0; return org::apache::arrow::computeir::flatbuf::CreateSetOperation( _fbb, - base, + id, rels__, set_op); } @@ -1254,12 +1021,13 @@ inline flatbuffers::Offset CreateLiteralColumnDirect( struct LiteralRelation FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef LiteralRelationBuilder Builder; enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_BASE = 4, + VT_ID = 4, VT_COLUMNS = 6 }; - /// Common options - const org::apache::arrow::computeir::flatbuf::RelBase *base() const { - return GetPointer(VT_BASE); + /// An identifiier for the relation. The identifier should be unique over the + /// entire plan. Optional. + const org::apache::arrow::computeir::flatbuf::RelId *id() const { + return GetPointer(VT_ID); } /// The columns of this literal relation. 
const flatbuffers::Vector> *columns() const { @@ -1267,8 +1035,8 @@ struct LiteralRelation FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && - VerifyOffsetRequired(verifier, VT_BASE) && - verifier.VerifyTable(base()) && + VerifyOffset(verifier, VT_ID) && + verifier.VerifyTable(id()) && VerifyOffsetRequired(verifier, VT_COLUMNS) && verifier.VerifyVector(columns()) && verifier.VerifyVectorOfTables(columns()) && @@ -1280,8 +1048,8 @@ struct LiteralRelationBuilder { typedef LiteralRelation Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - void add_base(flatbuffers::Offset base) { - fbb_.AddOffset(LiteralRelation::VT_BASE, base); + void add_id(flatbuffers::Offset id) { + fbb_.AddOffset(LiteralRelation::VT_ID, id); } void add_columns(flatbuffers::Offset>> columns) { fbb_.AddOffset(LiteralRelation::VT_COLUMNS, columns); @@ -1294,7 +1062,6 @@ struct LiteralRelationBuilder { flatbuffers::Offset Finish() { const auto end = fbb_.EndTable(start_); auto o = flatbuffers::Offset(end); - fbb_.Required(o, LiteralRelation::VT_BASE); fbb_.Required(o, LiteralRelation::VT_COLUMNS); return o; } @@ -1302,22 +1069,22 @@ struct LiteralRelationBuilder { inline flatbuffers::Offset CreateLiteralRelation( flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset base = 0, + flatbuffers::Offset id = 0, flatbuffers::Offset>> columns = 0) { LiteralRelationBuilder builder_(_fbb); builder_.add_columns(columns); - builder_.add_base(base); + builder_.add_id(id); return builder_.Finish(); } inline flatbuffers::Offset CreateLiteralRelationDirect( flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset base = 0, + flatbuffers::Offset id = 0, const std::vector> *columns = nullptr) { auto columns__ = columns ? _fbb.CreateVector>(*columns) : 0; return org::apache::arrow::computeir::flatbuf::CreateLiteralRelation( _fbb, - base, + id, columns__); } @@ -1325,13 +1092,15 @@ inline flatbuffers::Offset CreateLiteralRelationDirect( struct Source FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef SourceBuilder Builder; enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { - VT_BASE = 4, + VT_ID = 4, VT_NAME = 6, VT_FILTER = 8, VT_SCHEMA = 10 }; - const org::apache::arrow::computeir::flatbuf::RelBase *base() const { - return GetPointer(VT_BASE); + /// An identifiier for the relation. The identifier should be unique over the + /// entire plan. Optional. 
+ const org::apache::arrow::computeir::flatbuf::RelId *id() const { + return GetPointer(VT_ID); } const flatbuffers::String *name() const { return GetPointer(VT_NAME); @@ -1350,8 +1119,8 @@ struct Source FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && - VerifyOffsetRequired(verifier, VT_BASE) && - verifier.VerifyTable(base()) && + VerifyOffset(verifier, VT_ID) && + verifier.VerifyTable(id()) && VerifyOffsetRequired(verifier, VT_NAME) && verifier.VerifyString(name()) && VerifyOffset(verifier, VT_FILTER) && @@ -1366,8 +1135,8 @@ struct SourceBuilder { typedef Source Table; flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - void add_base(flatbuffers::Offset base) { - fbb_.AddOffset(Source::VT_BASE, base); + void add_id(flatbuffers::Offset id) { + fbb_.AddOffset(Source::VT_ID, id); } void add_name(flatbuffers::Offset name) { fbb_.AddOffset(Source::VT_NAME, name); @@ -1386,7 +1155,6 @@ struct SourceBuilder { flatbuffers::Offset Finish() { const auto end = fbb_.EndTable(start_); auto o = flatbuffers::Offset(end); - fbb_.Required(o, Source::VT_BASE); fbb_.Required(o, Source::VT_NAME); fbb_.Required(o, Source::VT_SCHEMA); return o; @@ -1395,7 +1163,7 @@ struct SourceBuilder { inline flatbuffers::Offset CreateSource( flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset base = 0, + flatbuffers::Offset id = 0, flatbuffers::Offset name = 0, flatbuffers::Offset filter = 0, flatbuffers::Offset schema = 0) { @@ -1403,20 +1171,20 @@ inline flatbuffers::Offset CreateSource( builder_.add_schema(schema); builder_.add_filter(filter); builder_.add_name(name); - builder_.add_base(base); + builder_.add_id(id); return builder_.Finish(); } inline flatbuffers::Offset CreateSourceDirect( flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset base = 0, + flatbuffers::Offset id = 0, const char *name = nullptr, flatbuffers::Offset filter = 0, flatbuffers::Offset schema = 0) { auto name__ = name ? 
_fbb.CreateString(name) : 0; return org::apache::arrow::computeir::flatbuf::CreateSource( _fbb, - base, + id, name__, filter, schema); @@ -1541,35 +1309,6 @@ inline flatbuffers::Offset CreateRelation( return builder_.Finish(); } -inline bool VerifyEmit(flatbuffers::Verifier &verifier, const void *obj, Emit type) { - switch (type) { - case Emit::NONE: { - return true; - } - case Emit::Remap: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - case Emit::PassThrough: { - auto ptr = reinterpret_cast(obj); - return verifier.VerifyTable(ptr); - } - default: return true; - } -} - -inline bool VerifyEmitVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector> *values, const flatbuffers::Vector *types) { - if (!values || !types) return !values && !types; - if (values->size() != types->size()) return false; - for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i) { - if (!VerifyEmit( - verifier, values->Get(i), types->GetEnum(i))) { - return false; - } - } - return true; -} - inline bool VerifyRelationImpl(flatbuffers::Verifier &verifier, const void *obj, RelationImpl type) { switch (type) { case RelationImpl::NONE: { diff --git a/experimental/computeir/Relation.fbs b/experimental/computeir/Relation.fbs index 3af159a033952..12092ec9296bf 100644 --- a/experimental/computeir/Relation.fbs +++ b/experimental/computeir/Relation.fbs @@ -21,26 +21,6 @@ include "Expression.fbs"; namespace org.apache.arrow.computeir.flatbuf; -/// A data type indicating that a different mapping of columns -/// should occur in the output. -/// -/// For example: -/// -/// Given a query `SELECT b, a FROM t` where `t` has columns a, b, c -/// the mapping value for the projection would equal [1, 0]. -table Remap { - mapping: [FieldIndex] (required); -} - -// Pass through indicates that no output remapping should occur. -table PassThrough {} - -/// A union for the different colum remapping variants -union Emit { - Remap, - PassThrough, -} - /// An identifier for relations in a query. /// /// A table is used here to allow plan implementations optionality. @@ -48,20 +28,11 @@ table RelId { id: uint64; } -/// Fields common to every relational operator -table RelBase { - /// Output remapping of ordinal columns for a given operation - output_mapping: Emit (required); - - /// An identifiier for a relation. The identifier should be unique over the - /// entire plan. Optional. - id: RelId; -} - /// Filter operation table Filter { - /// Common options - base: RelBase (required); + /// An identifiier for the relation. The identifier should be unique over the + /// entire plan. Optional. + id: RelId; /// Child relation rel: Relation (required); /// The expression which will be evaluated against input rows @@ -72,8 +43,9 @@ table Filter { /// Projection table Project { - /// Common options - base: RelBase (required); + /// An identifiier for the relation. The identifier should be unique over the + /// entire plan. Optional. + id: RelId; /// Child relation rel: Relation (required); /// Expressions which will be evaluated to produce to @@ -89,8 +61,9 @@ table Grouping { /// Aggregate operation table Aggregate { - /// Common options - base: RelBase (required); + /// An identifiier for the relation. The identifier should be unique over the + /// entire plan. Optional. 
+ id: RelId; /// Child relation rel: Relation (required); /// Expressions which will be evaluated to produce to @@ -125,8 +98,9 @@ enum JoinKind : uint8 { /// Join between two tables table Join { - /// Common options - base: RelBase (required); + /// An identifiier for the relation. The identifier should be unique over the + /// entire plan. Optional. + id: RelId; /// Left relation left: Relation (required); /// Right relation @@ -141,8 +115,9 @@ table Join { /// Order by relation table OrderBy { - /// Common options - base: RelBase (required); + /// An identifiier for the relation. The identifier should be unique over the + /// entire plan. Optional. + id: RelId; /// Child relation rel: Relation (required); /// Define sort order for rows of output. @@ -152,8 +127,9 @@ table OrderBy { /// Limit operation table Limit { - /// Common options - base: RelBase (required); + /// An identifiier for the relation. The identifier should be unique over the + /// entire plan. Optional. + id: RelId; /// Child relation rel: Relation (required); /// Starting index of rows @@ -171,8 +147,9 @@ enum SetOpKind : uint8 { /// A set operation on two or more relations table SetOperation { - /// Common options - base: RelBase (required); + /// An identifiier for the relation. The identifier should be unique over the + /// entire plan. Optional. + id: RelId; /// Child relations rels: [Relation] (required); /// The kind of set operation @@ -187,15 +164,18 @@ table LiteralColumn { /// Literal relation table LiteralRelation { - /// Common options - base: RelBase (required); + /// An identifiier for the relation. The identifier should be unique over the + /// entire plan. Optional. + id: RelId; /// The columns of this literal relation. columns: [LiteralColumn] (required); } /// An external source of tabular data table Source { - base: RelBase (required); + /// An identifiier for the relation. The identifier should be unique over the + /// entire plan. Optional. + id: RelId; name: string (required); /// An optional expression used to filter out rows directly from the source. /// From 03669438bbce53078616c7f943a63fb0c11db196 Mon Sep 17 00:00:00 2001 From: Eric Erhardt Date: Fri, 22 Oct 2021 11:12:33 -0500 Subject: [PATCH 005/194] MINOR: [Docs][C#] Update C# documentation Updating the C# README and feature status matrix to match the current implementation. Also resolving the question in https://github.com/apache/arrow/issues/11367. Closes #11378 from eerhardt/UpdateDocs Authored-by: Eric Erhardt Signed-off-by: Eric Erhardt --- csharp/README.md | 19 +++++++------------ docs/source/status.rst | 10 +++++----- 2 files changed, 12 insertions(+), 17 deletions(-) diff --git a/csharp/README.md b/csharp/README.md index 2a60cd27c5c84..3d0681279a324 100644 --- a/csharp/README.md +++ b/csharp/README.md @@ -21,12 +21,13 @@ An implementation of Arrow targeting .NET Standard. -This implementation is under development and may not be suitable for use in production environments. +See our current [feature matrix](https://github.com/apache/arrow/blob/master/docs/source/status.rst) +for currently available features. # Implementation - Arrow 0.11 (specification) -- C# 7.2 +- C# 8 - .NET Standard 1.3 - Asynchronous I/O - Uses modern .NET runtime features such as **Span<T>**, **Memory<T>**, **MemoryManager<T>**, and **System.Buffers** primitives for memory allocation, memory storage, and fast serialization. 
@@ -34,8 +35,8 @@ This implementation is under development and may not be suitable for use in prod # Known Issues -- Can not read Arrow files containing dictionary batches, tensors, or tables. -- Can not easily modify allocation strategy without implementing a custom memory pool. All allocations are currently 64-byte aligned and padded to 8-bytes. +- Cannot read Arrow files containing tensors. +- Cannot easily modify allocation strategy without implementing a custom memory pool. All allocations are currently 64-byte aligned and padded to 8-bytes. - Default memory allocation strategy uses an over-allocation strategy with pointer fixing, which results in significant memory overhead for small buffers. A buffer that requires a single byte for storage may be backed by an allocation of up to 64-bytes to satisfy alignment requirements. - There are currently few builder APIs available for specific array types. Arrays must be built manually with an arrow buffer builder abstraction. - FlatBuffer code generation is not included in the build process. @@ -44,8 +45,6 @@ This implementation is under development and may not be suitable for use in prod - Throws exceptions that are non-specific to the Arrow implementation in some circumstances where it probably should (eg. does not throw ArrowException exceptions) - Lack of code documentation - Lack of usage examples -- Lack of comprehensive unit tests -- Lack of comprehensive benchmarks # Usage @@ -57,7 +56,7 @@ This implementation is under development and may not be suitable for use in prod public static async Task ReadArrowAsync(string filename) { - using (var stream = File.OpenRead("test.arrow")) + using (var stream = File.OpenRead(filename)) using (var reader = new ArrowFileReader(stream)) { var recordBatch = await reader.ReadNextRecordBatchAsync(); @@ -113,10 +112,8 @@ This implementation is under development and may not be suitable for use in prod - Serialization - Exhaustive validation - Dictionary Batch - - Can not serialize or deserialize files or streams containing dictionary batches + - Cannot serialize files or streams containing dictionary batches - Dictionary Encoding - - Schema Metadata - - Schema Field Metadata - Types - Tensor - Table @@ -125,11 +122,9 @@ This implementation is under development and may not be suitable for use in prod - Dense - Sparse - Half-Float - - Dictionary - Array Operations - Equality / Comparison - Casting - - Builders - Compute - There is currently no API available for a compute / kernel abstraction. 
diff --git a/docs/source/status.rst b/docs/source/status.rst index 8e3e998dfb9c2..879f20f81faa8 100644 --- a/docs/source/status.rst +++ b/docs/source/status.rst @@ -56,7 +56,7 @@ Data Types +-------------------+-------+-------+-------+------------+-------+-------+-------+ | Interval | ✓ | ✓ | ✓ | | | ✓ | ✓ | +-------------------+-------+-------+-------+------------+-------+-------+-------+ -| Fixed Size Binary | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ | +| Fixed Size Binary | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +-------------------+-------+-------+-------+------------+-------+-------+-------+ | Binary | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +-------------------+-------+-------+-------+------------+-------+-------+-------+ @@ -90,7 +90,7 @@ Data Types | Data type | C++ | Java | Go | JavaScript | C# | Rust | Julia | | (special) | | | | | | | | +===================+=======+=======+=======+============+=======+=======+=======+ -| Dictionary | ✓ | ✓ (1) | | ✓ (1) | | ✓ (1) | ✓ | +| Dictionary | ✓ | ✓ (1) | | ✓ (1) | ✓ (1) | ✓ (1) | ✓ | +-------------------+-------+-------+-------+------------+-------+-------+-------+ | Extension | ✓ | ✓ | ✓ | | | | ✓ | +-------------------+-------+-------+-------+------------+-------+-------+-------+ @@ -116,11 +116,11 @@ IPC Format +-----------------------------+-------+-------+-------+------------+-------+-------+-------+ | Record batches | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +-----------------------------+-------+-------+-------+------------+-------+-------+-------+ -| Dictionaries | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ | +| Dictionaries | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +-----------------------------+-------+-------+-------+------------+-------+-------+-------+ | Replacement dictionaries | ✓ | ✓ | | | | | ✓ | +-----------------------------+-------+-------+-------+------------+-------+-------+-------+ -| Delta dictionaries | ✓ (1) | | | | | | ✓ | +| Delta dictionaries | ✓ (1) | | | | ✓ | | ✓ | +-----------------------------+-------+-------+-------+------------+-------+-------+-------+ | Tensors | ✓ | | | | | | | +-----------------------------+-------+-------+-------+------------+-------+-------+-------+ @@ -130,7 +130,7 @@ IPC Format +-----------------------------+-------+-------+-------+------------+-------+-------+-------+ | Endianness conversion | ✓ (2) | | | | | | | +-----------------------------+-------+-------+-------+------------+-------+-------+-------+ -| Custom schema metadata | ✓ | ✓ | ✓ | | | ✓ | ✓ | +| Custom schema metadata | ✓ | ✓ | ✓ | | ✓ | ✓ | ✓ | +-----------------------------+-------+-------+-------+------------+-------+-------+-------+ Notes: From d04a46bb8c71a33c043b64ad596f5af43f42580c Mon Sep 17 00:00:00 2001 From: Jayjeet Chakraborty Date: Fri, 22 Oct 2021 08:17:08 -1000 Subject: [PATCH 006/194] ARROW-13607: [C++] Add Skyhook to Arrow Closes #10913 from JayjeetAtGithub/skyhook/pr_1 Authored-by: Jayjeet Chakraborty Signed-off-by: Weston Pace --- ci/docker/ubuntu-20.04-cpp.dockerfile | 5 + ci/scripts/cpp_build.sh | 1 + ci/scripts/generate_dataset.py | 47 +++ ci/scripts/install_ceph.sh | 28 ++ ci/scripts/integration_skyhook.sh | 141 +++++++++ cpp/CMakeLists.txt | 11 + cpp/cmake_modules/DefineOptions.cmake | 2 + cpp/cmake_modules/Findlibrados.cmake | 34 +++ cpp/src/arrow/dataset/dataset.h | 3 + cpp/src/arrow/dataset/scanner_internal.h | 5 + cpp/src/skyhook/CMakeLists.txt | 87 ++++++ cpp/src/skyhook/client/CMakeLists.txt | 18 ++ cpp/src/skyhook/client/file_skyhook.cc | 182 ++++++++++++ cpp/src/skyhook/client/file_skyhook.h | 108 +++++++ cpp/src/skyhook/cls/cls_skyhook.cc | 267 ++++++++++++++++++ 
cpp/src/skyhook/cls/cls_skyhook_test.cc | 207 ++++++++++++++ cpp/src/skyhook/protocol/ScanRequest.fbs | 29 ++ .../skyhook/protocol/ScanRequest_generated.h | 167 +++++++++++ cpp/src/skyhook/protocol/rados_protocol.cc | 99 +++++++ cpp/src/skyhook/protocol/rados_protocol.h | 103 +++++++ cpp/src/skyhook/protocol/skyhook_protocol.cc | 136 +++++++++ cpp/src/skyhook/protocol/skyhook_protocol.h | 116 ++++++++ .../skyhook/protocol/skyhook_protocol_test.cc | 71 +++++ cpp/src/skyhook/skyhook.pc.in | 26 ++ dev/tasks/tasks.yml | 9 + docker-compose.yml | 3 + 26 files changed, 1905 insertions(+) create mode 100644 ci/scripts/generate_dataset.py create mode 100755 ci/scripts/install_ceph.sh create mode 100755 ci/scripts/integration_skyhook.sh create mode 100644 cpp/cmake_modules/Findlibrados.cmake create mode 100644 cpp/src/skyhook/CMakeLists.txt create mode 100644 cpp/src/skyhook/client/CMakeLists.txt create mode 100644 cpp/src/skyhook/client/file_skyhook.cc create mode 100644 cpp/src/skyhook/client/file_skyhook.h create mode 100644 cpp/src/skyhook/cls/cls_skyhook.cc create mode 100644 cpp/src/skyhook/cls/cls_skyhook_test.cc create mode 100644 cpp/src/skyhook/protocol/ScanRequest.fbs create mode 100644 cpp/src/skyhook/protocol/ScanRequest_generated.h create mode 100644 cpp/src/skyhook/protocol/rados_protocol.cc create mode 100644 cpp/src/skyhook/protocol/rados_protocol.h create mode 100644 cpp/src/skyhook/protocol/skyhook_protocol.cc create mode 100644 cpp/src/skyhook/protocol/skyhook_protocol.h create mode 100644 cpp/src/skyhook/protocol/skyhook_protocol_test.cc create mode 100644 cpp/src/skyhook/skyhook.pc.in diff --git a/ci/docker/ubuntu-20.04-cpp.dockerfile b/ci/docker/ubuntu-20.04-cpp.dockerfile index de872da9a8f75..5a48c648e3bfe 100644 --- a/ci/docker/ubuntu-20.04-cpp.dockerfile +++ b/ci/docker/ubuntu-20.04-cpp.dockerfile @@ -78,6 +78,7 @@ RUN apt-get update -y -q && \ liblz4-dev \ libprotobuf-dev \ libprotoc-dev \ + libradospp-dev \ libre2-dev \ libsnappy-dev \ libssl-dev \ @@ -89,6 +90,8 @@ RUN apt-get update -y -q && \ pkg-config \ protobuf-compiler \ python3-pip \ + python3-rados \ + rados-objclass-dev \ rapidjson-dev \ tzdata \ wget && \ @@ -99,6 +102,8 @@ COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_gcs_testbench.sh ${arch} default +COPY ci/scripts/install_ceph.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_ceph.sh # Prioritize system packages and local installation # The following dependencies will be downloaded due to missing/invalid packages diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh index a11dd23b7f7fe..0ea9b1b89dc47 100755 --- a/ci/scripts/cpp_build.sh +++ b/ci/scripts/cpp_build.sh @@ -91,6 +91,7 @@ cmake -G "${CMAKE_GENERATOR:-Ninja}" \ -DARROW_PYTHON=${ARROW_PYTHON:-OFF} \ -DARROW_RUNTIME_SIMD_LEVEL=${ARROW_RUNTIME_SIMD_LEVEL:-MAX} \ -DARROW_S3=${ARROW_S3:-OFF} \ + -DARROW_SKYHOOK=${ARROW_SKYHOOK:-OFF} \ -DARROW_TEST_LINKAGE=${ARROW_TEST_LINKAGE:-shared} \ -DARROW_TEST_MEMCHECK=${ARROW_TEST_MEMCHECK:-OFF} \ -DARROW_USE_ASAN=${ARROW_USE_ASAN:-OFF} \ diff --git a/ci/scripts/generate_dataset.py b/ci/scripts/generate_dataset.py new file mode 100644 index 0000000000000..42ee0763a1b25 --- /dev/null +++ b/ci/scripts/generate_dataset.py @@ -0,0 +1,47 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + +import os +import shutil +import random + +import pandas as pd + +if __name__ == "__main__": + # generate the test dataframe + data = { + "total_amount": list(), + "fare_amount": list() + } + for i in range(0, 500): + data['total_amount'].append(random.randint(1,11)*5) + data['fare_amount'].append(random.randint(1,11)*3) + df = pd.DataFrame(data) + + # dump the dataframe to a parquet file + df.to_parquet("skyhook_test_data.parquet") + + # create the dataset by copying the parquet files + shutil.rmtree("nyc", ignore_errors=True) + payment_type = ["1", "2", "3", "4"] + vendor_id = ["1", "2"] + for p in payment_type: + for v in vendor_id: + path = f"nyc/payment_type={p}/VendorID={v}" + os.makedirs(path, exist_ok=True) + shutil.copyfile("skyhook_test_data.parquet", os.path.join(path, f"{p}.{v}.parquet")) diff --git a/ci/scripts/install_ceph.sh b/ci/scripts/install_ceph.sh new file mode 100755 index 0000000000000..d9abef0619408 --- /dev/null +++ b/ci/scripts/install_ceph.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -ex + +ARCH=$(uname -m) +if [ "$ARCH" != "x86_64" ]; then + exit 0 +fi + +apt update +apt install -y attr ceph-common ceph-fuse ceph-mds ceph-mgr ceph-mon ceph-osd diff --git a/ci/scripts/integration_skyhook.sh b/ci/scripts/integration_skyhook.sh new file mode 100755 index 0000000000000..6c3011f9c63ed --- /dev/null +++ b/ci/scripts/integration_skyhook.sh @@ -0,0 +1,141 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# This script spawns a single-node Ceph cluster, creates a CephFS mount, +# generates a Parquet dataset, and runs the SkyhookDM integration tests. +# Taken from https://github.com/ceph/go-ceph/blob/master/micro-osd.sh + +set -e +set -x +set -u + +if [ "${ARROW_SKYHOOK:-OFF}" != "ON" ]; then + exit 0 +fi + +ARROW_BUILD_DIR=${1}/cpp +DIR=/tmp/integration_skyhook + +# set environment variables +pkill ceph || true +rm -rf ${DIR}/* +LOG_DIR=${DIR}/log +MON_DATA=${DIR}/mon +MDS_DATA=${DIR}/mds +MOUNTPT=${MDS_DATA}/mnt +OSD_DATA=${DIR}/osd +mkdir -p ${LOG_DIR} ${MON_DATA} ${OSD_DATA} ${MDS_DATA} ${MOUNTPT} +MDS_NAME="Z" +MON_NAME="a" +MGR_NAME="x" +MIRROR_ID="m" + +# cluster wide parameters +cat >> ${DIR}/ceph.conf < ${MDS_DATA}/keyring +ceph osd pool create cephfs_data 8 +ceph osd pool create cephfs_metadata 8 +ceph fs new cephfs cephfs_metadata cephfs_data +ceph fs ls +ceph-mds -i ${MDS_NAME} +ceph status +while [[ ! $(ceph mds stat | grep "up:active") ]]; do sleep 1; done + +# start a manager +ceph-mgr --id ${MGR_NAME} + +# test the setup +ceph --version +ceph status + +apt update +apt install -y python3-pip + +pushd ${ARROW_BUILD_DIR} + # create the rados-classes, if not there already + mkdir -p /usr/lib/x86_64-linux-gnu/rados-classes/ + cp debug/libcls_skyhook* /usr/lib/x86_64-linux-gnu/rados-classes/ + + # mount a ceph filesystem to /mnt/cephfs in the user-space using ceph-fuse + mkdir -p /mnt/cephfs + ceph-fuse /mnt/cephfs + sleep 5 + + # download an example dataset and copy into the mounted dir + pip3 install pyarrow pandas + python3 /arrow/ci/scripts/generate_dataset.py + cp -r nyc /mnt/cephfs/ + sleep 10 + + # run the tests + SKYHOOK_CLS_TEST=debug/skyhook-cls-test + if [ -f "$SKYHOOK_CLS_TEST" ]; then + debug/skyhook-cls-test + fi + + SKYHOOK_PROTOCOL_TEST=debug/skyhook-protocol-test + if [ -f "$SKYHOOK_PROTOCOL_TEST" ]; then + debug/skyhook-protocol-test + fi +popd diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index c787794d39de6..3c05f235df3dd 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -351,6 +351,13 @@ if(ARROW_ENGINE) set(ARROW_COMPUTE ON) endif() +if(ARROW_SKYHOOK) + set(ARROW_DATASET ON) + set(ARROW_PARQUET ON) + set(ARROW_WITH_LZ4 ON) + set(ARROW_WITH_SNAPPY ON) +endif() + if(ARROW_DATASET) set(ARROW_COMPUTE ON) set(ARROW_FILESYSTEM ON) @@ -938,6 +945,10 @@ if(ARROW_GANDIVA) add_subdirectory(src/gandiva) endif() +if(ARROW_SKYHOOK) + add_subdirectory(src/skyhook) +endif() + if(ARROW_BUILD_EXAMPLES) add_custom_target(runexample ctest -L example) add_subdirectory(examples/arrow) diff --git a/cpp/cmake_modules/DefineOptions.cmake b/cpp/cmake_modules/DefineOptions.cmake index 3568887fa261f..f81a1b1577901 100644 --- a/cpp/cmake_modules/DefineOptions.cmake +++ b/cpp/cmake_modules/DefineOptions.cmake @@ -266,6 +266,8 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") define_option(ARROW_S3 "Build Arrow with S3 support (requires the AWS SDK for C++)" OFF) + define_option(ARROW_SKYHOOK "Build the Skyhook libraries" OFF) + define_option(ARROW_TENSORFLOW "Build Arrow with TensorFlow support enabled" OFF) define_option(ARROW_TESTING "Build 
the Arrow testing libraries" OFF) diff --git a/cpp/cmake_modules/Findlibrados.cmake b/cpp/cmake_modules/Findlibrados.cmake new file mode 100644 index 0000000000000..695d73fae1cb8 --- /dev/null +++ b/cpp/cmake_modules/Findlibrados.cmake @@ -0,0 +1,34 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +find_path(LIBRADOS_INCLUDE_DIR rados/librados.hpp) + +find_library(LIBRADOS_LIBRARY NAMES rados) + +mark_as_advanced(LIBRADOS_LIBRARY LIBRADOS_INCLUDE_DIR) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(librados DEFAULT_MSG LIBRADOS_LIBRARY + LIBRADOS_INCLUDE_DIR) + +if(librados_FOUND) + add_library(librados::rados UNKNOWN IMPORTED) + set_target_properties(librados::rados + PROPERTIES IMPORTED_LOCATION "${LIBRADOS_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES + "${LIBRADOS_INCLUDE_DIR}") +endif() diff --git a/cpp/src/arrow/dataset/dataset.h b/cpp/src/arrow/dataset/dataset.h index 11210fdc27b81..a02954a23c83d 100644 --- a/cpp/src/arrow/dataset/dataset.h +++ b/cpp/src/arrow/dataset/dataset.h @@ -90,6 +90,9 @@ class ARROW_DS_EXPORT Fragment : public std::enable_shared_from_this { virtual ~Fragment() = default; + /// \brief Decide whether to apply filters and projections to this Fragment. + bool apply_compute = true; + protected: Fragment() = default; explicit Fragment(compute::Expression partition_expression, diff --git a/cpp/src/arrow/dataset/scanner_internal.h b/cpp/src/arrow/dataset/scanner_internal.h index 7a43feb61179c..2c78d1b277444 100644 --- a/cpp/src/arrow/dataset/scanner_internal.h +++ b/cpp/src/arrow/dataset/scanner_internal.h @@ -185,6 +185,11 @@ inline Result GetScanTaskIterator( auto fn = [options](std::shared_ptr fragment) -> Result { ARROW_ASSIGN_OR_RAISE(auto scan_task_it, fragment->Scan(options)); + // Skip applying compute on fragments if disabled. + if (!fragment->apply_compute) { + return std::move(scan_task_it); + } + auto partition = fragment->partition_expression(); // Apply the filter and/or projection to incoming RecordBatches by // wrapping the ScanTask with a FilterAndProjectScanTask diff --git a/cpp/src/skyhook/CMakeLists.txt b/cpp/src/skyhook/CMakeLists.txt new file mode 100644 index 0000000000000..22a414c5f26ac --- /dev/null +++ b/cpp/src/skyhook/CMakeLists.txt @@ -0,0 +1,87 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitationsn +# under the License. + +# add the client subdirectory +add_subdirectory(client) + +# define the targets to build +add_custom_target(arrow_skyhook_client) +add_custom_target(cls_skyhook) + +# define the dependencies +find_package(librados REQUIRED) +set(ARROW_SKYHOOK_LINK_STATIC arrow_dataset_static librados::rados) +set(ARROW_SKYHOOK_LINK_SHARED arrow_dataset_shared librados::rados) + +# define the client and cls sources +set(ARROW_SKYHOOK_CLIENT_SOURCES client/file_skyhook.cc protocol/rados_protocol.cc + protocol/skyhook_protocol.cc) +set(ARROW_SKYHOOK_CLS_SOURCES cls/cls_skyhook.cc protocol/rados_protocol.cc + protocol/skyhook_protocol.cc) + +# define the client library +add_arrow_lib(arrow_skyhook_client + PKG_CONFIG_NAME + skyhook + SOURCES + ${ARROW_SKYHOOK_CLIENT_SOURCES} + OUTPUTS + ARROW_SKYHOOK_CLIENT_LIBRARIES + SHARED_LINK_LIBS + ${ARROW_SKYHOOK_LINK_SHARED} + STATIC_LINK_LIBS + ${ARROW_SKYHOOK_LINK_STATIC}) + +# define the cls library +add_arrow_lib(cls_skyhook + SOURCES + ${ARROW_SKYHOOK_CLS_SOURCES} + OUTPUTS + ARROW_SKYHOOK_CLS_LIBRARIES + SHARED_LINK_LIBS + ${ARROW_SKYHOOK_LINK_SHARED} + STATIC_LINK_LIBS + ${ARROW_SKYHOOK_LINK_STATIC}) + +# finish building the project +add_dependencies(arrow_skyhook_client ${ARROW_SKYHOOK_CLIENT_LIBRARIES}) +add_dependencies(cls_skyhook ${ARROW_SKYHOOK_CLS_LIBRARIES}) + +# define the test builds +if(ARROW_TEST_LINKAGE STREQUAL "static") + set(ARROW_SKYHOOK_TEST_LINK_LIBS arrow_dataset_static ${ARROW_TEST_STATIC_LINK_LIBS}) +else() + set(ARROW_SKYHOOK_TEST_LINK_LIBS arrow_dataset_shared ${ARROW_TEST_SHARED_LINK_LIBS}) +endif() +list(APPEND ARROW_SKYHOOK_TEST_LINK_LIBS ${ARROW_SKYHOOK_CLIENT_LIBRARIES}) + +# build the cls and protocol tests +add_arrow_test(cls_test + SOURCES + cls/cls_skyhook_test.cc + EXTRA_LINK_LIBS + ${ARROW_SKYHOOK_TEST_LINK_LIBS} + PREFIX + "skyhook") + +add_arrow_test(protocol_test + SOURCES + protocol/skyhook_protocol_test.cc + EXTRA_LINK_LIBS + ${ARROW_SKYHOOK_TEST_LINK_LIBS} + PREFIX + "skyhook") diff --git a/cpp/src/skyhook/client/CMakeLists.txt b/cpp/src/skyhook/client/CMakeLists.txt new file mode 100644 index 0000000000000..6255d9ad39c5c --- /dev/null +++ b/cpp/src/skyhook/client/CMakeLists.txt @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +arrow_install_all_headers("skyhook/client") diff --git a/cpp/src/skyhook/client/file_skyhook.cc b/cpp/src/skyhook/client/file_skyhook.cc new file mode 100644 index 0000000000000..f8b57f441d200 --- /dev/null +++ b/cpp/src/skyhook/client/file_skyhook.cc @@ -0,0 +1,182 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +#include "skyhook/client/file_skyhook.h" +#include "skyhook/protocol/rados_protocol.h" +#include "skyhook/protocol/skyhook_protocol.h" + +#include "arrow/compute/exec/expression.h" +#include "arrow/dataset/file_base.h" +#include "arrow/dataset/file_ipc.h" +#include "arrow/dataset/file_parquet.h" +#include "arrow/util/compression.h" + +namespace skyhook { + +/// A ScanTask to scan a file fragment in Skyhook format. +class SkyhookScanTask : public arrow::dataset::ScanTask { + public: + SkyhookScanTask(std::shared_ptr options, + std::shared_ptr fragment, + arrow::dataset::FileSource source, + std::shared_ptr doa, + skyhook::SkyhookFileType::type file_format, + arrow::compute::Expression partition_expression) + : ScanTask(std::move(options), std::move(fragment)), + source_(std::move(source)), + doa_(std::move(doa)), + file_format_(file_format), + partition_expression_(std::move(partition_expression)) {} + + arrow::Result Execute() override { + /// Retrieve the size of the file using POSIX `stat`. + struct stat st {}; + RETURN_NOT_OK(doa_->Stat(source_.path(), st)); + + /// Create a ScanRequest instance. + skyhook::ScanRequest req; + req.filter_expression = options_->filter; + req.partition_expression = partition_expression_; + req.projection_schema = options_->projected_schema; + req.dataset_schema = options_->dataset_schema; + req.file_size = st.st_size; + req.file_format = file_format_; + + /// Serialize the ScanRequest into a ceph bufferlist. + ceph::bufferlist request; + RETURN_NOT_OK(skyhook::SerializeScanRequest(req, &request)); + + /// Execute the Ceph object class method `scan_op`. + ceph::bufferlist result; + RETURN_NOT_OK(doa_->Exec(st.st_ino, "scan_op", request, result)); + + /// Read RecordBatches from the result bufferlist. Since, this step might use + /// threads for decompressing compressed batches, to avoid running into + /// [ARROW-12597], we switch off threaded decompression to avoid nested threading + /// scenarios when scan tasks are executed in parallel by the CpuThreadPool. 
+ arrow::RecordBatchVector batches; + RETURN_NOT_OK(skyhook::DeserializeTable(result, !options_->use_threads, &batches)); + return arrow::MakeVectorIterator(std::move(batches)); + } + + protected: + arrow::dataset::FileSource source_; + std::shared_ptr doa_; + skyhook::SkyhookFileType::type file_format_; + arrow::compute::Expression partition_expression_; +}; + +class SkyhookFileFormat::Impl { + public: + Impl(std::shared_ptr ctx, std::string file_format) + : ctx_(std::move(ctx)), file_format_(std::move(file_format)) {} + + ~Impl() = default; + + arrow::Status Init() { + /// Connect to the RADOS cluster and instantiate a `SkyhookDirectObjectAccess` + /// instance. + auto connection = std::make_shared(ctx_); + RETURN_NOT_OK(connection->Connect()); + doa_ = std::make_shared(connection); + return arrow::Status::OK(); + } + + arrow::Result ScanFile( + const std::shared_ptr& options, + const std::shared_ptr& file) const { + /// Make sure client-side filtering and projection is turned off. + file->apply_compute = false; + + /// Convert string file format name to Enum. + skyhook::SkyhookFileType::type file_format; + if (file_format_ == "parquet") { + file_format = skyhook::SkyhookFileType::type::PARQUET; + } else if (file_format_ == "ipc") { + file_format = skyhook::SkyhookFileType::type::IPC; + } else { + return arrow::Status::Invalid("Unsupported file format ", file_format_); + } + + arrow::dataset::ScanTaskVector v{std::make_shared( + options, file, file->source(), doa_, file_format, file->partition_expression())}; + return arrow::MakeVectorIterator(v); + } + + arrow::Result> Inspect( + const arrow::dataset::FileSource& source) const { + std::shared_ptr file_format; + /// Convert string file format name to Arrow FileFormat. + if (file_format_ == "parquet") { + file_format = std::make_shared(); + } else if (file_format_ == "ipc") { + file_format = std::make_shared(); + } else { + return arrow::Status::Invalid("Unsupported file format ", file_format_); + } + std::shared_ptr schema; + ARROW_ASSIGN_OR_RAISE(schema, file_format->Inspect(source)); + return schema; + } + + private: + std::shared_ptr doa_; + std::shared_ptr ctx_; + std::string file_format_; +}; + +arrow::Result> SkyhookFileFormat::Make( + std::shared_ptr ctx, std::string file_format) { + auto format = + std::make_shared(std::move(ctx), std::move(file_format)); + /// Establish connection to the Ceph cluster. 
+ RETURN_NOT_OK(format->Init()); + return format; +} + +SkyhookFileFormat::SkyhookFileFormat(std::shared_ptr ctx, + std::string file_format) + : impl_(new Impl(std::move(ctx), std::move(file_format))) {} + +SkyhookFileFormat::~SkyhookFileFormat() = default; + +arrow::Status SkyhookFileFormat::Init() { return impl_->Init(); } + +arrow::Result> SkyhookFileFormat::Inspect( + const arrow::dataset::FileSource& source) const { + return impl_->Inspect(source); +} + +arrow::Result SkyhookFileFormat::ScanFile( + const std::shared_ptr& options, + const std::shared_ptr& file) const { + return impl_->ScanFile(options, file); +} + +std::shared_ptr +SkyhookFileFormat::DefaultWriteOptions() { + return nullptr; +} + +arrow::Result> SkyhookFileFormat::MakeWriter( + std::shared_ptr destination, + std::shared_ptr schema, + std::shared_ptr options, + arrow::fs::FileLocator destination_locator) const { + return arrow::Status::NotImplemented("Skyhook writer not yet implemented."); +} + +} // namespace skyhook diff --git a/cpp/src/skyhook/client/file_skyhook.h b/cpp/src/skyhook/client/file_skyhook.h new file mode 100644 index 0000000000000..52a19f5bf3b92 --- /dev/null +++ b/cpp/src/skyhook/client/file_skyhook.h @@ -0,0 +1,108 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +#pragma once + +#include "arrow/api.h" +#include "arrow/dataset/file_parquet.h" +#include "arrow/dataset/scanner.h" +#include "arrow/dataset/type_fwd.h" +#include "arrow/dataset/visibility.h" + +namespace skyhook { + +/// \addtogroup dataset-file-formats +/// +/// @{ + +/// \struct RadosConnCtx +/// \brief A struct to hold the parameters required +/// for connecting to a RADOS cluster. +struct RadosConnCtx { + std::string ceph_config_path; + std::string ceph_data_pool; + std::string ceph_user_name; + std::string ceph_cluster_name; + std::string ceph_cls_name; + + RadosConnCtx(std::string ceph_config_path, std::string ceph_data_pool, + std::string ceph_user_name, std::string ceph_cluster_name, + std::string ceph_cls_name) + : ceph_config_path(std::move(ceph_config_path)), + ceph_data_pool(std::move(ceph_data_pool)), + ceph_user_name(std::move(ceph_user_name)), + ceph_cluster_name(std::move(ceph_cluster_name)), + ceph_cls_name(std::move(ceph_cls_name)) {} +}; + +/// \class SkyhookFileFormat +/// \brief A FileFormat implementation that offloads fragment +/// scan operations to the Ceph OSDs. For more details, see the +/// Skyhook paper, https://arxiv.org/pdf/2105.09894.pdf. 
+class SkyhookFileFormat : public arrow::dataset::FileFormat { + public: + static arrow::Result> Make( + std::shared_ptr ctx, std::string file_format); + SkyhookFileFormat(std::shared_ptr ctx, std::string file_format); + + ~SkyhookFileFormat() override; + + std::string type_name() const override { return "skyhook"; } + + bool Equals(const arrow::dataset::FileFormat& other) const override { + return type_name() == other.type_name(); + } + + arrow::Result IsSupported( + const arrow::dataset::FileSource& source) const override { + return true; + } + + /// \brief Return the schema of the file fragment. + /// \param[in] source The source of the file fragment. + /// \return The schema of the file fragment. + arrow::Result> Inspect( + const arrow::dataset::FileSource& source) const override; + + /// \brief Scan a file fragment. + /// \param[in] options The ScanOptions to use. + /// \param[in] file The file fragment to scan. + /// \return An iterator of ScanTasks. + arrow::Result ScanFile( + const std::shared_ptr& options, + const std::shared_ptr& file) const override; + + /// \brief Create a writer for this format. + arrow::Result> MakeWriter( + std::shared_ptr destination, + std::shared_ptr schema, + std::shared_ptr options, + arrow::fs::FileLocator destination_locator) const override; + + /// \brief Get default write options for this format. + std::shared_ptr DefaultWriteOptions() override; + + private: + class Impl; + std::unique_ptr impl_; + + /// \brief Initialize the SkyhookFileFormat by connecting to RADOS. + arrow::Status Init(); +}; + +/// @} + +} // namespace skyhook diff --git a/cpp/src/skyhook/cls/cls_skyhook.cc b/cpp/src/skyhook/cls/cls_skyhook.cc new file mode 100644 index 0000000000000..5f50dd04607dd --- /dev/null +++ b/cpp/src/skyhook/cls/cls_skyhook.cc @@ -0,0 +1,267 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +#include + +#include + +#include "arrow/compute/exec/expression.h" +#include "arrow/dataset/dataset.h" +#include "arrow/dataset/file_ipc.h" +#include "arrow/dataset/file_parquet.h" +#include "arrow/io/interfaces.h" +#include "arrow/result.h" +#include "arrow/util/logging.h" + +#include "skyhook/protocol/skyhook_protocol.h" + +CLS_VER(1, 0) +CLS_NAME(skyhook) + +cls_handle_t h_class; +cls_method_handle_t h_scan_op; + +/// \brief Log skyhook errors using RADOS object class SDK's logger. +void LogSkyhookError(const std::string& msg) { CLS_LOG(0, "error: %s", msg.c_str()); } + +/// \class RandomAccessObject +/// \brief An interface to provide a file-like view over RADOS objects. 
+class RandomAccessObject : public arrow::io::RandomAccessFile { + public: + explicit RandomAccessObject(cls_method_context_t hctx, int64_t file_size) { + hctx_ = hctx; + content_length_ = file_size; + chunks_ = std::vector>(); + } + + ~RandomAccessObject() override { DCHECK_OK(Close()); } + + /// Check if the file stream is closed. + arrow::Status CheckClosed() const { + if (closed_) { + return arrow::Status::Invalid("Operation on closed stream"); + } + return arrow::Status::OK(); + } + + /// Check if the position of the object is valid. + arrow::Status CheckPosition(int64_t position, const char* action) const { + if (position < 0) { + return arrow::Status::Invalid("Cannot ", action, " from negative position"); + } + if (position > content_length_) { + return arrow::Status::IOError("Cannot ", action, " past end of file"); + } + return arrow::Status::OK(); + } + + arrow::Result ReadAt(int64_t position, int64_t nbytes, void* out) override { + return arrow::Status::NotImplemented( + "ReadAt has not been implemented in RandomAccessObject"); + } + + /// Read a specified number of bytes from a specified position. + arrow::Result> ReadAt(int64_t position, + int64_t nbytes) override { + RETURN_NOT_OK(CheckClosed()); + RETURN_NOT_OK(CheckPosition(position, "read")); + + // No need to allocate more than the remaining number of bytes + nbytes = std::min(nbytes, content_length_ - position); + + if (nbytes > 0) { + std::shared_ptr bl = std::make_shared(); + cls_cxx_read(hctx_, position, nbytes, bl.get()); + chunks_.push_back(bl); + return std::make_shared((uint8_t*)bl->c_str(), bl->length()); + } + return std::make_shared(""); + } + + /// Read a specified number of bytes from the current position. + arrow::Result> Read(int64_t nbytes) override { + ARROW_ASSIGN_OR_RAISE(auto buffer, ReadAt(pos_, nbytes)); + pos_ += buffer->size(); + return std::move(buffer); + } + + /// Read a specified number of bytes from the current position into an output stream. + arrow::Result Read(int64_t nbytes, void* out) override { + ARROW_ASSIGN_OR_RAISE(int64_t bytes_read, ReadAt(pos_, nbytes, out)); + pos_ += bytes_read; + return bytes_read; + } + + /// Return the size of the file. + arrow::Result GetSize() override { + RETURN_NOT_OK(CheckClosed()); + return content_length_; + } + + /// Sets the file-pointer offset, measured from the beginning of the + /// file, at which the next read or write occurs. + arrow::Status Seek(int64_t position) override { + RETURN_NOT_OK(CheckClosed()); + RETURN_NOT_OK(CheckPosition(position, "seek")); + + pos_ = position; + return arrow::Status::OK(); + } + + /// Returns the file-pointer offset. + arrow::Result Tell() const override { + RETURN_NOT_OK(CheckClosed()); + return pos_; + } + + /// Mark the file as closed. + arrow::Status Close() override { + closed_ = true; + return arrow::Status::OK(); + } + + bool closed() const override { return closed_; } + + private: + cls_method_context_t hctx_; + bool closed_ = false; + int64_t pos_ = 0; + int64_t content_length_ = -1; + std::vector> chunks_; +}; + +/// \brief Driver function to execute the Scan operations. +/// \param[in] hctx RADOS object context. +/// \param[in] req The scan request received from the client. +/// \param[in] format The file format instance to use in the scan. +/// \param[in] fragment_scan_options The fragment scan options to use to customize the +/// scan. +/// \return Table. 
+arrow::Result> DoScan( + cls_method_context_t hctx, const skyhook::ScanRequest& req, + const std::shared_ptr& format, + const std::shared_ptr& fragment_scan_options) { + auto file = std::make_shared(hctx, req.file_size); + arrow::dataset::FileSource source(file); + ARROW_ASSIGN_OR_RAISE( + auto fragment, format->MakeFragment(std::move(source), req.partition_expression)); + auto options = std::make_shared(); + auto builder = std::make_shared( + req.dataset_schema, std::move(fragment), std::move(options)); + + ARROW_RETURN_NOT_OK(builder->Filter(req.filter_expression)); + ARROW_RETURN_NOT_OK(builder->Project(req.projection_schema->field_names())); + ARROW_RETURN_NOT_OK(builder->UseThreads(true)); + ARROW_RETURN_NOT_OK(builder->FragmentScanOptions(fragment_scan_options)); + + ARROW_ASSIGN_OR_RAISE(auto scanner, builder->Finish()); + ARROW_ASSIGN_OR_RAISE(auto table, scanner->ToTable()); + return table; +} + +/// \brief Scan RADOS objects containing Arrow IPC data. +/// \param[in] hctx The RADOS object context. +/// \param[in] req The scan request received from the client. +/// \return Table. +static arrow::Result> ScanIpcObject( + cls_method_context_t hctx, skyhook::ScanRequest req) { + auto format = std::make_shared(); + auto fragment_scan_options = std::make_shared(); + + ARROW_ASSIGN_OR_RAISE(auto result_table, DoScan(hctx, req, std::move(format), + std::move(fragment_scan_options))); + return result_table; +} + +/// \brief Scan RADOS objects containing Parquet binary data. +/// \param[in] hctx The RADOS object context. +/// \param[in] req The scan request received from the client. +/// \return Table. +static arrow::Result> ScanParquetObject( + cls_method_context_t hctx, skyhook::ScanRequest req) { + auto format = std::make_shared(); + auto fragment_scan_options = + std::make_shared(); + + ARROW_ASSIGN_OR_RAISE(auto result_table, DoScan(hctx, req, std::move(format), + std::move(fragment_scan_options))); + return result_table; +} + +/// \brief The scan operation to execute on the Ceph OSD nodes. The scan request is +/// deserialized, the object is scanned, and the resulting table is serialized +/// and sent back to the client. +/// \param[in] hctx The RADOS object context. +/// \param[in] in A bufferlist containing serialized Scan request. +/// \param[out] out A bufferlist to store the serialized resultant table. +/// \return Exit code. +static int scan_op(cls_method_context_t hctx, ceph::bufferlist* in, + ceph::bufferlist* out) { + // Components required to construct a File fragment. + arrow::Status s; + skyhook::ScanRequest req; + + // Deserialize the scan request. + if (!(s = skyhook::DeserializeScanRequest(*in, &req)).ok()) { + LogSkyhookError(s.message()); + return SCAN_REQ_DESER_ERR_CODE; + } + + // Scan the object. 
+ std::shared_ptr table; + arrow::Result> maybe_table; + switch (req.file_format) { + case skyhook::SkyhookFileType::type::PARQUET: + maybe_table = ScanParquetObject(hctx, std::move(req)); + if (!maybe_table.ok()) { + LogSkyhookError("Could not scan parquet object: " + + maybe_table.status().ToString()); + return SCAN_ERR_CODE; + } + table = *maybe_table; + break; + case skyhook::SkyhookFileType::type::IPC: + maybe_table = ScanIpcObject(hctx, std::move(req)); + if (!maybe_table.ok()) { + LogSkyhookError("Could not scan IPC object: " + maybe_table.status().ToString()); + return SCAN_ERR_CODE; + } + table = *maybe_table; + break; + default: + table = nullptr; + } + if (!table) { + LogSkyhookError("Unsupported file format"); + return SCAN_ERR_CODE; + } + + // Serialize the resultant table to send back to the client. + ceph::bufferlist bl; + if (!(s = skyhook::SerializeTable(table, &bl)).ok()) { + LogSkyhookError(s.message()); + return SCAN_RES_SER_ERR_CODE; + } + + *out = std::move(bl); + return 0; +} + +void __cls_init() { + /// Register the skyhook object classes with the OSD. + cls_register("skyhook", &h_class); + cls_register_cxx_method(h_class, "scan_op", CLS_METHOD_RD, scan_op, &h_scan_op); +} diff --git a/cpp/src/skyhook/cls/cls_skyhook_test.cc b/cpp/src/skyhook/cls/cls_skyhook_test.cc new file mode 100644 index 0000000000000..461cdd6bc79de --- /dev/null +++ b/cpp/src/skyhook/cls/cls_skyhook_test.cc @@ -0,0 +1,207 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +#include "skyhook/client/file_skyhook.h" + +#include "arrow/compute/exec/expression.h" +#include "arrow/dataset/dataset.h" +#include "arrow/dataset/file_base.h" +#include "arrow/filesystem/api.h" +#include "arrow/io/api.h" +#include "arrow/testing/gtest_util.h" +#include "arrow/util/checked_cast.h" +#include "arrow/util/iterator.h" +#include "gtest/gtest.h" + +#include "parquet/arrow/reader.h" +#include "parquet/arrow/writer.h" + +std::shared_ptr GetSkyhookFormat() { + // The constants below should match the parameters with + // which the Ceph cluster is configured in integration_skyhook.sh. + // Currently, all the default values have been used. 
+ std::string ceph_config_path = "/etc/ceph/ceph.conf"; + std::string ceph_data_pool = "cephfs_data"; + std::string ceph_user_name = "client.admin"; + std::string ceph_cluster_name = "ceph"; + std::string ceph_cls_name = "skyhook"; + std::shared_ptr rados_ctx = + std::make_shared(ceph_config_path, ceph_data_pool, + ceph_user_name, ceph_cluster_name, + ceph_cls_name); + EXPECT_OK_AND_ASSIGN(auto format, + skyhook::SkyhookFileFormat::Make(rados_ctx, "parquet")); + return format; +} + +std::shared_ptr GetParquetFormat() { + return std::make_shared(); +} + +std::shared_ptr GetDatasetFromDirectory( + std::shared_ptr fs, + std::shared_ptr format, std::string dir) { + arrow::fs::FileSelector s; + s.base_dir = std::move(dir); + s.recursive = true; + + arrow::dataset::FileSystemFactoryOptions options; + options.partitioning = std::make_shared( + arrow::schema({arrow::field("payment_type", arrow::int32()), + arrow::field("VendorID", arrow::int32())})); + EXPECT_OK_AND_ASSIGN(auto factory, arrow::dataset::FileSystemDatasetFactory::Make( + std::move(fs), s, std::move(format), options)); + + arrow::dataset::InspectOptions inspect_options; + arrow::dataset::FinishOptions finish_options; + EXPECT_OK_AND_ASSIGN(auto schema, factory->Inspect(inspect_options)); + EXPECT_OK_AND_ASSIGN(auto dataset, factory->Finish(finish_options)); + return dataset; +} + +std::shared_ptr GetFileSystemFromUri(const std::string& uri, + std::string* path) { + EXPECT_OK_AND_ASSIGN(auto fs, arrow::fs::FileSystemFromUri(uri, path)); + return fs; +} + +std::shared_ptr GetDatasetFromPath( + std::shared_ptr fs, + std::shared_ptr format, std::string path) { + EXPECT_OK_AND_ASSIGN(auto info, fs->GetFileInfo(path)); + return GetDatasetFromDirectory(std::move(fs), std::move(format), std::move(path)); +} + +std::shared_ptr GetScannerFromDataset( + const std::shared_ptr& dataset, + std::vector columns, arrow::compute::Expression filter, + bool use_threads) { + EXPECT_OK_AND_ASSIGN(auto scanner_builder, dataset->NewScan()); + + if (!columns.empty()) { + ARROW_EXPECT_OK(scanner_builder->Project(std::move(columns))); + } + + ARROW_EXPECT_OK(scanner_builder->Filter(std::move(filter))); + ARROW_EXPECT_OK(scanner_builder->UseThreads(use_threads)); + EXPECT_OK_AND_ASSIGN(auto scanner, scanner_builder->Finish()); + return scanner; +} + +TEST(TestSkyhookCLS, SelectEntireDataset) { + std::string path; + auto fs = GetFileSystemFromUri("file:///mnt/cephfs/nyc", &path); + std::vector columns; + + auto parquet_format = GetParquetFormat(); + auto dataset = GetDatasetFromPath(fs, parquet_format, path); + auto scanner = + GetScannerFromDataset(dataset, columns, arrow::compute::literal(true), true); + EXPECT_OK_AND_ASSIGN(auto table_parquet, scanner->ToTable()); + + auto skyhook_format = GetSkyhookFormat(); + dataset = GetDatasetFromPath(fs, skyhook_format, path); + scanner = GetScannerFromDataset(dataset, columns, arrow::compute::literal(true), true); + EXPECT_OK_AND_ASSIGN(auto table_skyhook_parquet, scanner->ToTable()); + + ASSERT_EQ(table_parquet->Equals(*table_skyhook_parquet), 1); + ASSERT_EQ(table_parquet->num_rows(), table_skyhook_parquet->num_rows()); +} + +TEST(TestSkyhookCLS, SelectFewRows) { + std::string path; + auto fs = GetFileSystemFromUri("file:///mnt/cephfs/nyc", &path); + std::vector columns; + auto filter = arrow::compute::greater(arrow::compute::field_ref("payment_type"), + arrow::compute::literal(2)); + auto parquet_format = GetParquetFormat(); + auto dataset = GetDatasetFromPath(fs, parquet_format, path); + auto scanner = 
GetScannerFromDataset(dataset, columns, filter, true); + EXPECT_OK_AND_ASSIGN(auto table_parquet, scanner->ToTable()); + + auto skyhook_format = GetSkyhookFormat(); + dataset = GetDatasetFromPath(fs, skyhook_format, path); + scanner = GetScannerFromDataset(dataset, columns, filter, true); + EXPECT_OK_AND_ASSIGN(auto table_skyhook_parquet, scanner->ToTable()); + + ASSERT_EQ(table_parquet->Equals(*table_skyhook_parquet), 1); + ASSERT_EQ(table_parquet->num_rows(), table_skyhook_parquet->num_rows()); +} + +TEST(TestSkyhookCLS, SelectFewColumns) { + std::string path; + auto fs = GetFileSystemFromUri("file:///mnt/cephfs/nyc", &path); + std::vector columns = {"fare_amount", "total_amount"}; + + auto parquet_format = GetParquetFormat(); + auto dataset = GetDatasetFromPath(fs, parquet_format, path); + auto scanner = + GetScannerFromDataset(dataset, columns, arrow::compute::literal(true), true); + EXPECT_OK_AND_ASSIGN(auto table_parquet, scanner->ToTable()); + + auto skyhook_format = GetSkyhookFormat(); + dataset = GetDatasetFromPath(fs, skyhook_format, path); + scanner = GetScannerFromDataset(dataset, columns, arrow::compute::literal(true), true); + EXPECT_OK_AND_ASSIGN(auto table_skyhook_parquet, scanner->ToTable()); + + ASSERT_EQ(table_parquet->Equals(*table_skyhook_parquet), 1); + ASSERT_EQ(table_parquet->num_rows(), table_skyhook_parquet->num_rows()); +} + +TEST(TestSkyhookCLS, SelectRowsAndColumnsOnPartitionKey) { + std::string path; + auto fs = GetFileSystemFromUri("file:///mnt/cephfs/nyc", &path); + std::vector columns = {"fare_amount", "VendorID", "payment_type"}; + auto filter = arrow::compute::greater(arrow::compute::field_ref("payment_type"), + arrow::compute::literal(2)); + + auto parquet_format = GetParquetFormat(); + auto dataset = GetDatasetFromPath(fs, parquet_format, path); + auto scanner = GetScannerFromDataset(dataset, columns, filter, true); + EXPECT_OK_AND_ASSIGN(auto table_parquet, scanner->ToTable()); + + auto skyhook_format = GetSkyhookFormat(); + dataset = GetDatasetFromPath(fs, skyhook_format, path); + scanner = GetScannerFromDataset(dataset, columns, filter, true); + EXPECT_OK_AND_ASSIGN(auto table_skyhook_parquet, scanner->ToTable()); + + ASSERT_EQ(table_parquet->Equals(*table_skyhook_parquet), 1); + ASSERT_EQ(table_parquet->num_rows(), table_skyhook_parquet->num_rows()); +} + +TEST(TestSkyhookCLS, SelectRowsAndColumnsOnlyOnPartitionKey) { + std::string path; + auto fs = GetFileSystemFromUri("file:///mnt/cephfs/nyc", &path); + std::vector columns = {"total_amount", "VendorID", "payment_type"}; + auto filter = arrow::compute::and_( + arrow::compute::greater(arrow::compute::field_ref("payment_type"), + arrow::compute::literal(2)), + arrow::compute::greater(arrow::compute::field_ref("VendorID"), + arrow::compute::literal(1))); + + auto parquet_format = GetParquetFormat(); + auto dataset = GetDatasetFromPath(fs, parquet_format, path); + auto scanner = GetScannerFromDataset(dataset, columns, filter, true); + EXPECT_OK_AND_ASSIGN(auto table_parquet, scanner->ToTable()); + + auto skyhook_format = GetSkyhookFormat(); + dataset = GetDatasetFromPath(fs, skyhook_format, path); + scanner = GetScannerFromDataset(dataset, columns, filter, true); + EXPECT_OK_AND_ASSIGN(auto table_skyhook_parquet, scanner->ToTable()); + + ASSERT_EQ(table_parquet->Equals(*table_skyhook_parquet), 1); + ASSERT_EQ(table_parquet->num_rows(), table_skyhook_parquet->num_rows()); +} diff --git a/cpp/src/skyhook/protocol/ScanRequest.fbs b/cpp/src/skyhook/protocol/ScanRequest.fbs new file mode 100644 index 
0000000000000..870d603a5ea28 --- /dev/null +++ b/cpp/src/skyhook/protocol/ScanRequest.fbs @@ -0,0 +1,29 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +namespace org.apache.arrow.flatbuf; + +table ScanRequest { + file_size: long; + file_format: short; + filter: [ubyte]; + partition: [ubyte]; + dataset_schema: [ubyte]; + projection_schema: [ubyte]; +} + +root_type ScanRequest; diff --git a/cpp/src/skyhook/protocol/ScanRequest_generated.h b/cpp/src/skyhook/protocol/ScanRequest_generated.h new file mode 100644 index 0000000000000..884857a1b4df7 --- /dev/null +++ b/cpp/src/skyhook/protocol/ScanRequest_generated.h @@ -0,0 +1,167 @@ +// automatically generated by the FlatBuffers compiler, do not modify + + +#ifndef FLATBUFFERS_GENERATED_SCANREQUEST_ORG_APACHE_ARROW_FLATBUF_H_ +#define FLATBUFFERS_GENERATED_SCANREQUEST_ORG_APACHE_ARROW_FLATBUF_H_ + +#include "flatbuffers/flatbuffers.h" + +namespace org { +namespace apache { +namespace arrow { +namespace flatbuf { + +struct ScanRequest; + +struct ScanRequest FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_FILE_SIZE = 4, + VT_FILE_FORMAT = 6, + VT_FILTER = 8, + VT_PARTITION = 10, + VT_DATASET_SCHEMA = 12, + VT_PROJECTION_SCHEMA = 14 + }; + int64_t file_size() const { + return GetField(VT_FILE_SIZE, 0); + } + int16_t file_format() const { + return GetField(VT_FILE_FORMAT, 0); + } + const flatbuffers::Vector *filter() const { + return GetPointer *>(VT_FILTER); + } + const flatbuffers::Vector *partition() const { + return GetPointer *>(VT_PARTITION); + } + const flatbuffers::Vector *dataset_schema() const { + return GetPointer *>(VT_DATASET_SCHEMA); + } + const flatbuffers::Vector *projection_schema() const { + return GetPointer *>(VT_PROJECTION_SCHEMA); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_FILE_SIZE) && + VerifyField(verifier, VT_FILE_FORMAT) && + VerifyOffset(verifier, VT_FILTER) && + verifier.VerifyVector(filter()) && + VerifyOffset(verifier, VT_PARTITION) && + verifier.VerifyVector(partition()) && + VerifyOffset(verifier, VT_DATASET_SCHEMA) && + verifier.VerifyVector(dataset_schema()) && + VerifyOffset(verifier, VT_PROJECTION_SCHEMA) && + verifier.VerifyVector(projection_schema()) && + verifier.EndTable(); + } +}; + +struct ScanRequestBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_file_size(int64_t file_size) { + fbb_.AddElement(ScanRequest::VT_FILE_SIZE, file_size, 0); + } + void add_file_format(int16_t file_format) { + fbb_.AddElement(ScanRequest::VT_FILE_FORMAT, file_format, 0); + } + void add_filter(flatbuffers::Offset> filter) { + fbb_.AddOffset(ScanRequest::VT_FILTER, filter); 
+ } + void add_partition(flatbuffers::Offset> partition) { + fbb_.AddOffset(ScanRequest::VT_PARTITION, partition); + } + void add_dataset_schema(flatbuffers::Offset> dataset_schema) { + fbb_.AddOffset(ScanRequest::VT_DATASET_SCHEMA, dataset_schema); + } + void add_projection_schema(flatbuffers::Offset> projection_schema) { + fbb_.AddOffset(ScanRequest::VT_PROJECTION_SCHEMA, projection_schema); + } + explicit ScanRequestBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ScanRequestBuilder &operator=(const ScanRequestBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateScanRequest( + flatbuffers::FlatBufferBuilder &_fbb, + int64_t file_size = 0, + int16_t file_format = 0, + flatbuffers::Offset> filter = 0, + flatbuffers::Offset> partition = 0, + flatbuffers::Offset> dataset_schema = 0, + flatbuffers::Offset> projection_schema = 0) { + ScanRequestBuilder builder_(_fbb); + builder_.add_file_size(file_size); + builder_.add_projection_schema(projection_schema); + builder_.add_dataset_schema(dataset_schema); + builder_.add_partition(partition); + builder_.add_filter(filter); + builder_.add_file_format(file_format); + return builder_.Finish(); +} + +inline flatbuffers::Offset CreateScanRequestDirect( + flatbuffers::FlatBufferBuilder &_fbb, + int64_t file_size = 0, + int16_t file_format = 0, + const std::vector *filter = nullptr, + const std::vector *partition = nullptr, + const std::vector *dataset_schema = nullptr, + const std::vector *projection_schema = nullptr) { + auto filter__ = filter ? _fbb.CreateVector(*filter) : 0; + auto partition__ = partition ? _fbb.CreateVector(*partition) : 0; + auto dataset_schema__ = dataset_schema ? _fbb.CreateVector(*dataset_schema) : 0; + auto projection_schema__ = projection_schema ? _fbb.CreateVector(*projection_schema) : 0; + return org::apache::arrow::flatbuf::CreateScanRequest( + _fbb, + file_size, + file_format, + filter__, + partition__, + dataset_schema__, + projection_schema__); +} + +inline const org::apache::arrow::flatbuf::ScanRequest *GetScanRequest(const void *buf) { + return flatbuffers::GetRoot(buf); +} + +inline const org::apache::arrow::flatbuf::ScanRequest *GetSizePrefixedScanRequest(const void *buf) { + return flatbuffers::GetSizePrefixedRoot(buf); +} + +inline bool VerifyScanRequestBuffer( + flatbuffers::Verifier &verifier) { + return verifier.VerifyBuffer(nullptr); +} + +inline bool VerifySizePrefixedScanRequestBuffer( + flatbuffers::Verifier &verifier) { + return verifier.VerifySizePrefixedBuffer(nullptr); +} + +inline void FinishScanRequestBuffer( + flatbuffers::FlatBufferBuilder &fbb, + flatbuffers::Offset root) { + fbb.Finish(root); +} + +inline void FinishSizePrefixedScanRequestBuffer( + flatbuffers::FlatBufferBuilder &fbb, + flatbuffers::Offset root) { + fbb.FinishSizePrefixed(root); +} + +} // namespace flatbuf +} // namespace arrow +} // namespace apache +} // namespace org + +#endif // FLATBUFFERS_GENERATED_SCANREQUEST_ORG_APACHE_ARROW_FLATBUF_H_ diff --git a/cpp/src/skyhook/protocol/rados_protocol.cc b/cpp/src/skyhook/protocol/rados_protocol.cc new file mode 100644 index 0000000000000..cb1acec1faa91 --- /dev/null +++ b/cpp/src/skyhook/protocol/rados_protocol.cc @@ -0,0 +1,99 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +#include "skyhook/protocol/rados_protocol.h" + +#include "arrow/util/io_util.h" + +#include +#include + +namespace skyhook { +namespace rados { + +template +arrow::Status GetStatusFromReturnCode(int code, Args&&... args) { + if (code) + return arrow::internal::StatusFromErrno(code, arrow::StatusCode::Invalid, + std::forward(args)...); + return arrow::Status::OK(); +} + +arrow::Status IoCtxInterface::read(const std::string& oid, ceph::bufferlist& bl, + size_t len, uint64_t offset) { + return GetStatusFromReturnCode(ioCtx->read(oid, bl, len, offset), + "ioctx->read failed."); +} + +arrow::Status IoCtxInterface::exec(const std::string& oid, const char* cls, + const char* method, ceph::bufferlist& in, + ceph::bufferlist& out) { + return GetStatusFromReturnCode(ioCtx->exec(oid, cls, method, in, out), + "ioctx->exec failed."); +} + +arrow::Status IoCtxInterface::stat(const std::string& oid, uint64_t* psize) { + return GetStatusFromReturnCode(ioCtx->stat(oid, psize, NULL), "ioctx->stat failed."); +} + +arrow::Status RadosInterface::init2(const char* const name, const char* const clustername, + uint64_t flags) { + return GetStatusFromReturnCode(cluster->init2(name, clustername, flags), + "rados->init failed."); +} + +arrow::Status RadosInterface::ioctx_create(const char* name, IoCtxInterface* pioctx) { + librados::IoCtx ioCtx; + int ret = cluster->ioctx_create(name, ioCtx); + pioctx->setIoCtx(&ioCtx); + return GetStatusFromReturnCode(ret, "rados->ioctx_create failed."); +} + +arrow::Status RadosInterface::conf_read_file(const char* const path) { + return GetStatusFromReturnCode(cluster->conf_read_file(path), + "rados->conf_read_file failed."); +} + +arrow::Status RadosInterface::connect() { + return GetStatusFromReturnCode(cluster->connect(), "rados->connect failed."); +} + +void RadosInterface::shutdown() { cluster->shutdown(); } + +RadosConn::~RadosConn() { Shutdown(); } + +arrow::Status RadosConn::Connect() { + if (connected) { + return arrow::Status::OK(); + } + + ARROW_RETURN_NOT_OK( + rados->init2(ctx->ceph_user_name.c_str(), ctx->ceph_cluster_name.c_str(), 0)); + ARROW_RETURN_NOT_OK(rados->conf_read_file(ctx->ceph_config_path.c_str())); + ARROW_RETURN_NOT_OK(rados->connect()); + ARROW_RETURN_NOT_OK(rados->ioctx_create(ctx->ceph_data_pool.c_str(), io_ctx.get())); + return arrow::Status::OK(); +} + +void RadosConn::Shutdown() { + if (connected) { + rados->shutdown(); + connected = false; + } +} + +} // namespace rados +} // namespace skyhook diff --git a/cpp/src/skyhook/protocol/rados_protocol.h b/cpp/src/skyhook/protocol/rados_protocol.h new file mode 100644 index 0000000000000..3e5fac7640b65 --- /dev/null +++ b/cpp/src/skyhook/protocol/rados_protocol.h @@ -0,0 +1,103 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#pragma once
+
+#include
+
+#include "arrow/status.h"
+#include "arrow/util/make_unique.h"
+
+#include "skyhook/client/file_skyhook.h"
+
+namespace skyhook {
+namespace rados {
+
+class IoCtxInterface {
+ public:
+  IoCtxInterface() { ioCtx = arrow::internal::make_unique(); }
+  /// \brief Read from a RADOS object.
+  ///
+  /// \param[in] oid the ID of the object to read.
+  /// \param[in] bl a bufferlist to hold the contents of the read object.
+  /// \param[in] len the length of data to read from the object.
+  /// \param[in] offset the offset to read from in the object.
+  arrow::Status read(const std::string& oid, ceph::bufferlist& bl, size_t len,
+                     uint64_t offset);
+  /// \brief Executes a Ceph Object Class method.
+  ///
+  /// \param[in] oid the object ID on which to invoke the CLS function.
+  /// \param[in] cls the name of the object class.
+  /// \param[in] method the name of the object class method.
+  /// \param[in] in a bufferlist to send data to the object class method.
+  /// \param[in] out a bufferlist to receive data from the object class method.
+  arrow::Status exec(const std::string& oid, const char* cls, const char* method,
+                     ceph::bufferlist& in, ceph::bufferlist& out);
+  /// \brief Execute POSIX stat on a RADOS object.
+  ///
+  /// \param[in] oid the object ID on which to call stat.
+  /// \param[out] psize hold the size of the object.
+  arrow::Status stat(const std::string& oid, uint64_t* psize);
+  /// \brief Set the `librados::IoCtx` instance inside a IoCtxInterface instance.
+  void setIoCtx(librados::IoCtx* ioCtx_) { *ioCtx = *ioCtx_; }
+
+ private:
+  std::unique_ptr ioCtx;
+};
+
+class RadosInterface {
+ public:
+  RadosInterface() { cluster = arrow::internal::make_unique(); }
+  /// Initializes a cluster handle.
+  arrow::Status init2(const char* const name, const char* const clustername,
+                      uint64_t flags);
+  /// Create an I/O context
+  arrow::Status ioctx_create(const char* name, IoCtxInterface* pioctx);
+  /// Read the Ceph config file.
+  arrow::Status conf_read_file(const char* const path);
+  /// Connect to the Ceph cluster.
+  arrow::Status connect();
+  /// Close connection to the Ceph cluster.
+  void shutdown();
+
+ private:
+  std::unique_ptr cluster;
+};
+
+/// Connect to a Ceph cluster and hold the connection
+/// information for use in later stages.
+class RadosConn {
+ public:
+  explicit RadosConn(std::shared_ptr ctx)
+      : ctx(std::move(ctx)),
+        rados(arrow::internal::make_unique()),
+        io_ctx(arrow::internal::make_unique()),
+        connected(false) {}
+  ~RadosConn();
+  /// Connect to the Ceph cluster.
+  arrow::Status Connect();
+  /// Shutdown the connection to the Ceph
+  /// cluster if already connected.
+ void Shutdown(); + + std::shared_ptr ctx; + std::unique_ptr rados; + std::unique_ptr io_ctx; + bool connected; +}; + +} // namespace rados +} // namespace skyhook diff --git a/cpp/src/skyhook/protocol/skyhook_protocol.cc b/cpp/src/skyhook/protocol/skyhook_protocol.cc new file mode 100644 index 0000000000000..c261048197209 --- /dev/null +++ b/cpp/src/skyhook/protocol/skyhook_protocol.cc @@ -0,0 +1,136 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +#include "skyhook/protocol/skyhook_protocol.h" + +#include + +#include "ScanRequest_generated.h" +#include "arrow/io/api.h" +#include "arrow/ipc/api.h" +#include "arrow/result.h" +#include "arrow/util/io_util.h" + +namespace skyhook { + +namespace flatbuf = org::apache::arrow::flatbuf; + +arrow::Status SerializeScanRequest(ScanRequest& req, ceph::bufferlist* bl) { + ARROW_ASSIGN_OR_RAISE(auto filter_expression, + arrow::compute::Serialize(req.filter_expression)); + ARROW_ASSIGN_OR_RAISE(auto partition_expression, + arrow::compute::Serialize(req.partition_expression)); + ARROW_ASSIGN_OR_RAISE(auto projection_schema, + arrow::ipc::SerializeSchema(*req.projection_schema)); + ARROW_ASSIGN_OR_RAISE(auto dataset_schema, + arrow::ipc::SerializeSchema(*req.dataset_schema)); + + flatbuffers::FlatBufferBuilder builder(1024); + auto filter_expression_vector = + builder.CreateVector(filter_expression->data(), filter_expression->size()); + auto partition_expression_vector = + builder.CreateVector(partition_expression->data(), partition_expression->size()); + auto projected_schema_vector = + builder.CreateVector(projection_schema->data(), projection_schema->size()); + auto dataset_schema_vector = + builder.CreateVector(dataset_schema->data(), dataset_schema->size()); + + auto request = flatbuf::CreateScanRequest( + builder, req.file_size, static_cast(req.file_format), filter_expression_vector, + partition_expression_vector, dataset_schema_vector, projected_schema_vector); + builder.Finish(request); + uint8_t* buf = builder.GetBufferPointer(); + int size = builder.GetSize(); + + bl->append(reinterpret_cast(buf), size); + return arrow::Status::OK(); +} + +arrow::Status DeserializeScanRequest(ceph::bufferlist& bl, ScanRequest* req) { + auto request = flatbuf::GetScanRequest((uint8_t*)bl.c_str()); + + ARROW_ASSIGN_OR_RAISE(auto filter_expression, + arrow::compute::Deserialize(std::make_shared( + request->filter()->data(), request->filter()->size()))); + req->filter_expression = filter_expression; + + ARROW_ASSIGN_OR_RAISE(auto partition_expression, + arrow::compute::Deserialize(std::make_shared( + request->partition()->data(), request->partition()->size()))); + req->partition_expression = partition_expression; + + arrow::ipc::DictionaryMemo empty_memo; + arrow::io::BufferReader 
projection_schema_reader(request->projection_schema()->data(), + request->projection_schema()->size()); + arrow::io::BufferReader dataset_schema_reader(request->dataset_schema()->data(), + request->dataset_schema()->size()); + + ARROW_ASSIGN_OR_RAISE(req->projection_schema, + arrow::ipc::ReadSchema(&projection_schema_reader, &empty_memo)); + ARROW_ASSIGN_OR_RAISE(req->dataset_schema, + arrow::ipc::ReadSchema(&dataset_schema_reader, &empty_memo)); + + req->file_size = request->file_size(); + req->file_format = (SkyhookFileType::type)request->file_format(); + return arrow::Status::OK(); +} + +arrow::Status SerializeTable(const std::shared_ptr& table, + ceph::bufferlist* bl) { + ARROW_ASSIGN_OR_RAISE(auto buffer_output_stream, + arrow::io::BufferOutputStream::Create()); + + auto options = arrow::ipc::IpcWriteOptions::Defaults(); + auto codec = arrow::Compression::LZ4_FRAME; + + ARROW_ASSIGN_OR_RAISE(options.codec, arrow::util::Codec::Create(codec)); + ARROW_ASSIGN_OR_RAISE(auto writer, arrow::ipc::MakeStreamWriter( + buffer_output_stream, table->schema(), options)); + + ARROW_RETURN_NOT_OK(writer->WriteTable(*table)); + ARROW_RETURN_NOT_OK(writer->Close()); + + ARROW_ASSIGN_OR_RAISE(auto buffer, buffer_output_stream->Finish()); + bl->append(reinterpret_cast(buffer->data()), buffer->size()); + return arrow::Status::OK(); +} + +arrow::Status DeserializeTable(ceph::bufferlist& bl, bool use_threads, + arrow::RecordBatchVector* batches) { + auto buffer = std::make_shared((uint8_t*)bl.c_str(), bl.length()); + auto buffer_reader = std::make_shared(buffer); + auto options = arrow::ipc::IpcReadOptions::Defaults(); + options.use_threads = use_threads; + ARROW_ASSIGN_OR_RAISE( + auto reader, arrow::ipc::RecordBatchStreamReader::Open(buffer_reader, options)); + ARROW_RETURN_NOT_OK(reader->ReadAll(batches)); + return arrow::Status::OK(); +} + +arrow::Status ExecuteObjectClassFn(const std::shared_ptr& connection, + const std::string& oid, const std::string& fn, + ceph::bufferlist& in, ceph::bufferlist& out) { + int e = arrow::internal::ErrnoFromStatus(connection->io_ctx->exec( + oid.c_str(), connection->ctx->ceph_cls_name.c_str(), fn.c_str(), in, out)); + + if (e == SCAN_ERR_CODE) return arrow::Status::Invalid(SCAN_ERR_MSG); + if (e == SCAN_REQ_DESER_ERR_CODE) return arrow::Status::Invalid(SCAN_REQ_DESER_ERR_MSG); + if (e == SCAN_RES_SER_ERR_CODE) return arrow::Status::Invalid(SCAN_RES_SER_ERR_MSG); + if (e != 0) return arrow::Status::Invalid(SCAN_UNKNOWN_ERR_MSG); + return arrow::Status::OK(); +} + +} // namespace skyhook diff --git a/cpp/src/skyhook/protocol/skyhook_protocol.h b/cpp/src/skyhook/protocol/skyhook_protocol.h new file mode 100644 index 0000000000000..b4f6d6ee1b477 --- /dev/null +++ b/cpp/src/skyhook/protocol/skyhook_protocol.h @@ -0,0 +1,116 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#pragma once
+
+#include "skyhook/protocol/rados_protocol.h"
+
+#include 
+#include 
+
+#include "arrow/compute/exec/expression.h"
+#include "arrow/record_batch.h"
+#include "arrow/table.h"
+#include "arrow/type.h"
+
+#define SCAN_UNKNOWN_ERR_MSG "something went wrong while scanning file fragment"
+#define SCAN_ERR_CODE 25
+#define SCAN_ERR_MSG "failed to scan file fragment"
+#define SCAN_REQ_DESER_ERR_CODE 26
+#define SCAN_REQ_DESER_ERR_MSG "failed to deserialize scan request"
+#define SCAN_RES_SER_ERR_CODE 27
+#define SCAN_RES_SER_ERR_MSG "failed to serialize result table"
+
+namespace skyhook {
+
+/// An enum to represent the different
+/// types of file formats that Skyhook supports.
+struct SkyhookFileType {
+  enum type { PARQUET, IPC };
+};
+
+/// A struct encapsulating all the parameters
+/// required to be serialized in the form of flatbuffers for
+/// sending to the Ceph object class (cls).
+struct ScanRequest {
+  arrow::compute::Expression filter_expression;
+  arrow::compute::Expression partition_expression;
+  std::shared_ptr projection_schema;
+  std::shared_ptr dataset_schema;
+  int64_t file_size;
+  SkyhookFileType::type file_format;
+};
+
+/// Utility functions to serialize and deserialize scan requests and result Arrow tables.
+arrow::Status SerializeScanRequest(ScanRequest& req, ceph::bufferlist* bl);
+arrow::Status DeserializeScanRequest(ceph::bufferlist& bl, ScanRequest* req);
+arrow::Status SerializeTable(const std::shared_ptr& table,
+                             ceph::bufferlist* bl);
+arrow::Status DeserializeTable(ceph::bufferlist& bl, bool use_threads,
+                               arrow::RecordBatchVector* batches);
+
+/// Utility function to invoke a RADOS object class function on a RADOS object.
+arrow::Status ExecuteObjectClassFn(const std::shared_ptr& connection,
+                                   const std::string& oid, const std::string& fn,
+                                   ceph::bufferlist& in, ceph::bufferlist& out);
+
+/// An interface for translating the name of a file in CephFS to its
+/// corresponding object ID in RADOS, assuming a 1:1 mapping between a file
+/// and its underlying object.
+class SkyhookDirectObjectAccess {
+ public:
+  explicit SkyhookDirectObjectAccess(std::shared_ptr connection)
+      : connection_(std::move(connection)) {}
+
+  ~SkyhookDirectObjectAccess() = default;
+
+  /// Execute a POSIX stat on a file.
+  arrow::Status Stat(const std::string& path, struct stat& st) {
+    struct stat file_st;
+    if (stat(path.c_str(), &file_st) < 0)
+      return arrow::Status::Invalid("stat returned non-zero exit code.");
+    st = file_st;
+    return arrow::Status::OK();
+  }
+
+  /// Convert a file inode to a RADOS object ID.
+  std::string ConvertInodeToOID(uint64_t inode) {
+    std::stringstream ss;
+    /// In Ceph, the underlying stripes that make up a file are
+    /// named in the format [hex(inode)].[8-bit-binary(stripe_index)],
+    /// so inode 42 (0x2a), for example, maps to "2a.00000000".
+    ss << std::hex << inode;
+
+    /// Since in Skyhook we ensure a single stripe per file,
+    /// we can assume the stripe index is always 0 and hence
+    /// hardcode its 8-bit binary form.
+    std::string oid(ss.str() + ".00000000");
+    return oid;
+  }
+
+  /// Execute an object class method. It uses the `librados::exec` API to
+  /// perform object class method calls on the storage node and
+  /// stores the result in an output bufferlist.
+ arrow::Status Exec(uint64_t inode, const std::string& fn, ceph::bufferlist& in, + ceph::bufferlist& out) { + std::string oid = ConvertInodeToOID(inode); + return ExecuteObjectClassFn(connection_, oid, fn, in, out); + } + + private: + std::shared_ptr connection_; +}; + +} // namespace skyhook diff --git a/cpp/src/skyhook/protocol/skyhook_protocol_test.cc b/cpp/src/skyhook/protocol/skyhook_protocol_test.cc new file mode 100644 index 0000000000000..1d3af3ef72cf1 --- /dev/null +++ b/cpp/src/skyhook/protocol/skyhook_protocol_test.cc @@ -0,0 +1,71 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +#include "skyhook/protocol/skyhook_protocol.h" + +#include "arrow/compute/exec/expression.h" +#include "arrow/dataset/test_util.h" +#include "arrow/table.h" +#include "arrow/testing/gtest_util.h" + +std::shared_ptr CreateTable() { + auto schema = arrow::schema({ + {arrow::field("a", arrow::uint8())}, + {arrow::field("b", arrow::uint32())}, + }); + + std::shared_ptr table; + return TableFromJSON(schema, {R"([{"a": null, "b": 5}, + {"a": 1, "b": 3}, + {"a": 3, "b": null}, + {"a": null, "b": null}, + {"a": 2, "b": 5}, + {"a": 1, "b": 5} + ])"}); +} + +TEST(TestSkyhookProtocol, SerDeserScanRequest) { + ceph::bufferlist bl; + skyhook::ScanRequest req; + req.filter_expression = arrow::compute::literal(true); + req.partition_expression = arrow::compute::literal(false); + req.projection_schema = arrow::schema({arrow::field("a", arrow::int64())}); + req.dataset_schema = arrow::schema({arrow::field("a", arrow::int64())}); + req.file_size = 1000000; + req.file_format = skyhook::SkyhookFileType::type::IPC; + ASSERT_OK(skyhook::SerializeScanRequest(req, &bl)); + + skyhook::ScanRequest req_; + ASSERT_OK(skyhook::DeserializeScanRequest(bl, &req_)); + ASSERT_TRUE(req.filter_expression.Equals(req_.filter_expression)); + ASSERT_TRUE(req.partition_expression.Equals(req_.partition_expression)); + ASSERT_TRUE(req.projection_schema->Equals(req_.projection_schema)); + ASSERT_TRUE(req.dataset_schema->Equals(req_.dataset_schema)); + ASSERT_EQ(req.file_size, req_.file_size); + ASSERT_EQ(req.file_format, req_.file_format); +} + +TEST(TestSkyhookProtocol, SerDeserTable) { + std::shared_ptr table = CreateTable(); + ceph::bufferlist bl; + ASSERT_OK(skyhook::SerializeTable(table, &bl)); + + arrow::RecordBatchVector batches; + ASSERT_OK(skyhook::DeserializeTable(bl, false, &batches)); + ASSERT_OK_AND_ASSIGN(auto materialized_table, arrow::Table::FromRecordBatches(batches)); + + ASSERT_TRUE(table->Equals(*materialized_table)); +} diff --git a/cpp/src/skyhook/skyhook.pc.in b/cpp/src/skyhook/skyhook.pc.in new file mode 100644 index 0000000000000..a3a4da5ee9c93 --- /dev/null +++ b/cpp/src/skyhook/skyhook.pc.in @@ -0,0 +1,26 @@ +# Licensed to the Apache Software Foundation (ASF) under 
one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +prefix=@CMAKE_INSTALL_PREFIX@ +libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@ +includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@ + +Name: Skyhook +Description: Skyhook is a plugin for offloading computations into Ceph. +Version: @SKYHOOK_VERSION@ +Requires: arrow_dataset +Libs: -L${libdir} -larrow_skyhook_client \ No newline at end of file diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 2c6784e314b80..c007e7f428af0 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -896,6 +896,15 @@ tasks: UBUNTU: 20.04 image: ubuntu-cpp-bundled + test-skyhook-integration: + ci: github + template: docker-tests/github.linux.yml + params: + env: + UBUNTU: 20.04 + flags: -e ARROW_SKYHOOK=ON + image: ubuntu-cpp + test-debian-11-cpp: ci: github template: docker-tests/github.linux.yml diff --git a/docker-compose.yml b/docker-compose.yml index 455424d2cc54a..93314e440a2da 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -298,9 +298,11 @@ services: volumes: &debian-volumes - .:/arrow:delegated - ${DOCKER_VOLUME_PREFIX}debian-ccache:/ccache:delegated + # integration_skyhook.sh is a no-op unless skyhook is on. command: &cpp-command > /bin/bash -c " /arrow/ci/scripts/cpp_build.sh /arrow /build && + /arrow/ci/scripts/integration_skyhook.sh /build && /arrow/ci/scripts/cpp_test.sh /arrow /build" ubuntu-cpp: @@ -324,6 +326,7 @@ services: gcc_version: ${GCC_VERSION} shm_size: *shm-size ulimits: *ulimits + privileged: true environment: <<: *ccache ARROW_ENABLE_TIMING_TESTS: # inherit From 9bd7b32d0692a6d718147483a4e41c3cba67b826 Mon Sep 17 00:00:00 2001 From: Benson Muite Date: Fri, 22 Oct 2021 15:34:42 -0400 Subject: [PATCH 007/194] ARROW-14391: [Docs] Archery requires docker Closes #11472 from bkmgit/ARROW-14391 Lead-authored-by: Benson Muite Co-authored-by: David Li Signed-off-by: David Li --- docs/source/developers/archery.rst | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/docs/source/developers/archery.rst b/docs/source/developers/archery.rst index a587975d6c9e4..3f7cbee8fb4e1 100644 --- a/docs/source/developers/archery.rst +++ b/docs/source/developers/archery.rst @@ -26,7 +26,7 @@ utility called Archery. Installation ------------ -Archery requires Python 3.6 or later. It is recommended to install archery in +Archery requires Python 3.6 or later. It is recommended to install Archery in *editable* mode with the ``-e`` flag to automatically update the installation when pulling the Arrow repository. After cloning the Arrow repository, from the top level directory install Archery by using the command @@ -35,6 +35,10 @@ the top level directory install Archery by using the command pip install -e dev/archery[all] +Many operations in Archery make use of `Docker `_ +and `docker-compose `_, which you may +also want to install. 
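Once Docker and docker-compose are available, a quick way to verify the setup is
to build and run one of the images defined in ``docker-compose.yml`` through
Archery (``ubuntu-cpp`` is just one of the services; ``archery docker images``
lists the rest)::

   archery docker run ubuntu-cpp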
+ Usage ----- @@ -85,3 +89,6 @@ help output, for example: images List the available docker-compose images. push Push the generated docker-compose image. run Execute docker-compose builds. + +A more detailed introduction to using docker with +Archery is available in a separate :ref:`page `. From 2dcafa19588246ff7b7abc553f6e8ab2c1989965 Mon Sep 17 00:00:00 2001 From: liyafan82 Date: Sat, 23 Oct 2021 22:42:31 +0800 Subject: [PATCH 008/194] ARROW-13981: [Java] VectorSchemaRootAppender doesn't work for BitVector Please see: https://issues.apache.org/jira/browse/ARROW-13981 Closes #11317 from liyafan82/fly_1002_app Authored-by: liyafan82 Signed-off-by: liyafan82 --- .../arrow/vector/util/VectorAppender.java | 14 +++++++--- .../arrow/vector/util/TestVectorAppender.java | 27 +++++++++++++++++++ 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java b/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java index e5809e93ea802..ea78917c3ddb2 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java @@ -25,6 +25,7 @@ import org.apache.arrow.vector.BaseFixedWidthVector; import org.apache.arrow.vector.BaseLargeVariableWidthVector; import org.apache.arrow.vector.BaseVariableWidthVector; +import org.apache.arrow.vector.BitVector; import org.apache.arrow.vector.BitVectorHelper; import org.apache.arrow.vector.ExtensionTypeVector; import org.apache.arrow.vector.NullVector; @@ -83,9 +84,16 @@ public ValueVector visit(BaseFixedWidthVector deltaVector, Void value) { deltaVector.getValidityBuffer(), deltaVector.getValueCount(), targetVector.getValidityBuffer()); // append data buffer - PlatformDependent.copyMemory(deltaVector.getDataBuffer().memoryAddress(), - targetVector.getDataBuffer().memoryAddress() + deltaVector.getTypeWidth() * targetVector.getValueCount(), - deltaVector.getTypeWidth() * deltaVector.getValueCount()); + if (targetVector instanceof BitVector) { + // special processing for bit vector, as its type width is 0 + BitVectorHelper.concatBits(targetVector.getDataBuffer(), targetVector.getValueCount(), + deltaVector.getDataBuffer(), deltaVector.getValueCount(), targetVector.getDataBuffer()); + + } else { + PlatformDependent.copyMemory(deltaVector.getDataBuffer().memoryAddress(), + targetVector.getDataBuffer().memoryAddress() + deltaVector.getTypeWidth() * targetVector.getValueCount(), + deltaVector.getTypeWidth() * deltaVector.getValueCount()); + } targetVector.setValueCount(newValueCount); return targetVector; } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java index 1cd26312008bb..25d26623d5c05 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java @@ -27,6 +27,7 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.BigIntVector; +import org.apache.arrow.vector.BitVector; import org.apache.arrow.vector.Float4Vector; import org.apache.arrow.vector.IntVector; import org.apache.arrow.vector.LargeVarCharVector; @@ -96,6 +97,32 @@ public void testAppendFixedWidthVector() { } } + @Test + public void testAppendBitVector() { + final int length1 = 10; + final int length2 = 5; + try 
(BitVector target = new BitVector("", allocator); + BitVector delta = new BitVector("", allocator)) { + + target.allocateNew(length1); + delta.allocateNew(length2); + + ValueVectorDataPopulator.setVector(target, 0, 1, 0, 1, 0, 1, 0, null, 0, 1); + ValueVectorDataPopulator.setVector(delta, null, 1, 1, 0, 0); + + VectorAppender appender = new VectorAppender(target); + delta.accept(appender, null); + + assertEquals(length1 + length2, target.getValueCount()); + + try (BitVector expected = new BitVector("expected", allocator)) { + expected.allocateNew(); + ValueVectorDataPopulator.setVector(expected, 0, 1, 0, 1, 0, 1, 0, null, 0, 1, null, 1, 1, 0, 0); + assertVectorsEqual(expected, target); + } + } + } + @Test public void testAppendEmptyFixedWidthVector() { try (IntVector target = new IntVector("", allocator); From aecdc0bd75ef14095ec2a560885c3f4e059bc730 Mon Sep 17 00:00:00 2001 From: Matthew Topol Date: Sat, 23 Oct 2021 14:09:59 -0400 Subject: [PATCH 009/194] ARROW-13984: [Go][Parquet] file handling for go parquet, just the readers This implements the file/column and page readers for Parquet files. In order to keep this smaller, I've only included what was necessary for the readers and will make a separate PR for the file and column writers after this. Closes #11146 from zeroshade/goparquet-file Lead-authored-by: Matthew Topol Co-authored-by: Matt Topol Signed-off-by: Matthew Topol --- go/parquet/file/column_reader.go | 498 ++++++++++++++ go/parquet/file/column_reader_test.go | 450 +++++++++++++ go/parquet/file/column_reader_types.gen.go | 299 +++++++++ .../file/column_reader_types.gen.go.tmpl | 62 ++ go/parquet/file/file_reader.go | 336 ++++++++++ go/parquet/file/file_reader_test.go | 304 +++++++++ go/parquet/file/level_conversion.go | 262 ++++++++ go/parquet/file/level_conversion_test.go | 194 ++++++ go/parquet/file/page_reader.go | 620 ++++++++++++++++++ go/parquet/file/row_group_reader.go | 130 ++++ go/parquet/go.sum | 1 + go/parquet/internal/bmi/bitmap_bmi2_noasm.go | 24 + go/parquet/internal/bmi/bmi.go | 2 +- .../internal/encoding/boolean_decoder.go | 4 +- .../internal/encoding/boolean_encoder.go | 3 + .../internal/encoding/typed_encoder.gen.go | 158 ++++- .../encoding/typed_encoder.gen.go.tmpl | 46 +- go/parquet/internal/testutils/pagebuilder.go | 297 +++++++++ go/parquet/reader_properties.go | 3 +- go/parquet/types.go | 10 + 20 files changed, 3689 insertions(+), 14 deletions(-) create mode 100644 go/parquet/file/column_reader.go create mode 100644 go/parquet/file/column_reader_test.go create mode 100644 go/parquet/file/column_reader_types.gen.go create mode 100644 go/parquet/file/column_reader_types.gen.go.tmpl create mode 100644 go/parquet/file/file_reader.go create mode 100644 go/parquet/file/file_reader_test.go create mode 100644 go/parquet/file/level_conversion.go create mode 100644 go/parquet/file/level_conversion_test.go create mode 100644 go/parquet/file/page_reader.go create mode 100644 go/parquet/file/row_group_reader.go create mode 100644 go/parquet/internal/bmi/bitmap_bmi2_noasm.go create mode 100644 go/parquet/internal/testutils/pagebuilder.go diff --git a/go/parquet/file/column_reader.go b/go/parquet/file/column_reader.go new file mode 100644 index 0000000000000..79c6479b05b0d --- /dev/null +++ b/go/parquet/file/column_reader.go @@ -0,0 +1,498 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package file + +import ( + "github.com/apache/arrow/go/arrow/memory" + "github.com/apache/arrow/go/parquet" + "github.com/apache/arrow/go/parquet/internal/encoding" + "github.com/apache/arrow/go/parquet/internal/encryption" + format "github.com/apache/arrow/go/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/parquet/internal/utils" + "github.com/apache/arrow/go/parquet/schema" + "golang.org/x/xerrors" +) + +const ( + // 4 MB is the default maximum page header size + defaultMaxPageHeaderSize = 4 * 1024 * 1024 + // 16 KB is the default expected page header size + defaultPageHeaderSize = 16 * 1024 +) + +//go:generate go run ../../arrow/_tools/tmpl/main.go -i -data=../internal/encoding/physical_types.tmpldata column_reader_types.gen.go.tmpl + +func isDictIndexEncoding(e format.Encoding) bool { + return e == format.Encoding_RLE_DICTIONARY || e == format.Encoding_PLAIN_DICTIONARY +} + +// CryptoContext is a context for keeping track of the current methods for decrypting. +// It keeps track of the row group and column numbers along with references to the +// decryptor objects. +type CryptoContext struct { + StartDecryptWithDictionaryPage bool + RowGroupOrdinal int16 + ColumnOrdinal int16 + MetaDecryptor encryption.Decryptor + DataDecryptor encryption.Decryptor +} + +// ColumnChunkReader is the basic interface for all column readers. It will use +// a page reader to read all the pages in a column chunk from a row group. +// +// To actually Read out the column data, you need to convert to the properly +// typed ColumnChunkReader type such as *BooleanColumnReader etc. +// +// Some things to clarify when working with column readers: +// +// "Values" refers to the physical data values in a data page. +// +// This is separate from the number of "rows" in a column and the total number +// of "elements" in a column because null values aren't stored physically in the +// data page but are represented via definition levels, so the number of values +// in a column can be less than the number of rows. +// +// The total number of "elements" in a column also differs because of potential +// repeated fields, where you can have multiple values in the page which +// together make up a single element (such as a list) or depending on the repetition +// level and definition level, could represent an entire null list or just a null +// element inside of a list. +type ColumnChunkReader interface { + // HasNext returns whether there is more data to be read in this column + // and row group. + HasNext() bool + // Type returns the underlying physical type of the column + Type() parquet.Type + // Descriptor returns the column schema container + Descriptor() *schema.Column + // if HasNext returns false because of an error, this will return the error + // it encountered. 
Otherwise this will be nil if it's just the end of the + // column + Err() error + // Skip buffered values + consumeBufferedValues(int64) + // number of available buffered values that have not been decoded yet + // when this returns 0, you're at the end of a page. + numAvailValues() int64 + // read the definition levels and return the number of definitions, + // and the number of values to be read (number of def levels == maxdef level) + // it also populates the passed in slice which should be sized appropriately. + readDefinitionLevels(levels []int16) (int, int64) + // read the repetition levels and return the number of repetition levels read + // also populates the passed in slice, which should be sized appropriately. + readRepetitionLevels(levels []int16) int + // a column is made up of potentially multiple pages across potentially multiple + // row groups. A PageReader allows looping through the pages in a single row group. + // When moving to another row group for reading, use setPageReader to re-use the + // column reader for reading the pages of the new row group. + pager() PageReader + // set a page reader into the columnreader so it can be reused. + // + // This will clear any current error in the reader but does not + // automatically read the first page of the page reader passed in until + // HasNext which will read in the next page. + setPageReader(PageReader) +} + +type columnChunkReader struct { + descr *schema.Column + rdr PageReader + repetitionDecoder encoding.LevelDecoder + definitionDecoder encoding.LevelDecoder + + curPage Page + curEncoding format.Encoding + curDecoder encoding.TypedDecoder + + // number of currently buffered values in the current page + numBuffered int64 + // the number of values we've decoded so far + numDecoded int64 + mem memory.Allocator + + decoders map[format.Encoding]encoding.TypedDecoder + decoderTraits encoding.DecoderTraits + + // is set when an error is encountered + err error + defLvlBuffer []int16 +} + +// NewColumnReader returns a column reader for the provided column initialized with the given pagereader that will +// provide the pages of data for this column. The type is determined from the column passed in. 
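Because the concrete reader type is keyed off the column's physical type, callers normally type-assert the value returned here. A minimal sketch of that wiring against the API introduced in this patch; the function name, the column name "a", and the source of the PageReader are illustrative assumptions, and the descriptor construction mirrors this package's own tests:

package example

import (
    "github.com/apache/arrow/go/arrow/memory"
    "github.com/apache/arrow/go/parquet"
    "github.com/apache/arrow/go/parquet/file"
    "github.com/apache/arrow/go/parquet/schema"
)

// readRequiredInt32 drains a single required INT32 column chunk given a
// PageReader for its pages. Both the function and its input are hypothetical.
func readRequiredInt32(pages file.PageReader) ([]int32, error) {
    node := schema.NewInt32Node("a", parquet.Repetitions.Required, -1)
    descr := schema.NewColumn(node, 0, 0) // max def level 0, max rep level 0

    rdr := file.NewColumnReader(descr, pages, memory.DefaultAllocator).(*file.Int32ColumnChunkReader)

    out := make([]int32, 0)
    buf := make([]int32, 1024)
    for rdr.HasNext() {
        // a required column stores no def/rep levels, so nil is fine for both
        _, n, err := rdr.ReadBatch(int64(len(buf)), buf, nil, nil)
        if err != nil {
            return nil, err
        }
        out = append(out, buf[:n]...)
    }
    return out, rdr.Err()
}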
+func NewColumnReader(descr *schema.Column, pageReader PageReader, mem memory.Allocator) ColumnChunkReader { + base := columnChunkReader{descr: descr, rdr: pageReader, mem: mem, decoders: make(map[format.Encoding]encoding.TypedDecoder)} + switch descr.PhysicalType() { + case parquet.Types.FixedLenByteArray: + base.decoderTraits = &encoding.FixedLenByteArrayDecoderTraits + return &FixedLenByteArrayColumnChunkReader{base} + case parquet.Types.Float: + base.decoderTraits = &encoding.Float32DecoderTraits + return &Float32ColumnChunkReader{base} + case parquet.Types.Double: + base.decoderTraits = &encoding.Float64DecoderTraits + return &Float64ColumnChunkReader{base} + case parquet.Types.ByteArray: + base.decoderTraits = &encoding.ByteArrayDecoderTraits + return &ByteArrayColumnChunkReader{base} + case parquet.Types.Int32: + base.decoderTraits = &encoding.Int32DecoderTraits + return &Int32ColumnChunkReader{base} + case parquet.Types.Int64: + base.decoderTraits = &encoding.Int64DecoderTraits + return &Int64ColumnChunkReader{base} + case parquet.Types.Int96: + base.decoderTraits = &encoding.Int96DecoderTraits + return &Int96ColumnChunkReader{base} + case parquet.Types.Boolean: + base.decoderTraits = &encoding.BooleanDecoderTraits + return &BooleanColumnChunkReader{base} + } + return nil +} + +func (c *columnChunkReader) Err() error { return c.err } +func (c *columnChunkReader) Type() parquet.Type { return c.descr.PhysicalType() } +func (c *columnChunkReader) Descriptor() *schema.Column { return c.descr } +func (c *columnChunkReader) consumeBufferedValues(n int64) { c.numDecoded += n } +func (c *columnChunkReader) numAvailValues() int64 { return c.numBuffered - c.numDecoded } +func (c *columnChunkReader) pager() PageReader { return c.rdr } +func (c *columnChunkReader) setPageReader(rdr PageReader) { + c.rdr, c.err = rdr, nil + c.decoders = make(map[format.Encoding]encoding.TypedDecoder) + c.numBuffered, c.numDecoded = 0, 0 +} + +func (c *columnChunkReader) getDefLvlBuffer(sz int64) []int16 { + if int64(len(c.defLvlBuffer)) < sz { + c.defLvlBuffer = make([]int16, sz) + return c.defLvlBuffer + } + + return c.defLvlBuffer[:sz] +} + +// HasNext returns whether there is more data to be read in this column +// and row group. 
+func (c *columnChunkReader) HasNext() bool { + if c.numBuffered == 0 || c.numDecoded == c.numBuffered { + return c.readNewPage() && c.numBuffered != 0 + } + return true +} + +func (c *columnChunkReader) configureDict(page *DictionaryPage) error { + enc := page.encoding + if enc == format.Encoding_PLAIN_DICTIONARY || enc == format.Encoding_PLAIN { + enc = format.Encoding_RLE_DICTIONARY + } + + if _, ok := c.decoders[enc]; ok { + return xerrors.New("parquet: column chunk cannot have more than one dictionary.") + } + + switch page.Encoding() { + case format.Encoding_PLAIN, format.Encoding_PLAIN_DICTIONARY: + dict := c.decoderTraits.Decoder(parquet.Encodings.Plain, c.descr, false, c.mem) + dict.SetData(int(page.NumValues()), page.Data()) + + decoder := c.decoderTraits.Decoder(parquet.Encodings.Plain, c.descr, true, c.mem).(encoding.DictDecoder) + decoder.SetDict(dict) + c.decoders[enc] = decoder + default: + return xerrors.New("parquet: dictionary index must be plain encoding") + } + + c.curDecoder = c.decoders[enc] + return nil +} + +// read a new page from the page reader +func (c *columnChunkReader) readNewPage() bool { + for c.rdr.Next() { // keep going until we get a data page + c.curPage = c.rdr.Page() + if c.curPage == nil { + break + } + + var lvlByteLen int64 + switch p := c.curPage.(type) { + case *DictionaryPage: + if err := c.configureDict(p); err != nil { + c.err = err + return false + } + continue + case *DataPageV1: + lvlByteLen, c.err = c.initLevelDecodersV1(p, p.repLvlEncoding, p.defLvlEncoding) + if c.err != nil { + return false + } + case *DataPageV2: + lvlByteLen, c.err = c.initLevelDecodersV2(p) + if c.err != nil { + return false + } + default: + // we can skip non-data pages + continue + } + + c.err = c.initDataDecoder(c.curPage, lvlByteLen) + return c.err == nil + } + c.err = c.rdr.Err() + return false +} + +func (c *columnChunkReader) initLevelDecodersV2(page *DataPageV2) (int64, error) { + c.numBuffered = int64(page.nvals) + c.numDecoded = 0 + buf := page.Data() + totalLvlLen := int64(page.repLvlByteLen) + int64(page.defLvlByteLen) + + if totalLvlLen > int64(len(buf)) { + return totalLvlLen, xerrors.New("parquet: data page too small for levels (corrupt header?)") + } + + if c.descr.MaxRepetitionLevel() > 0 { + c.repetitionDecoder.SetDataV2(page.repLvlByteLen, c.descr.MaxRepetitionLevel(), int(c.numBuffered), buf) + buf = buf[page.repLvlByteLen:] + } + + if c.descr.MaxDefinitionLevel() > 0 { + c.definitionDecoder.SetDataV2(page.defLvlByteLen, c.descr.MaxDefinitionLevel(), int(c.numBuffered), buf) + } + + return totalLvlLen, nil +} + +func (c *columnChunkReader) initLevelDecodersV1(page *DataPageV1, repLvlEncoding, defLvlEncoding format.Encoding) (int64, error) { + c.numBuffered = int64(page.nvals) + c.numDecoded = 0 + + buf := page.Data() + maxSize := len(buf) + levelsByteLen := int64(0) + + // Data page layout: Repetition Levels - Definition Levels - encoded values. 
+ // Levels are encoded as rle or bit-packed + if c.descr.MaxRepetitionLevel() > 0 { + repBytes, err := c.repetitionDecoder.SetData(parquet.Encoding(repLvlEncoding), c.descr.MaxRepetitionLevel(), int(c.numBuffered), buf) + if err != nil { + return levelsByteLen, err + } + buf = buf[repBytes:] + maxSize -= repBytes + levelsByteLen += int64(repBytes) + } + + if c.descr.MaxDefinitionLevel() > 0 { + defBytes, err := c.definitionDecoder.SetData(parquet.Encoding(defLvlEncoding), c.descr.MaxDefinitionLevel(), int(c.numBuffered), buf) + if err != nil { + return levelsByteLen, err + } + levelsByteLen += int64(defBytes) + maxSize -= defBytes + } + + return levelsByteLen, nil +} + +func (c *columnChunkReader) initDataDecoder(page Page, lvlByteLen int64) error { + buf := page.Data() + if int64(len(buf)) < lvlByteLen { + return xerrors.New("parquet: page smaller than size of encoded levels") + } + + buf = buf[lvlByteLen:] + encoding := page.Encoding() + + if isDictIndexEncoding(encoding) { + encoding = format.Encoding_RLE_DICTIONARY + } + + if decoder, ok := c.decoders[encoding]; ok { + c.curDecoder = decoder + } else { + switch encoding { + case format.Encoding_PLAIN, + format.Encoding_DELTA_BYTE_ARRAY, + format.Encoding_DELTA_LENGTH_BYTE_ARRAY, + format.Encoding_DELTA_BINARY_PACKED: + c.curDecoder = c.decoderTraits.Decoder(parquet.Encoding(encoding), c.descr, false, c.mem) + c.decoders[encoding] = c.curDecoder + case format.Encoding_RLE_DICTIONARY: + return xerrors.New("parquet: dictionary page must be before data page") + case format.Encoding_BYTE_STREAM_SPLIT: + return xerrors.Errorf("parquet: unsupported data encoding %s", encoding) + default: + return xerrors.Errorf("parquet: unknown encoding type %s", encoding) + } + } + + c.curEncoding = encoding + c.curDecoder.SetData(int(c.numBuffered), buf) + return nil +} + +// readDefinitionLevels decodes the definition levels from the page and returns +// it returns the total number of levels that were decoded (and thus populated +// in the passed in slice) and the number of physical values that exist to read +// (the number of levels that are equal to the max definition level). +// +// If the max definition level is 0, the assumption is that there no nulls in the +// column and therefore no definition levels to read, so it will always return 0, 0 +func (c *columnChunkReader) readDefinitionLevels(levels []int16) (totalDecoded int, valuesToRead int64) { + if c.descr.MaxDefinitionLevel() == 0 { + return 0, 0 + } + + return c.definitionDecoder.Decode(levels) +} + +// readRepetitionLevels decodes the repetition levels from the page and returns +// the total number of values decoded (and thus populated in the passed in levels +// slice). +// +// If max repetition level is 0, it is assumed there are no repetition levels, +// and thus will always return 0. +func (c *columnChunkReader) readRepetitionLevels(levels []int16) int { + if c.descr.MaxRepetitionLevel() == 0 { + return 0 + } + + nlevels, _ := c.repetitionDecoder.Decode(levels) + return nlevels +} + +// determineNumToRead reads the definition levels (and optionally populates the repetition levels) +// in order to determine how many values need to be read to fulfill this batch read. +// +// batchLen is the number of values it is desired to read. defLvls must be either nil (in which case +// a buffer will be used) or must be at least batchLen in length to be safe. repLvls should be either nil +// (in which case it is ignored) or should be at least batchLen in length to be safe. 
+// +// In the return values: ndef is the number of definition levels that were actually read in which will +// typically be the minimum of batchLen and numAvailValues. +// toRead is the number of physical values that should be read in based on the definition levels (the number +// of definition levels that were equal to maxDefinitionLevel). and err being either nil or any error encountered +func (c *columnChunkReader) determineNumToRead(batchLen int64, defLvls, repLvls []int16) (ndefs int, toRead int64, err error) { + if !c.HasNext() { + return 0, 0, c.err + } + + size := utils.Min(batchLen, c.numBuffered-c.numDecoded) + + if c.descr.MaxDefinitionLevel() > 0 { + if defLvls == nil { + defLvls = c.getDefLvlBuffer(size) + } + ndefs, toRead = c.readDefinitionLevels(defLvls[:size]) + } else { + toRead = size + } + + if c.descr.MaxRepetitionLevel() > 0 && repLvls != nil { + nreps := c.readRepetitionLevels(repLvls[:size]) + if defLvls != nil && ndefs != nreps { + err = xerrors.New("parquet: number of decoded rep/def levels did not match") + } + } + return +} + +// skipValues some number of rows using readFn as the function to read the data and throw it away. +// If we can skipValues a whole page based on its metadata, then we do so, otherwise we read the +// page until we have skipped the number of rows desired. +func (c *columnChunkReader) skipValues(nvalues int64, readFn func(batch int64, buf []byte) (int64, error)) (int64, error) { + var err error + toskip := nvalues + for c.HasNext() && toskip > 0 { + // if number to skip is more than the number of undecoded values, skip the page + if toskip > (c.numBuffered - c.numDecoded) { + toskip -= c.numBuffered - c.numDecoded + c.numDecoded = c.numBuffered + } else { + var ( + batchSize int64 = 1024 + valsRead int64 = 0 + ) + + scratch := memory.NewResizableBuffer(c.mem) + scratch.Reserve(c.decoderTraits.BytesRequired(int(batchSize))) + defer scratch.Release() + + for { + batchSize = utils.Min(batchSize, toskip) + valsRead, err = readFn(batchSize, scratch.Buf()) + toskip -= valsRead + if valsRead <= 0 || toskip <= 0 || err != nil { + break + } + } + } + } + if c.err != nil { + err = c.err + } + return nvalues - toskip, err +} + +type readerFunc func(int64, int64) (int, error) + +// base function for reading a batch of values, this will read until it either reads in batchSize values or +// it hits the end of the column chunk, including reading multiple pages. +// +// totalValues is the total number of values which were read in, and thus would be the total number +// of definition levels and repetition levels which were populated (if they were non-nil). totalRead +// is the number of physical values that were read in (ie: the number of non-null values) +func (c *columnChunkReader) readBatch(batchSize int64, defLvls, repLvls []int16, readFn readerFunc) (totalLvls int64, totalRead int, err error) { + var ( + read int + defs []int16 + reps []int16 + ndefs int + toRead int64 + ) + + for c.HasNext() && totalLvls < batchSize && err == nil { + if defLvls != nil { + defs = defLvls[totalLvls:] + } + if repLvls != nil { + reps = repLvls[totalLvls:] + } + ndefs, toRead, err = c.determineNumToRead(batchSize-totalLvls, defs, reps) + if err != nil { + return totalLvls, totalRead, err + } + + read, err = readFn(int64(totalRead), toRead) + // the total number of values processed here is the maximum of + // the number of definition levels or the number of physical values read. 
+ // if this is a required field, ndefs will be 0 since there is no definition + // levels stored with it and `read` will be the number of values, otherwise + // we use ndefs since it will be equal to or greater than read. + totalVals := int64(utils.MaxInt(ndefs, read)) + c.consumeBufferedValues(totalVals) + + totalLvls += totalVals + totalRead += read + } + return totalLvls, totalRead, err +} diff --git a/go/parquet/file/column_reader_test.go b/go/parquet/file/column_reader_test.go new file mode 100644 index 0000000000000..d22e365fa805e --- /dev/null +++ b/go/parquet/file/column_reader_test.go @@ -0,0 +1,450 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package file_test + +import ( + "math" + "math/rand" + "reflect" + "testing" + + "github.com/apache/arrow/go/arrow/memory" + "github.com/apache/arrow/go/parquet" + "github.com/apache/arrow/go/parquet/file" + "github.com/apache/arrow/go/parquet/internal/testutils" + "github.com/apache/arrow/go/parquet/internal/utils" + "github.com/apache/arrow/go/parquet/schema" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/suite" +) + +func initValues(values reflect.Value) { + if values.Kind() != reflect.Slice { + panic("must init values with slice") + } + + r := rand.New(rand.NewSource(0)) + typ := values.Type().Elem() + switch { + case typ.Bits() <= 32: + max := int64(math.MaxInt32) + min := int64(math.MinInt32) + for i := 0; i < values.Len(); i++ { + values.Index(i).Set(reflect.ValueOf(r.Int63n(max-min+1) + min).Convert(reflect.TypeOf(int32(0)))) + } + case typ.Bits() <= 64: + max := int64(math.MaxInt64) + min := int64(math.MinInt64) + for i := 0; i < values.Len(); i++ { + values.Index(i).Set(reflect.ValueOf(r.Int63n(max-min+1) + min)) + } + } +} + +func initDictValues(values reflect.Value, numDicts int) { + repeatFactor := values.Len() / numDicts + initValues(values) + // add some repeated values + for j := 1; j < repeatFactor; j++ { + for i := 0; i < numDicts; i++ { + values.Index(numDicts*j + i).Set(values.Index(i)) + } + } + // computed only dict_per_page * repeat_factor - 1 values < num_values compute remaining + for i := numDicts * repeatFactor; i < values.Len(); i++ { + values.Index(i).Set(values.Index(i - numDicts*repeatFactor)) + } +} + +func makePages(version parquet.DataPageVersion, d *schema.Column, npages, lvlsPerPage int, typ reflect.Type, enc parquet.Encoding) ([]file.Page, int, reflect.Value, []int16, []int16) { + nlevels := lvlsPerPage * npages + nvalues := 0 + + maxDef := d.MaxDefinitionLevel() + maxRep := d.MaxRepetitionLevel() + + var ( + defLevels []int16 + repLevels []int16 + ) + + valuesPerPage := make([]int, npages) + if maxDef > 0 { + defLevels = make([]int16, nlevels) + testutils.FillRandomInt16(0, 0, maxDef, defLevels) + for idx := range valuesPerPage 
{ + numPerPage := 0 + for i := 0; i < lvlsPerPage; i++ { + if defLevels[i+idx*lvlsPerPage] == maxDef { + numPerPage++ + nvalues++ + } + } + valuesPerPage[idx] = numPerPage + } + } else { + nvalues = nlevels + valuesPerPage[0] = lvlsPerPage + for i := 1; i < len(valuesPerPage); i *= 2 { + copy(valuesPerPage[i:], valuesPerPage[:i]) + } + } + + if maxRep > 0 { + repLevels = make([]int16, nlevels) + testutils.FillRandomInt16(0, 0, maxRep, repLevels) + } + + values := reflect.MakeSlice(reflect.SliceOf(typ), nvalues, nvalues) + if enc == parquet.Encodings.Plain { + initValues(values) + return testutils.PaginatePlain(version, d, values, defLevels, repLevels, maxDef, maxRep, lvlsPerPage, valuesPerPage, parquet.Encodings.Plain), nvalues, values, defLevels, repLevels + } else if enc == parquet.Encodings.PlainDict || enc == parquet.Encodings.RLEDict { + initDictValues(values, lvlsPerPage) + return testutils.PaginateDict(version, d, values, defLevels, repLevels, maxDef, maxRep, lvlsPerPage, valuesPerPage, parquet.Encodings.RLEDict), nvalues, values, defLevels, repLevels + } + panic("invalid encoding type for make pages") +} + +func compareVectorWithDefLevels(left, right reflect.Value, defLevels []int16, maxDef, maxRep int16) assert.Comparison { + return func() bool { + if left.Kind() != reflect.Slice || right.Kind() != reflect.Slice { + return false + } + + if left.Type().Elem() != right.Type().Elem() { + return false + } + + iLeft, iRight := 0, 0 + for _, def := range defLevels { + if def == maxDef { + if !reflect.DeepEqual(left.Index(iLeft).Interface(), right.Index(iRight).Interface()) { + return false + } + iLeft++ + iRight++ + } else if def == (maxDef - 1) { + // null entry on the lowest nested level + iRight++ + } else if def < (maxDef - 1) { + // null entry on higher nesting level, only supported for non-repeating data + if maxRep == 0 { + iRight++ + } + } + } + return true + } +} + +var mem = memory.DefaultAllocator + +type PrimitiveReaderSuite struct { + suite.Suite + + dataPageVersion parquet.DataPageVersion + pager file.PageReader + reader file.ColumnChunkReader + pages []file.Page + values reflect.Value + defLevels []int16 + repLevels []int16 + nlevels int + nvalues int + maxDefLvl int16 + maxRepLvl int16 +} + +func (p *PrimitiveReaderSuite) TearDownTest() { + p.clear() +} + +func (p *PrimitiveReaderSuite) initReader(d *schema.Column) { + m := new(testutils.MockPageReader) + m.Test(p.T()) + m.TestData().Set("pages", p.pages) + m.On("Err").Return((error)(nil)) + p.pager = m + p.reader = file.NewColumnReader(d, m, mem) +} + +func (p *PrimitiveReaderSuite) checkResults() { + vresult := make([]int32, p.nvalues) + dresult := make([]int16, p.nlevels) + rresult := make([]int16, p.nlevels) + + var ( + read int64 = 0 + totalRead int = 0 + batchActual int = 0 + batchSize int32 = 8 + batch int = 0 + ) + + rdr := p.reader.(*file.Int32ColumnChunkReader) + p.Require().NotNil(rdr) + + // this will cover both cases: + // 1) batch size < page size (multiple ReadBatch from a single page) + // 2) batch size > page size (BatchRead limits to single page) + for { + read, batch, _ = rdr.ReadBatch(int64(batchSize), vresult[totalRead:], dresult[batchActual:], rresult[batchActual:]) + totalRead += batch + batchActual += int(read) + batchSize = int32(utils.MinInt(1<<24, utils.MaxInt(int(batchSize*2), 4096))) + if batch <= 0 { + break + } + } + + p.Equal(p.nlevels, batchActual) + p.Equal(p.nvalues, totalRead) + p.Equal(p.values.Interface(), vresult) + if p.maxDefLvl > 0 { + p.Equal(p.defLevels, dresult) + } + if 
p.maxRepLvl > 0 { + p.Equal(p.repLevels, rresult) + } + + // catch improper writes at EOS + read, batchActual, _ = rdr.ReadBatch(5, vresult, nil, nil) + p.Zero(batchActual) + p.Zero(read) +} + +func (p *PrimitiveReaderSuite) clear() { + p.values = reflect.ValueOf(nil) + p.defLevels = nil + p.repLevels = nil + p.pages = nil + p.pager = nil + p.reader = nil +} + +func (p *PrimitiveReaderSuite) testPlain(npages, levels int, d *schema.Column) { + p.pages, p.nvalues, p.values, p.defLevels, p.repLevels = makePages(p.dataPageVersion, d, npages, levels, reflect.TypeOf(int32(0)), parquet.Encodings.Plain) + p.nlevels = npages * levels + p.initReader(d) + p.checkResults() + p.clear() +} + +func (p *PrimitiveReaderSuite) testDict(npages, levels int, d *schema.Column) { + p.pages, p.nvalues, p.values, p.defLevels, p.repLevels = makePages(p.dataPageVersion, d, npages, levels, reflect.TypeOf(int32(0)), parquet.Encodings.RLEDict) + p.nlevels = npages * levels + p.initReader(d) + p.checkResults() + p.clear() +} + +func (p *PrimitiveReaderSuite) TestInt32FlatRequired() { + const ( + levelsPerPage int = 100 + npages int = 50 + ) + + p.maxDefLvl = 0 + p.maxRepLvl = 0 + + typ := schema.NewInt32Node("a", parquet.Repetitions.Required, -1) + d := schema.NewColumn(typ, p.maxDefLvl, p.maxRepLvl) + p.testPlain(npages, levelsPerPage, d) + p.testDict(npages, levelsPerPage, d) +} + +func (p *PrimitiveReaderSuite) TestInt32FlatOptional() { + const ( + levelsPerPage int = 100 + npages int = 50 + ) + + p.maxDefLvl = 4 + p.maxRepLvl = 0 + typ := schema.NewInt32Node("b", parquet.Repetitions.Optional, -1) + d := schema.NewColumn(typ, p.maxDefLvl, p.maxRepLvl) + p.testPlain(npages, levelsPerPage, d) + p.testDict(npages, levelsPerPage, d) +} + +func (p *PrimitiveReaderSuite) TestInt32FlatRepeated() { + const ( + levelsPerPage int = 100 + npages int = 50 + ) + + p.maxDefLvl = 4 + p.maxRepLvl = 2 + typ := schema.NewInt32Node("c", parquet.Repetitions.Repeated, -1) + d := schema.NewColumn(typ, p.maxDefLvl, p.maxRepLvl) + p.testPlain(npages, levelsPerPage, d) + p.testDict(npages, levelsPerPage, d) +} + +func (p *PrimitiveReaderSuite) TestReadBatchMultiPage() { + const ( + levelsPerPage int = 100 + npages int = 3 + ) + + p.maxDefLvl = 0 + p.maxRepLvl = 0 + typ := schema.NewInt32Node("a", parquet.Repetitions.Required, -1) + d := schema.NewColumn(typ, p.maxDefLvl, p.maxRepLvl) + p.pages, p.nvalues, p.values, p.defLevels, p.repLevels = makePages(p.dataPageVersion, d, npages, levelsPerPage, reflect.TypeOf(int32(0)), parquet.Encodings.Plain) + p.initReader(d) + + vresult := make([]int32, levelsPerPage*npages) + dresult := make([]int16, levelsPerPage*npages) + rresult := make([]int16, levelsPerPage*npages) + + rdr := p.reader.(*file.Int32ColumnChunkReader) + total, read, err := rdr.ReadBatch(int64(levelsPerPage*npages), vresult, dresult, rresult) + p.NoError(err) + p.EqualValues(levelsPerPage*npages, total) + p.EqualValues(levelsPerPage*npages, read) +} + +func (p *PrimitiveReaderSuite) TestInt32FlatRequiredSkip() { + const ( + levelsPerPage int = 100 + npages int = 5 + ) + + p.maxDefLvl = 0 + p.maxRepLvl = 0 + typ := schema.NewInt32Node("a", parquet.Repetitions.Required, -1) + d := schema.NewColumn(typ, p.maxDefLvl, p.maxRepLvl) + p.pages, p.nvalues, p.values, p.defLevels, p.repLevels = makePages(p.dataPageVersion, d, npages, levelsPerPage, reflect.TypeOf(int32(0)), parquet.Encodings.Plain) + p.initReader(d) + + vresult := make([]int32, levelsPerPage/2) + dresult := make([]int16, levelsPerPage/2) + rresult := make([]int16, 
levelsPerPage/2) + + rdr := p.reader.(*file.Int32ColumnChunkReader) + + p.Run("skip_size > page_size", func() { + // Skip first 2 pages + skipped, _ := rdr.Skip(int64(2 * levelsPerPage)) + p.Equal(int64(2*levelsPerPage), skipped) + + rdr.ReadBatch(int64(levelsPerPage/2), vresult, dresult, rresult) + subVals := p.values.Slice(2*levelsPerPage, int(2.5*float64(levelsPerPage))).Interface().([]int32) + p.Equal(subVals, vresult) + }) + + p.Run("skip_size == page_size", func() { + // skip across two pages + skipped, _ := rdr.Skip(int64(levelsPerPage)) + p.Equal(int64(levelsPerPage), skipped) + // read half a page + rdr.ReadBatch(int64(levelsPerPage/2), vresult, dresult, rresult) + subVals := p.values.Slice(int(3.5*float64(levelsPerPage)), 4*levelsPerPage).Interface().([]int32) + p.Equal(subVals, vresult) + }) + + p.Run("skip_size < page_size", func() { + // skip limited to a single page + // Skip half a page + skipped, _ := rdr.Skip(int64(levelsPerPage / 2)) + p.Equal(int64(0.5*float32(levelsPerPage)), skipped) + // Read half a page + rdr.ReadBatch(int64(levelsPerPage/2), vresult, dresult, rresult) + subVals := p.values.Slice(int(4.5*float64(levelsPerPage)), p.values.Len()).Interface().([]int32) + p.Equal(subVals, vresult) + }) +} + +func (p *PrimitiveReaderSuite) TestDictionaryEncodedPages() { + p.maxDefLvl = 0 + p.maxRepLvl = 0 + typ := schema.NewInt32Node("a", parquet.Repetitions.Required, -1) + descr := schema.NewColumn(typ, p.maxDefLvl, p.maxRepLvl) + dummy := memory.NewResizableBuffer(mem) + + p.Run("Dict: Plain, Data: RLEDict", func() { + dictPage := file.NewDictionaryPage(dummy, 0, parquet.Encodings.Plain) + dataPage := testutils.MakeDataPage(p.dataPageVersion, descr, nil, 0, parquet.Encodings.RLEDict, dummy, nil, nil, 0, 0) + + p.pages = append(p.pages, dictPage, dataPage) + p.initReader(descr) + p.NotPanics(func() { p.reader.HasNext() }) + p.NoError(p.reader.Err()) + p.pages = p.pages[:0] + }) + + p.Run("Dict: Plain Dictionary, Data: Plain Dictionary", func() { + dictPage := file.NewDictionaryPage(dummy, 0, parquet.Encodings.PlainDict) + dataPage := testutils.MakeDataPage(p.dataPageVersion, descr, nil, 0, parquet.Encodings.PlainDict, dummy, nil, nil, 0, 0) + p.pages = append(p.pages, dictPage, dataPage) + p.initReader(descr) + p.NotPanics(func() { p.reader.HasNext() }) + p.NoError(p.reader.Err()) + p.pages = p.pages[:0] + }) + + p.Run("Panic if dict page not first", func() { + dataPage := testutils.MakeDataPage(p.dataPageVersion, descr, nil, 0, parquet.Encodings.RLEDict, dummy, nil, nil, 0, 0) + p.pages = append(p.pages, dataPage) + p.initReader(descr) + p.NotPanics(func() { p.False(p.reader.HasNext()) }) + p.Error(p.reader.Err()) + p.pages = p.pages[:0] + }) + + p.Run("Only RLE is supported", func() { + dictPage := file.NewDictionaryPage(dummy, 0, parquet.Encodings.DeltaByteArray) + p.pages = append(p.pages, dictPage) + p.initReader(descr) + p.NotPanics(func() { p.False(p.reader.HasNext()) }) + p.Error(p.reader.Err()) + p.pages = p.pages[:0] + }) + + p.Run("Cannot have more than one dict", func() { + dictPage1 := file.NewDictionaryPage(dummy, 0, parquet.Encodings.PlainDict) + dictPage2 := file.NewDictionaryPage(dummy, 0, parquet.Encodings.Plain) + p.pages = append(p.pages, dictPage1, dictPage2) + p.initReader(descr) + p.NotPanics(func() { p.False(p.reader.HasNext()) }) + p.Error(p.reader.Err()) + p.pages = p.pages[:0] + }) + + p.Run("Unsupported encoding", func() { + dataPage := testutils.MakeDataPage(p.dataPageVersion, descr, nil, 0, parquet.Encodings.DeltaByteArray, dummy, nil, nil, 
0, 0) + p.pages = append(p.pages, dataPage) + p.initReader(descr) + p.Panics(func() { p.reader.HasNext() }) + // p.Error(p.reader.Err()) + p.pages = p.pages[:0] + }) + + p.pages = p.pages[:2] +} + +func TestPrimitiveReader(t *testing.T) { + t.Parallel() + t.Run("datapage v1", func(t *testing.T) { + suite.Run(t, new(PrimitiveReaderSuite)) + }) + t.Run("datapage v2", func(t *testing.T) { + suite.Run(t, &PrimitiveReaderSuite{dataPageVersion: parquet.DataPageV2}) + }) +} diff --git a/go/parquet/file/column_reader_types.gen.go b/go/parquet/file/column_reader_types.gen.go new file mode 100644 index 0000000000000..ab1fd535bbf27 --- /dev/null +++ b/go/parquet/file/column_reader_types.gen.go @@ -0,0 +1,299 @@ +// Code generated by column_reader_types.gen.go.tmpl. DO NOT EDIT. + +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package file + +import ( + "unsafe" + + "github.com/apache/arrow/go/arrow" + "github.com/apache/arrow/go/parquet" + "github.com/apache/arrow/go/parquet/internal/encoding" +) + +// Int32ColumnChunkReader is the Typed Column chunk reader instance for reading +// Int32 column data. +type Int32ColumnChunkReader struct { + columnChunkReader +} + +// Skip skips the next nvalues so that the next call to ReadBatch +// will start reading *after* the skipped values. +func (cr *Int32ColumnChunkReader) Skip(nvalues int64) (int64, error) { + return cr.columnChunkReader.skipValues(nvalues, + func(batch int64, buf []byte) (int64, error) { + vals, _, err := cr.ReadBatch(batch, + arrow.Int32Traits.CastFromBytes(buf), + arrow.Int16Traits.CastFromBytes(buf), + arrow.Int16Traits.CastFromBytes(buf)) + return vals, err + }) +} + +// ReadBatch reads batchSize values from the column. +// +// Returns error if values is not at least big enough to hold the number of values that will be read. +// +// defLvls and repLvls can be nil, or will be populated if not nil. If not nil, they must be +// at least large enough to hold the number of values that will be read. +// +// total is the number of rows that were read, valuesRead is the actual number of physical values +// that were read excluding nulls +func (cr *Int32ColumnChunkReader) ReadBatch(batchSize int64, values []int32, defLvls, repLvls []int16) (total int64, valuesRead int, err error) { + return cr.readBatch(batchSize, defLvls, repLvls, func(start, len int64) (int, error) { + return cr.curDecoder.(encoding.Int32Decoder).Decode(values[start : start+len]) + }) +} + +// Int64ColumnChunkReader is the Typed Column chunk reader instance for reading +// Int64 column data. +type Int64ColumnChunkReader struct { + columnChunkReader +} + +// Skip skips the next nvalues so that the next call to ReadBatch +// will start reading *after* the skipped values. 
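Because nulls are represented only through definition levels, the total and valuesRead results of ReadBatch differ for optional columns: total counts levels, while the non-null values are packed at the front of the values slice. A minimal sketch of reassembling a nullable column from those two pieces; the function, its inputs, and the package wrapper are illustrative, not part of this patch:

package example

import "github.com/apache/arrow/go/parquet/file"

// gatherNullableInt32 drains an optional INT32 column into a []*int32, using the
// returned definition levels to re-insert nulls between the densely packed values.
// The reader and maxDef arguments are assumed to be supplied by the caller; a value
// is physically present only when its definition level equals maxDef.
func gatherNullableInt32(rdr *file.Int32ColumnChunkReader, maxDef int16) ([]*int32, error) {
    vals := make([]int32, 1024)
    defs := make([]int16, 1024)
    var out []*int32
    for rdr.HasNext() {
        // total counts def levels; the packed non-null values sit at the front of vals
        total, _, err := rdr.ReadBatch(int64(len(vals)), vals, defs, nil)
        if err != nil {
            return nil, err
        }
        vi := 0
        for i := int64(0); i < total; i++ {
            if defs[i] == maxDef {
                v := vals[vi]
                vi++
                out = append(out, &v)
            } else {
                out = append(out, nil) // null slot
            }
        }
    }
    return out, rdr.Err()
}

For repeated columns the same walk would also consult the repetition levels to recover list boundaries.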
+func (cr *Int64ColumnChunkReader) Skip(nvalues int64) (int64, error) { + return cr.columnChunkReader.skipValues(nvalues, + func(batch int64, buf []byte) (int64, error) { + vals, _, err := cr.ReadBatch(batch, + arrow.Int64Traits.CastFromBytes(buf), + arrow.Int16Traits.CastFromBytes(buf), + arrow.Int16Traits.CastFromBytes(buf)) + return vals, err + }) +} + +// ReadBatch reads batchSize values from the column. +// +// Returns error if values is not at least big enough to hold the number of values that will be read. +// +// defLvls and repLvls can be nil, or will be populated if not nil. If not nil, they must be +// at least large enough to hold the number of values that will be read. +// +// total is the number of rows that were read, valuesRead is the actual number of physical values +// that were read excluding nulls +func (cr *Int64ColumnChunkReader) ReadBatch(batchSize int64, values []int64, defLvls, repLvls []int16) (total int64, valuesRead int, err error) { + return cr.readBatch(batchSize, defLvls, repLvls, func(start, len int64) (int, error) { + return cr.curDecoder.(encoding.Int64Decoder).Decode(values[start : start+len]) + }) +} + +// Int96ColumnChunkReader is the Typed Column chunk reader instance for reading +// Int96 column data. +type Int96ColumnChunkReader struct { + columnChunkReader +} + +// Skip skips the next nvalues so that the next call to ReadBatch +// will start reading *after* the skipped values. +func (cr *Int96ColumnChunkReader) Skip(nvalues int64) (int64, error) { + return cr.columnChunkReader.skipValues(nvalues, + func(batch int64, buf []byte) (int64, error) { + vals, _, err := cr.ReadBatch(batch, + parquet.Int96Traits.CastFromBytes(buf), + arrow.Int16Traits.CastFromBytes(buf), + arrow.Int16Traits.CastFromBytes(buf)) + return vals, err + }) +} + +// ReadBatch reads batchSize values from the column. +// +// Returns error if values is not at least big enough to hold the number of values that will be read. +// +// defLvls and repLvls can be nil, or will be populated if not nil. If not nil, they must be +// at least large enough to hold the number of values that will be read. +// +// total is the number of rows that were read, valuesRead is the actual number of physical values +// that were read excluding nulls +func (cr *Int96ColumnChunkReader) ReadBatch(batchSize int64, values []parquet.Int96, defLvls, repLvls []int16) (total int64, valuesRead int, err error) { + return cr.readBatch(batchSize, defLvls, repLvls, func(start, len int64) (int, error) { + return cr.curDecoder.(encoding.Int96Decoder).Decode(values[start : start+len]) + }) +} + +// Float32ColumnChunkReader is the Typed Column chunk reader instance for reading +// Float32 column data. +type Float32ColumnChunkReader struct { + columnChunkReader +} + +// Skip skips the next nvalues so that the next call to ReadBatch +// will start reading *after* the skipped values. +func (cr *Float32ColumnChunkReader) Skip(nvalues int64) (int64, error) { + return cr.columnChunkReader.skipValues(nvalues, + func(batch int64, buf []byte) (int64, error) { + vals, _, err := cr.ReadBatch(batch, + arrow.Float32Traits.CastFromBytes(buf), + arrow.Int16Traits.CastFromBytes(buf), + arrow.Int16Traits.CastFromBytes(buf)) + return vals, err + }) +} + +// ReadBatch reads batchSize values from the column. +// +// Returns error if values is not at least big enough to hold the number of values that will be read. +// +// defLvls and repLvls can be nil, or will be populated if not nil. 
If not nil, they must be +// at least large enough to hold the number of values that will be read. +// +// total is the number of rows that were read, valuesRead is the actual number of physical values +// that were read excluding nulls +func (cr *Float32ColumnChunkReader) ReadBatch(batchSize int64, values []float32, defLvls, repLvls []int16) (total int64, valuesRead int, err error) { + return cr.readBatch(batchSize, defLvls, repLvls, func(start, len int64) (int, error) { + return cr.curDecoder.(encoding.Float32Decoder).Decode(values[start : start+len]) + }) +} + +// Float64ColumnChunkReader is the Typed Column chunk reader instance for reading +// Float64 column data. +type Float64ColumnChunkReader struct { + columnChunkReader +} + +// Skip skips the next nvalues so that the next call to ReadBatch +// will start reading *after* the skipped values. +func (cr *Float64ColumnChunkReader) Skip(nvalues int64) (int64, error) { + return cr.columnChunkReader.skipValues(nvalues, + func(batch int64, buf []byte) (int64, error) { + vals, _, err := cr.ReadBatch(batch, + arrow.Float64Traits.CastFromBytes(buf), + arrow.Int16Traits.CastFromBytes(buf), + arrow.Int16Traits.CastFromBytes(buf)) + return vals, err + }) +} + +// ReadBatch reads batchSize values from the column. +// +// Returns error if values is not at least big enough to hold the number of values that will be read. +// +// defLvls and repLvls can be nil, or will be populated if not nil. If not nil, they must be +// at least large enough to hold the number of values that will be read. +// +// total is the number of rows that were read, valuesRead is the actual number of physical values +// that were read excluding nulls +func (cr *Float64ColumnChunkReader) ReadBatch(batchSize int64, values []float64, defLvls, repLvls []int16) (total int64, valuesRead int, err error) { + return cr.readBatch(batchSize, defLvls, repLvls, func(start, len int64) (int, error) { + return cr.curDecoder.(encoding.Float64Decoder).Decode(values[start : start+len]) + }) +} + +// BooleanColumnChunkReader is the Typed Column chunk reader instance for reading +// Boolean column data. +type BooleanColumnChunkReader struct { + columnChunkReader +} + +// Skip skips the next nvalues so that the next call to ReadBatch +// will start reading *after* the skipped values. +func (cr *BooleanColumnChunkReader) Skip(nvalues int64) (int64, error) { + return cr.columnChunkReader.skipValues(nvalues, + func(batch int64, buf []byte) (int64, error) { + vals, _, err := cr.ReadBatch(batch, + *(*[]bool)(unsafe.Pointer(&buf)), + arrow.Int16Traits.CastFromBytes(buf), + arrow.Int16Traits.CastFromBytes(buf)) + return vals, err + }) +} + +// ReadBatch reads batchSize values from the column. +// +// Returns error if values is not at least big enough to hold the number of values that will be read. +// +// defLvls and repLvls can be nil, or will be populated if not nil. If not nil, they must be +// at least large enough to hold the number of values that will be read. 
+// +// total is the number of rows that were read, valuesRead is the actual number of physical values +// that were read excluding nulls +func (cr *BooleanColumnChunkReader) ReadBatch(batchSize int64, values []bool, defLvls, repLvls []int16) (total int64, valuesRead int, err error) { + return cr.readBatch(batchSize, defLvls, repLvls, func(start, len int64) (int, error) { + return cr.curDecoder.(encoding.BooleanDecoder).Decode(values[start : start+len]) + }) +} + +// ByteArrayColumnChunkReader is the Typed Column chunk reader instance for reading +// ByteArray column data. +type ByteArrayColumnChunkReader struct { + columnChunkReader +} + +// Skip skips the next nvalues so that the next call to ReadBatch +// will start reading *after* the skipped values. +func (cr *ByteArrayColumnChunkReader) Skip(nvalues int64) (int64, error) { + return cr.columnChunkReader.skipValues(nvalues, + func(batch int64, buf []byte) (int64, error) { + vals, _, err := cr.ReadBatch(batch, + parquet.ByteArrayTraits.CastFromBytes(buf), + arrow.Int16Traits.CastFromBytes(buf), + arrow.Int16Traits.CastFromBytes(buf)) + return vals, err + }) +} + +// ReadBatch reads batchSize values from the column. +// +// Returns error if values is not at least big enough to hold the number of values that will be read. +// +// defLvls and repLvls can be nil, or will be populated if not nil. If not nil, they must be +// at least large enough to hold the number of values that will be read. +// +// total is the number of rows that were read, valuesRead is the actual number of physical values +// that were read excluding nulls +func (cr *ByteArrayColumnChunkReader) ReadBatch(batchSize int64, values []parquet.ByteArray, defLvls, repLvls []int16) (total int64, valuesRead int, err error) { + return cr.readBatch(batchSize, defLvls, repLvls, func(start, len int64) (int, error) { + return cr.curDecoder.(encoding.ByteArrayDecoder).Decode(values[start : start+len]) + }) +} + +// FixedLenByteArrayColumnChunkReader is the Typed Column chunk reader instance for reading +// FixedLenByteArray column data. +type FixedLenByteArrayColumnChunkReader struct { + columnChunkReader +} + +// Skip skips the next nvalues so that the next call to ReadBatch +// will start reading *after* the skipped values. +func (cr *FixedLenByteArrayColumnChunkReader) Skip(nvalues int64) (int64, error) { + return cr.columnChunkReader.skipValues(nvalues, + func(batch int64, buf []byte) (int64, error) { + vals, _, err := cr.ReadBatch(batch, + parquet.FixedLenByteArrayTraits.CastFromBytes(buf), + arrow.Int16Traits.CastFromBytes(buf), + arrow.Int16Traits.CastFromBytes(buf)) + return vals, err + }) +} + +// ReadBatch reads batchSize values from the column. +// +// Returns error if values is not at least big enough to hold the number of values that will be read. +// +// defLvls and repLvls can be nil, or will be populated if not nil. If not nil, they must be +// at least large enough to hold the number of values that will be read. 
+// +// total is the number of rows that were read, valuesRead is the actual number of physical values +// that were read excluding nulls +func (cr *FixedLenByteArrayColumnChunkReader) ReadBatch(batchSize int64, values []parquet.FixedLenByteArray, defLvls, repLvls []int16) (total int64, valuesRead int, err error) { + return cr.readBatch(batchSize, defLvls, repLvls, func(start, len int64) (int, error) { + return cr.curDecoder.(encoding.FixedLenByteArrayDecoder).Decode(values[start : start+len]) + }) +} diff --git a/go/parquet/file/column_reader_types.gen.go.tmpl b/go/parquet/file/column_reader_types.gen.go.tmpl new file mode 100644 index 0000000000000..23b7d3ed823c3 --- /dev/null +++ b/go/parquet/file/column_reader_types.gen.go.tmpl @@ -0,0 +1,62 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package file + +import ( + "github.com/apache/arrow/go/parquet" + "github.com/apache/arrow/go/parquet/internal/encoding" +) + +{{range .In}} +// {{.Name}}ColumnChunkReader is the Typed Column chunk reader instance for reading +// {{.Name}} column data. +type {{.Name}}ColumnChunkReader struct { + columnChunkReader +} + +// Skip skips the next nvalues so that the next call to ReadBatch +// will start reading *after* the skipped values. +func (cr *{{.Name}}ColumnChunkReader) Skip(nvalues int64) (int64, error) { + return cr.columnChunkReader.skipValues(nvalues, + func(batch int64, buf []byte) (int64, error) { + vals, _, err := cr.ReadBatch(batch, + {{- if ne .Name "Boolean"}} + {{.prefix}}.{{.Name}}Traits.CastFromBytes(buf), + {{- else}} + *(*[]bool)(unsafe.Pointer(&buf)), + {{- end}} + arrow.Int16Traits.CastFromBytes(buf), + arrow.Int16Traits.CastFromBytes(buf)) + return vals, err + }) +} + +// ReadBatch reads batchSize values from the column. +// +// Returns error if values is not at least big enough to hold the number of values that will be read. +// +// defLvls and repLvls can be nil, or will be populated if not nil. If not nil, they must be +// at least large enough to hold the number of values that will be read. 
+// +// total is the number of rows that were read, valuesRead is the actual number of physical values +// that were read excluding nulls +func (cr *{{.Name}}ColumnChunkReader) ReadBatch(batchSize int64, values []{{.name}}, defLvls, repLvls []int16) (total int64, valuesRead int, err error) { + return cr.readBatch(batchSize, defLvls, repLvls, func(start, len int64) (int, error) { + return cr.curDecoder.(encoding.{{.Name}}Decoder).Decode(values[start:start+len]) + }) +} +{{end}} diff --git a/go/parquet/file/file_reader.go b/go/parquet/file/file_reader.go new file mode 100644 index 0000000000000..8b95223a14d82 --- /dev/null +++ b/go/parquet/file/file_reader.go @@ -0,0 +1,336 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package file + +import ( + "bytes" + "encoding/binary" + "io" + "os" + + "github.com/apache/arrow/go/arrow/memory" + "github.com/apache/arrow/go/parquet" + "github.com/apache/arrow/go/parquet/internal/encryption" + "github.com/apache/arrow/go/parquet/metadata" + "golang.org/x/exp/mmap" + "golang.org/x/xerrors" +) + +const ( + footerSize uint32 = 8 +) + +var ( + magicBytes = []byte("PAR1") + magicEBytes = []byte("PARE") + errInconsistentFileMetadata = xerrors.New("parquet: file is smaller than indicated metadata size") +) + +// Reader is the main interface for reading a parquet file +type Reader struct { + r parquet.ReaderAtSeeker + props *parquet.ReaderProperties + metadata *metadata.FileMetaData + footerOffset int64 + fileDecryptor encryption.FileDecryptor +} + +// an adapter for mmap'd files +type mmapAdapter struct { + *mmap.ReaderAt + + pos int64 +} + +func (m *mmapAdapter) Close() error { + return m.ReaderAt.Close() +} + +func (m *mmapAdapter) ReadAt(p []byte, off int64) (int, error) { + return m.ReaderAt.ReadAt(p, off) +} + +func (m *mmapAdapter) Read(p []byte) (n int, err error) { + n, err = m.ReaderAt.ReadAt(p, m.pos) + m.pos += int64(n) + return +} + +func (m *mmapAdapter) Seek(offset int64, whence int) (int64, error) { + newPos, offs := int64(0), offset + switch whence { + case io.SeekStart: + newPos = offs + case io.SeekCurrent: + newPos = m.pos + offs + case io.SeekEnd: + newPos = int64(m.ReaderAt.Len()) + offs + } + if newPos < 0 { + return 0, xerrors.New("negative result pos") + } + if newPos > int64(m.ReaderAt.Len()) { + return 0, xerrors.New("new position exceeds size of file") + } + m.pos = newPos + return newPos, nil +} + +type ReadOption func(*Reader) + +// WithReadProps specifies a specific reader properties instance to use, rather +// than using the default ReaderProperties. 
+func WithReadProps(props *parquet.ReaderProperties) ReadOption { + return func(r *Reader) { + r.props = props + } +} + +// WithMetadata allows providing a specific FileMetaData object rather than reading +// the file metadata from the file itself. +func WithMetadata(m *metadata.FileMetaData) ReadOption { + return func(r *Reader) { + r.metadata = m + } +} + +// OpenParquetFile will return a Reader for the given parquet file on the local file system. +// +// Optionally the file can be memory mapped for faster reading. If no read properties are provided +// then the default ReaderProperties will be used. The WithMetadata option can be used to provide +// a FileMetaData object rather than reading the file metadata from the file. +func OpenParquetFile(filename string, memoryMap bool, opts ...ReadOption) (*Reader, error) { + var source parquet.ReaderAtSeeker + + var err error + if memoryMap { + rdr, err := mmap.Open(filename) + if err != nil { + return nil, err + } + source = &mmapAdapter{rdr, 0} + } else { + source, err = os.Open(filename) + if err != nil { + return nil, err + } + } + return NewParquetReader(source, opts...) +} + +// NewParquetReader returns a FileReader instance that reads a parquet file which can be read from r. +// This reader needs to support Read, ReadAt and Seeking. +// +// If no read properties are provided then the default ReaderProperties will be used. The WithMetadata +// option can be used to provide a FileMetaData object rather than reading the file metadata from the file. +func NewParquetReader(r parquet.ReaderAtSeeker, opts ...ReadOption) (*Reader, error) { + var err error + f := &Reader{r: r} + for _, o := range opts { + o(f) + } + + if f.footerOffset <= 0 { + f.footerOffset, err = r.Seek(0, io.SeekEnd) + if err != nil { + return nil, xerrors.Errorf("parquet: could not retrieve footer offset: %w", err) + } + } + + if f.props == nil { + f.props = parquet.NewReaderProperties(memory.NewGoAllocator()) + } + + if f.metadata == nil { + return f, f.parseMetaData() + } + + return f, nil +} + +// Close will close the current reader, and if the underlying reader being used +// is an `io.Closer` then Close will be called on it too. +func (f *Reader) Close() error { + if r, ok := f.r.(io.Closer); ok { + return r.Close() + } + return nil +} + +// MetaData returns the underlying FileMetadata object +func (f *Reader) MetaData() *metadata.FileMetaData { return f.metadata } + +// parseMetaData handles parsing the metadata from the opened file. 
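Before the footer-parsing internals that follow, a minimal usage sketch of the reader API defined above may help; it is not part of the patch. The file name is a placeholder, the import path mirrors the one used elsewhere in this change, and error handling is reduced to a panic.

package main

import (
    "fmt"

    "github.com/apache/arrow/go/parquet/file"
)

func main() {
    // Open without memory mapping; the default ReaderProperties are used
    // because no WithReadProps option is supplied.
    rdr, err := file.OpenParquetFile("example.parquet", false)
    if err != nil {
        panic(err)
    }
    defer rdr.Close()

    // Only metadata-level accessors introduced in this file are used here.
    fmt.Println("rows:", rdr.NumRows())
    fmt.Println("row groups:", rdr.NumRowGroups())
}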
+func (f *Reader) parseMetaData() error { + if f.footerOffset <= int64(footerSize) { + return xerrors.Errorf("parquet: file too small (size=%d)", f.footerOffset) + } + + buf := make([]byte, footerSize) + // backup 8 bytes to read the footer size (first four bytes) and the magic bytes (last 4 bytes) + n, err := f.r.ReadAt(buf, f.footerOffset-int64(footerSize)) + if err != nil { + return xerrors.Errorf("parquet: could not read footer: %w", err) + } + if n != len(buf) { + return xerrors.Errorf("parquet: could not read %d bytes from end of file", len(buf)) + } + + size := int64(binary.LittleEndian.Uint32(buf[:4])) + if size < 0 || size+int64(footerSize) > f.footerOffset { + return errInconsistentFileMetadata + } + + fileDecryptProps := f.props.FileDecryptProps + + switch { + case bytes.Equal(buf[4:], magicBytes): // non-encrypted metadata + buf = make([]byte, size) + if _, err := f.r.ReadAt(buf, f.footerOffset-int64(footerSize)-size); err != nil { + return xerrors.Errorf("parquet: could not read footer: %w", err) + } + + f.metadata, err = metadata.NewFileMetaData(buf, nil) + if err != nil { + return xerrors.Errorf("parquet: could not read footer: %w", err) + } + + if !f.metadata.IsSetEncryptionAlgorithm() { + if fileDecryptProps != nil && !fileDecryptProps.PlaintextFilesAllowed() { + return xerrors.Errorf("parquet: applying decryption properties on plaintext file") + } + } else { + if err := f.parseMetaDataEncryptedFilePlaintextFooter(fileDecryptProps, buf); err != nil { + return err + } + } + case bytes.Equal(buf[4:], magicEBytes): // encrypted metadata + buf = make([]byte, size) + if _, err := f.r.ReadAt(buf, f.footerOffset-int64(footerSize)-size); err != nil { + return xerrors.Errorf("parquet: could not read footer: %w", err) + } + + if fileDecryptProps == nil { + return xerrors.New("could not read encrypted metadata, no decryption found in reader's properties") + } + + fileCryptoMetadata, err := metadata.NewFileCryptoMetaData(buf) + if err != nil { + return err + } + algo := fileCryptoMetadata.EncryptionAlgorithm() + fileAad, err := f.handleAadPrefix(fileDecryptProps, &algo) + if err != nil { + return err + } + f.fileDecryptor = encryption.NewFileDecryptor(fileDecryptProps, fileAad, algo.Algo, string(fileCryptoMetadata.KeyMetadata()), f.props.Allocator()) + + f.metadata, err = metadata.NewFileMetaData(buf[fileCryptoMetadata.Len():], f.fileDecryptor) + if err != nil { + return xerrors.Errorf("parquet: could not read footer: %w", err) + } + default: + return xerrors.Errorf("parquet: magic bytes not found in footer. 
Either the file is corrupted or this isn't a parquet file")
+    }
+
+    return nil
+}
+
+func (f *Reader) handleAadPrefix(fileDecrypt *parquet.FileDecryptionProperties, algo *parquet.Algorithm) (string, error) {
+    aadPrefixInProps := fileDecrypt.AadPrefix()
+    aadPrefix := []byte(aadPrefixInProps)
+    fileHasAadPrefix := algo.Aad.AadPrefix != nil && len(algo.Aad.AadPrefix) > 0
+    aadPrefixInFile := algo.Aad.AadPrefix
+
+    if algo.Aad.SupplyAadPrefix && aadPrefixInProps == "" {
+        return "", xerrors.New("AAD Prefix used for file encryption but not stored in file and not supplied in decryption props")
+    }
+
+    if fileHasAadPrefix {
+        if aadPrefixInProps != "" {
+            if aadPrefixInProps != string(aadPrefixInFile) {
+                return "", xerrors.New("AAD prefix in file and in properties but not the same")
+            }
+        }
+        aadPrefix = aadPrefixInFile
+        if fileDecrypt.Verifier != nil {
+            fileDecrypt.Verifier.Verify(string(aadPrefix))
+        }
+    } else {
+        if !algo.Aad.SupplyAadPrefix && aadPrefixInProps != "" {
+            return "", xerrors.New("AAD Prefix set in decryption properties but was not used for file encryption")
+        }
+        if fileDecrypt.Verifier != nil {
+            return "", xerrors.New("AAD Prefix Verifier is set but AAD Prefix not found in file")
+        }
+    }
+    return string(append(aadPrefix, algo.Aad.AadFileUnique...)), nil
+}
+
+func (f *Reader) parseMetaDataEncryptedFilePlaintextFooter(decryptProps *parquet.FileDecryptionProperties, data []byte) error {
+    if decryptProps != nil {
+        algo := f.metadata.EncryptionAlgorithm()
+        fileAad, err := f.handleAadPrefix(decryptProps, &algo)
+        if err != nil {
+            return err
+        }
+        f.fileDecryptor = encryption.NewFileDecryptor(decryptProps, fileAad, algo.Algo, string(f.metadata.GetFooterSigningKeyMetadata()), f.props.Allocator())
+        // set the InternalFileDecryptor in the metadata as well, as it's used
+        // for signature verification and for ColumnChunkMetaData creation.
+        f.metadata.FileDecryptor = f.fileDecryptor
+        if decryptProps.PlaintextFooterIntegrity() {
+            if len(data)-f.metadata.Size() != encryption.GcmTagLength+encryption.NonceLength {
+                return xerrors.New("failed reading metadata for encryption signature")
+            }
+
+            if !f.metadata.VerifySignature(data[f.metadata.Size():]) {
+                return xerrors.New("parquet crypto signature verification failed")
+            }
+        }
+    }
+    return nil
+}
+
+// WriterVersion returns the Application Version that was written in the file
+// metadata
+func (f *Reader) WriterVersion() *metadata.AppVersion {
+    return f.metadata.WriterVersion()
+}
+
+// NumRows returns the total number of rows in this parquet file.
+func (f *Reader) NumRows() int64 {
+    return f.metadata.GetNumRows()
+}
+
+// NumRowGroups returns the total number of row groups in this file.
+func (f *Reader) NumRowGroups() int {
+    return len(f.metadata.GetRowGroups())
+}
+
+// RowGroup returns a reader for the desired (0-based) row group
+func (f *Reader) RowGroup(i int) *RowGroupReader {
+    rg := f.metadata.RowGroups[i]
+
+    return &RowGroupReader{
+        fileMetadata:  f.metadata,
+        rgMetadata:    metadata.NewRowGroupMetaData(rg, f.metadata.Schema, f.WriterVersion(), f.fileDecryptor),
+        props:         f.props,
+        r:             f.r,
+        sourceSz:      f.footerOffset,
+        fileDecryptor: f.fileDecryptor,
+    }
+}
diff --git a/go/parquet/file/file_reader_test.go b/go/parquet/file/file_reader_test.go
new file mode 100644
index 0000000000000..6dfb1fa6bc736
--- /dev/null
+++ b/go/parquet/file/file_reader_test.go
@@ -0,0 +1,304 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package file_test + +import ( + "bytes" + "encoding/binary" + "io" + "math/rand" + "testing" + + "github.com/apache/arrow/go/arrow/memory" + "github.com/apache/arrow/go/parquet/compress" + "github.com/apache/arrow/go/parquet/file" + "github.com/apache/arrow/go/parquet/internal/encoding" + format "github.com/apache/arrow/go/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/parquet/internal/thrift" + "github.com/apache/arrow/go/parquet/metadata" + libthrift "github.com/apache/thrift/lib/go/thrift" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/suite" +) + +func getDummyStats(statSize int, fillAll bool) *format.Statistics { + statBytes := make([]byte, statSize) + memory.Set(statBytes, 1) + + ret := format.NewStatistics() + ret.Max = statBytes + if fillAll { + ret.Min = statBytes + ret.NullCount = libthrift.Int64Ptr(42) + ret.DistinctCount = libthrift.Int64Ptr(1) + } + return ret +} + +func checkStatistics(t *testing.T, stats format.Statistics, actual metadata.EncodedStatistics) { + if stats.IsSetMax() { + assert.Equal(t, stats.Max, actual.Max) + } + if stats.IsSetMin() { + assert.Equal(t, stats.Min, actual.Min) + } + if stats.IsSetNullCount() { + assert.Equal(t, stats.GetNullCount(), actual.NullCount) + } + if stats.IsSetDistinctCount() { + assert.Equal(t, stats.GetDistinctCount(), actual.DistinctCount) + } +} + +type PageSerdeSuite struct { + suite.Suite + + sink *encoding.BufferWriter + buffer *memory.Buffer + + pageHdr format.PageHeader + dataPageHdr format.DataPageHeader + dataPageHdrV2 format.DataPageHeaderV2 + + pageReader file.PageReader +} + +func TestFileDeserializing(t *testing.T) { + t.Parallel() + suite.Run(t, new(PageSerdeSuite)) +} + +func (p *PageSerdeSuite) ResetStream() { + p.sink = encoding.NewBufferWriter(0, memory.DefaultAllocator) +} + +func (p *PageSerdeSuite) EndStream() { + p.buffer = p.sink.Finish() +} + +func (p *PageSerdeSuite) SetupTest() { + p.dataPageHdr.Encoding = format.Encoding_PLAIN + p.dataPageHdr.DefinitionLevelEncoding = format.Encoding_RLE + p.dataPageHdr.RepetitionLevelEncoding = format.Encoding_RLE + + p.ResetStream() +} + +func (p *PageSerdeSuite) InitSerializedPageReader(nrows int64, codec compress.Compression) { + p.EndStream() + + p.pageReader, _ = file.NewPageReader(bytes.NewReader(p.buffer.Bytes()), nrows, codec, memory.DefaultAllocator, nil) +} + +func (p *PageSerdeSuite) WriteDataPageHeader(maxSerialized int, uncompressed, compressed int32) { + // simplifying writing serialized data page headers which may or may + // not have meaningful data associated with them + + p.pageHdr.DataPageHeader = &p.dataPageHdr + p.pageHdr.UncompressedPageSize = uncompressed + p.pageHdr.CompressedPageSize = compressed + p.pageHdr.Type = format.PageType_DATA_PAGE + + serializer := thrift.NewThriftSerializer() + p.NotPanics(func() { + 
serializer.Serialize(&p.pageHdr, p.sink, nil) + }) +} + +func (p *PageSerdeSuite) WriteDataPageHeaderV2(maxSerialized int, uncompressed, compressed int32) { + p.pageHdr.DataPageHeaderV2 = &p.dataPageHdrV2 + p.pageHdr.UncompressedPageSize = uncompressed + p.pageHdr.CompressedPageSize = compressed + p.pageHdr.Type = format.PageType_DATA_PAGE_V2 + + serializer := thrift.NewThriftSerializer() + p.NotPanics(func() { + serializer.Serialize(&p.pageHdr, p.sink, nil) + }) +} + +func (p *PageSerdeSuite) CheckDataPageHeader(expected format.DataPageHeader, page file.Page) { + p.Equal(format.PageType_DATA_PAGE, page.Type()) + + p.IsType(&file.DataPageV1{}, page) + p.Equal(expected.NumValues, page.NumValues()) + p.Equal(expected.Encoding, page.Encoding()) + p.EqualValues(expected.DefinitionLevelEncoding, page.(*file.DataPageV1).DefinitionLevelEncoding()) + p.EqualValues(expected.RepetitionLevelEncoding, page.(*file.DataPageV1).RepetitionLevelEncoding()) + checkStatistics(p.T(), *expected.Statistics, page.(file.DataPage).Statistics()) +} + +func (p *PageSerdeSuite) CheckDataPageHeaderV2(expected format.DataPageHeaderV2, page file.Page) { + p.Equal(format.PageType_DATA_PAGE_V2, page.Type()) + + p.IsType(&file.DataPageV2{}, page) + p.Equal(expected.NumValues, page.NumValues()) + p.Equal(expected.Encoding, page.Encoding()) + p.Equal(expected.NumNulls, page.(*file.DataPageV2).NumNulls()) + p.Equal(expected.DefinitionLevelsByteLength, page.(*file.DataPageV2).DefinitionLevelByteLen()) + p.Equal(expected.RepetitionLevelsByteLength, page.(*file.DataPageV2).RepetitionLevelByteLen()) + p.Equal(expected.IsCompressed, page.(*file.DataPageV2).IsCompressed()) + checkStatistics(p.T(), *expected.Statistics, page.(file.DataPage).Statistics()) +} + +func (p *PageSerdeSuite) TestDataPageV1() { + const ( + statsSize = 512 + nrows = 4444 + ) + p.dataPageHdr.Statistics = getDummyStats(statsSize, true) + p.dataPageHdr.NumValues = nrows + + p.WriteDataPageHeader(1024, 0, 0) + p.InitSerializedPageReader(nrows, compress.Codecs.Uncompressed) + p.True(p.pageReader.Next()) + currentPage := p.pageReader.Page() + p.CheckDataPageHeader(p.dataPageHdr, currentPage) +} + +func (p *PageSerdeSuite) TestDataPageV2() { + const ( + statsSize = 512 + nrows = 4444 + ) + p.dataPageHdrV2.Statistics = getDummyStats(statsSize, true) + p.dataPageHdrV2.NumValues = nrows + p.WriteDataPageHeaderV2(1024, 0, 0) + p.InitSerializedPageReader(nrows, compress.Codecs.Uncompressed) + p.True(p.pageReader.Next()) + p.CheckDataPageHeaderV2(p.dataPageHdrV2, p.pageReader.Page()) +} + +func (p *PageSerdeSuite) TestLargePageHeaders() { + const ( + statsSize = 256 * 1024 // 256KB + nrows = 4141 + maxHeaderSize = 512 * 1024 // 512KB + ) + + p.dataPageHdr.Statistics = getDummyStats(statsSize, false) + p.dataPageHdr.NumValues = nrows + p.WriteDataPageHeader(maxHeaderSize, 0, 0) + pos, err := p.sink.Seek(0, io.SeekCurrent) + p.NoError(err) + p.GreaterOrEqual(maxHeaderSize, int(pos)) + p.LessOrEqual(statsSize, int(pos)) + p.GreaterOrEqual(16*1024*1024, int(pos)) + + p.InitSerializedPageReader(nrows, compress.Codecs.Uncompressed) + p.True(p.pageReader.Next()) + p.CheckDataPageHeader(p.dataPageHdr, p.pageReader.Page()) +} + +func (p *PageSerdeSuite) TestFailLargePageHeaders() { + const ( + statsSize = 256 * 1024 // 256KB + nrows = 1337 // dummy value + maxHeaderSize = 512 * 1024 // 512 KB + smallerMaxSize = 128 * 1024 // 128KB + ) + p.dataPageHdr.Statistics = getDummyStats(statsSize, false) + p.WriteDataPageHeader(maxHeaderSize, 0, 0) + pos, err := p.sink.Seek(0, 
io.SeekCurrent) + p.NoError(err) + p.GreaterOrEqual(maxHeaderSize, int(pos)) + + p.LessOrEqual(smallerMaxSize, int(pos)) + p.InitSerializedPageReader(nrows, compress.Codecs.Uncompressed) + p.pageReader.SetMaxPageHeaderSize(smallerMaxSize) + p.NotPanics(func() { p.False(p.pageReader.Next()) }) + p.Error(p.pageReader.Err()) +} + +func (p *PageSerdeSuite) TestCompression() { + codecs := []compress.Compression{ + compress.Codecs.Snappy, + compress.Codecs.Brotli, + compress.Codecs.Gzip, + // compress.Codecs.Lz4, // not yet implemented + compress.Codecs.Zstd, + } + + const ( + nrows = 32 // dummy value + npages = 10 + ) + p.dataPageHdr.NumValues = nrows + + fauxData := make([][]byte, npages) + for idx := range fauxData { + // each page is larger + fauxData[idx] = make([]byte, (idx+1)*64) + rand.Read(fauxData[idx]) + } + for _, c := range codecs { + p.Run(c.String(), func() { + codec, _ := compress.GetCodec(c) + for _, data := range fauxData { + maxCompressed := codec.CompressBound(int64(len(data))) + buffer := make([]byte, maxCompressed) + buffer = codec.Encode(buffer, data) + p.WriteDataPageHeader(1024, int32(len(data)), int32(len(buffer))) + _, err := p.sink.Write(buffer) + p.NoError(err) + } + + p.InitSerializedPageReader(nrows*npages, c) + + for _, data := range fauxData { + p.True(p.pageReader.Next()) + page := p.pageReader.Page() + p.IsType(&file.DataPageV1{}, page) + p.Equal(data, page.Data()) + } + p.ResetStream() + }) + } +} + +func TestInvalidHeaders(t *testing.T) { + badHeader := []byte("PAR2") + _, err := file.NewParquetReader(bytes.NewReader(badHeader)) + assert.Error(t, err) +} + +func TestInvalidFooter(t *testing.T) { + // file is smaller than FOOTER_SIZE + badFile := []byte("PAR1PAR") + _, err := file.NewParquetReader(bytes.NewReader(badFile)) + assert.Error(t, err) + + // Magic Number Incorrect + badFile2 := []byte("PAR1PAR2") + _, err = file.NewParquetReader(bytes.NewReader(badFile2)) + assert.Error(t, err) +} + +func TestIncompleteMetadata(t *testing.T) { + sink := encoding.NewBufferWriter(0, memory.DefaultAllocator) + magic := []byte("PAR1") + + sink.Write(magic) + sink.Write(make([]byte, 10)) + const metadataLen = 24 + binary.Write(sink, binary.LittleEndian, uint32(metadataLen)) + sink.Write(magic) + buf := sink.Finish() + defer buf.Release() + _, err := file.NewParquetReader(bytes.NewReader(buf.Bytes())) + assert.Error(t, err) +} diff --git a/go/parquet/file/level_conversion.go b/go/parquet/file/level_conversion.go new file mode 100644 index 0000000000000..6c56c13933e08 --- /dev/null +++ b/go/parquet/file/level_conversion.go @@ -0,0 +1,262 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+package file
+
+import (
+    "math"
+    "math/bits"
+    "unsafe"
+
+    "github.com/apache/arrow/go/parquet"
+    "github.com/apache/arrow/go/parquet/internal/bmi"
+    "github.com/apache/arrow/go/parquet/internal/utils"
+    "github.com/apache/arrow/go/parquet/schema"
+    "golang.org/x/xerrors"
+)
+
+type LevelInfo struct {
+    // How many slots an undefined but present (i.e. null) element in
+    // parquet consumes when decoding to Arrow.
+    // "Slot" is used in the same context as the Arrow specification
+    // (i.e. a value holder).
+    // This is only ever >1 for descendants of FixedSizeList.
+    NullSlotUsage int32
+    // The definition level at which the value for the field
+    // is considered not null (definition levels greater than
+    // or equal to this value indicate a not-null
+    // value for the field). For list fields definition levels
+    // greater than or equal to this field indicate a present,
+    // possibly null, child value.
+    DefLevel int16
+    // The repetition level corresponding to this element
+    // or the closest repeated ancestor. Any repetition
+    // level less than this indicates either a new list OR
+    // an empty list (which is determined in conjunction
+    // with definition levels).
+    RepLevel int16
+    // The definition level indicating the level at which the closest
+    // repeated ancestor is not empty. This is used to discriminate
+    // between a value less than |def_level| being null or excluded entirely.
+    // For instance if we have an Arrow schema like:
+    // list(struct(f0: int)). Then there are the following
+    // definition levels:
+    // 0 = null list
+    // 1 = present but empty list.
+    // 2 = a null value in the list
+    // 3 = a non null struct but null integer.
+    // 4 = a present integer.
+    // When reconstructing, the struct and integer arrays'
+    // repeated_ancestor_def_level would be 2. Any
+    // def_level < 2 indicates that there isn't a corresponding
+    // child value in the list.
+    // i.e. [null, [], [null], [{f0: null}], [{f0: 1}]]
+    // has the def levels [0, 1, 2, 3, 4]. The actual
+    // struct array is only of length 3: [not-set, set, set] and
+    // the int array is also of length 3: [N/A, null, 1].
+    RepeatedAncestorDefLevel int16
+}
+
+func newDefaultLevelInfo() *LevelInfo {
+    return &LevelInfo{NullSlotUsage: 1}
+}
+
+func (l *LevelInfo) Equal(rhs *LevelInfo) bool {
+    return l.NullSlotUsage == rhs.NullSlotUsage &&
+        l.DefLevel == rhs.DefLevel &&
+        l.RepLevel == rhs.RepLevel &&
+        l.RepeatedAncestorDefLevel == rhs.RepeatedAncestorDefLevel
+}
+
+func (l *LevelInfo) HasNullableValues() bool {
+    return l.RepeatedAncestorDefLevel < l.DefLevel
+}
+
+func (l *LevelInfo) IncrementOptional() {
+    l.DefLevel++
+}
+
+func (l *LevelInfo) IncrementRepeated() int16 {
+    lastRepAncestor := l.RepeatedAncestorDefLevel
+    // Repeated fields add both a repetition and definition level. This is used
+    // to distinguish between an empty list and a list with an item in it.
+    l.RepLevel++
+    l.DefLevel++
+
+    // For levels >= repeated_ancestor_def_level it indicates the list was
+    // non-null and had at least one element. This is important
+    // for later decoding because we need to add a slot for these
+    // values. For levels < current_def_level no slots are added
+    // to arrays. 
+    l.RepeatedAncestorDefLevel = l.DefLevel
+    return lastRepAncestor
+}
+
+func (l *LevelInfo) Increment(n schema.Node) {
+    switch n.RepetitionType() {
+    case parquet.Repetitions.Repeated:
+        l.IncrementRepeated()
+    case parquet.Repetitions.Optional:
+        l.IncrementOptional()
+    }
+}
+
+// Input/Output structure for reconstructed validity bitmaps.
+type ValidityBitmapInputOutput struct {
+    // Input only.
+    // The maximum number of values_read expected (actual
+    // values read must be less than or equal to this value).
+    // If this number is exceeded methods will throw a
+    // ParquetException. Exceeding this limit indicates
+    // either a corrupt or incorrectly written file.
+    ReadUpperBound int64
+    // Output only. The number of values encountered and added to the
+    // output (this is logically the element count of the resulting
+    // Arrow array).
+    Read int64
+    // Input/Output. The number of nulls encountered.
+    NullCount int64
+    // Output only. The validity bitmap to populate. May be null only
+    // for DefRepLevelsToListInfo (if all that is needed is list offsets).
+    ValidBits []byte
+    // Input only, offset into valid_bits to start at.
+    ValidBitsOffset int64
+}
+
+const extractBitsSize int64 = 8 * int64(unsafe.Sizeof(uint64(0)))
+
+// create a bitmap out of the definition levels and return the number of non-null values
+func defLevelsBatchToBitmap(defLevels []int16, remainingUpperBound int64, info LevelInfo, wr utils.BitmapWriter, hasRepeatedParent bool) uint64 {
+    definedBitmap := bmi.GreaterThanBitmap(defLevels, info.DefLevel-1)
+
+    if hasRepeatedParent {
+        // Greater than level_info.repeated_ancestor_def_level - 1 implies >= the
+        // repeated_ancestor_def_level
+        presentBitmap := bmi.GreaterThanBitmap(defLevels, info.RepeatedAncestorDefLevel-1)
+        selectedBits := bmi.ExtractBits(definedBitmap, presentBitmap)
+        selectedCount := int64(bits.OnesCount64(presentBitmap))
+        if selectedCount > remainingUpperBound {
+            panic("values read exceeded upper bound")
+        }
+        wr.AppendWord(selectedBits, selectedCount)
+        return uint64(bits.OnesCount64(selectedBits))
+    }
+
+    if int64(len(defLevels)) > remainingUpperBound {
+        panic("values read exceed upper bound")
+    }
+
+    wr.AppendWord(definedBitmap, int64(len(defLevels)))
+    return uint64(bits.OnesCount64(definedBitmap))
+}
+
+// create a bitmap out of the definition levels
+func defLevelsToBitmapInternal(defLevels []int16, info LevelInfo, out *ValidityBitmapInputOutput, hasRepeatedParent bool) {
+    wr := utils.NewFirstTimeBitmapWriter(out.ValidBits, out.ValidBitsOffset, int64(len(defLevels)))
+    defer wr.Finish()
+    setCount := defLevelsBatchToBitmap(defLevels, out.ReadUpperBound, info, wr, hasRepeatedParent)
+    out.Read = int64(wr.Pos())
+    out.NullCount += out.Read - int64(setCount)
+}
+
+// DefLevelsToBitmap creates a validity bitmap out of the passed in definition levels and info object.
+func DefLevelsToBitmap(defLevels []int16, info LevelInfo, out *ValidityBitmapInputOutput) {
+    hasRepeatedParent := false
+    if info.RepLevel > 0 {
+        hasRepeatedParent = true
+    }
+    defLevelsToBitmapInternal(defLevels, info, out, hasRepeatedParent)
+}
+
+// DefRepLevelsToListInfo takes in the definition and repetition levels in order to populate the validity bitmap
+// and properly handle nested lists and update the offsets for them. 
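As a concrete illustration of DefLevelsToBitmap above, here is a hedged, test-style sketch for a flat optional column (DefLevel 1, no repeated parent); it is not one of the patch's own tests, and the expected numbers follow from defLevelsBatchToBitmap. The DefRepLevelsToListInfo implementation continues right after.

package file

import "testing"

func TestDefLevelsToBitmapFlatOptionalSketch(t *testing.T) {
    defLevels := []int16{1, 0, 1, 1} // the second value is null
    out := ValidityBitmapInputOutput{
        ReadUpperBound: int64(len(defLevels)),
        ValidBits:      make([]byte, 1),
    }
    DefLevelsToBitmap(defLevels, LevelInfo{NullSlotUsage: 1, DefLevel: 1}, &out)

    // Bits 0, 2 and 3 are set: 0b00001101.
    if out.Read != 4 || out.NullCount != 1 || out.ValidBits[0] != 0x0d {
        t.Fatalf("unexpected result: read=%d nulls=%d bits=%08b",
            out.Read, out.NullCount, out.ValidBits[0])
    }
}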
+func DefRepLevelsToListInfo(defLevels, repLevels []int16, info LevelInfo, out *ValidityBitmapInputOutput, offsets []int32) error { + var wr utils.BitmapWriter + if out.ValidBits != nil { + wr = utils.NewFirstTimeBitmapWriter(out.ValidBits, out.ValidBitsOffset, out.ReadUpperBound) + defer wr.Finish() + } + offsetPos := 0 + for idx := range defLevels { + // skip items that belong to empty or null ancestor lists and further nested lists + if defLevels[idx] < info.RepeatedAncestorDefLevel || repLevels[idx] > info.RepLevel { + continue + } + + if repLevels[idx] == info.RepLevel { + // continuation of an existing list. + // offsets can be null for structs with repeated children + if offsetPos < len(offsets) { + if offsets[offsetPos] == math.MaxInt32 { + return xerrors.New("list index overflow") + } + offsets[offsetPos]++ + } + } else { + if (wr != nil && int64(wr.Pos()) >= out.ReadUpperBound) || (offsetPos >= int(out.ReadUpperBound)) { + return xerrors.Errorf("definition levels exceeded upper bound: %d", out.ReadUpperBound) + } + + // current_rep < list rep_level i.e. start of a list (ancestor empty lists + // are filtered out above) + // offsets can be null for structs with repeated children + if offsetPos+1 < len(offsets) { + offsetPos++ + // use cumulative offsets because variable size lists are more common + // than fixed size lists so it should be cheaper to make these + // cumulative and subtract when validating fixed size lists + offsets[offsetPos] = offsets[offsetPos-1] + if defLevels[idx] >= info.DefLevel { + if offsets[offsetPos] == math.MaxInt32 { + return xerrors.New("list index overflow") + } + offsets[offsetPos]++ + } + } + + if wr != nil { + // the level info def level for lists reflects element present level + // the prior level distinguishes between empty lists + if defLevels[idx] >= info.DefLevel-1 { + wr.Set() + } else { + out.NullCount++ + wr.Clear() + } + wr.Next() + } + } + } + + if len(offsets) > 0 { + out.Read = int64(offsetPos) + } else if wr != nil { + out.Read = int64(wr.Pos()) + } + + if out.NullCount > 0 && info.NullSlotUsage > 1 { + return xerrors.New("null values with null_slot_usage > 1 not supported.") + } + return nil +} + +// DefRepLevelsToBitmap constructs a full validitybitmap out of the definition and repetition levels +// properly handling nested lists and parents. +func DefRepLevelsToBitmap(defLevels, repLevels []int16, info LevelInfo, out *ValidityBitmapInputOutput) error { + info.RepLevel++ + info.DefLevel++ + return DefRepLevelsToListInfo(defLevels, repLevels, info, out, nil) +} diff --git a/go/parquet/file/level_conversion_test.go b/go/parquet/file/level_conversion_test.go new file mode 100644 index 0000000000000..08d2fe311f88a --- /dev/null +++ b/go/parquet/file/level_conversion_test.go @@ -0,0 +1,194 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package file + +import ( + "strings" + "testing" + + "github.com/apache/arrow/go/arrow/bitutil" + "github.com/apache/arrow/go/parquet/internal/bmi" + "github.com/apache/arrow/go/parquet/internal/utils" + "github.com/stretchr/testify/assert" +) + +func bitmapToString(bitmap []byte, bitCount int64) string { + var bld strings.Builder + bld.Grow(int(bitCount)) + for i := 0; i < int(bitCount); i++ { + if bitutil.BitIsSet(bitmap, i) { + bld.WriteByte('1') + } else { + bld.WriteByte('0') + } + } + return bld.String() +} + +func TestDefLevelsToBitmap(t *testing.T) { + defLevels := []int16{3, 3, 3, 2, 3, 3, 3, 3, 3} + validBits := []byte{2, 0} + + var info LevelInfo + info.DefLevel = 3 + info.RepLevel = 1 + + var io ValidityBitmapInputOutput + io.ReadUpperBound = int64(len(defLevels)) + io.Read = -1 + io.ValidBits = validBits + + DefLevelsToBitmap(defLevels, info, &io) + assert.Equal(t, int64(9), io.Read) + assert.Equal(t, int64(1), io.NullCount) + + // call again with 0 definition levels make sure that valid bits is unmodified + curByte := validBits[1] + io.NullCount = 0 + DefLevelsToBitmap(defLevels[:0], info, &io) + + assert.Zero(t, io.Read) + assert.Zero(t, io.NullCount) + assert.Equal(t, curByte, validBits[1]) +} + +func TestDefLevelstToBitmapPowerOf2(t *testing.T) { + defLevels := []int16{3, 3, 3, 2, 3, 3, 3, 3} + validBits := []byte{1, 0} + + var ( + info LevelInfo + io ValidityBitmapInputOutput + ) + + info.RepLevel = 1 + info.DefLevel = 3 + io.Read = -1 + io.ReadUpperBound = int64(len(defLevels)) + io.ValidBits = validBits + + DefLevelsToBitmap(defLevels[4:8], info, &io) + assert.Equal(t, int64(4), io.Read) + assert.Zero(t, io.NullCount) +} + +func TestGreaterThanBitmapGeneratesExpectedBitmasks(t *testing.T) { + defLevels := []int16{ + 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, + 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, + 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, + 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7} + + tests := []struct { + name string + num int + rhs int16 + expected uint64 + }{ + {"no levels", 0, 0, 0}, + {"64 and 8", 64, 8, 0}, + {"64 and -1", 64, -1, 0xFFFFFFFFFFFFFFFF}, + // should be zero padded + {"zero pad 47, -1", 47, -1, 0x7FFFFFFFFFFF}, + {"zero pad 64 and 6", 64, 6, 0x8080808080808080}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.expected, bmi.GreaterThanBitmap(defLevels[:tt.num], tt.rhs)) + }) + } +} + +func TestWithRepetitionlevelFiltersOutEmptyListValues(t *testing.T) { + validityBitmap := make([]byte, 8) + io := ValidityBitmapInputOutput{ + ReadUpperBound: 64, + Read: 1, + NullCount: 5, + ValidBits: validityBitmap, + ValidBitsOffset: 1, + } + + info := LevelInfo{ + RepeatedAncestorDefLevel: 1, + DefLevel: 2, + RepLevel: 1, + } + + defLevels := []int16{0, 0, 0, 2, 2, 1, 0, 2} + DefLevelsToBitmap(defLevels, info, &io) + + assert.Equal(t, bitmapToString(validityBitmap, 8), "01101000") + for _, x := range validityBitmap[1:] { + assert.Zero(t, x) + } + assert.EqualValues(t, 6, io.NullCount) + assert.EqualValues(t, 4, io.Read) +} + +type MultiLevelTestData struct { + defLevels []int16 + repLevels []int16 +} + +func TriplNestedList() MultiLevelTestData { + // Triply nested list values borrow from write_path + // [null, [[1, null, 3], []], []], + // [[[]], [[], [1, 2]], null, [[3]]], + // null, + // [] + return MultiLevelTestData{ + defLevels: []int16{2, 7, 6, 7, 5, 3, // first row + 5, 5, 7, 
7, 2, 7, // second row + 0, // third row + 1}, + repLevels: []int16{0, 1, 3, 3, 2, 1, // first row + 0, 1, 2, 3, 1, 1, // second row + 0, 0}, + } +} + +func TestActualCase(t *testing.T) { + out := make([]byte, 512) + defs := make([]int16, 64) + for i := range defs { + defs[i] = 3 + } + + defs[0] = 0 + defs[25] = 0 + defs[33] = 0 + defs[49] = 0 + defs[58] = 0 + defs[59] = 0 + defs[60] = 0 + defs[61] = 0 + + remaining := int64(4096) + info := LevelInfo{ + NullSlotUsage: 0, + DefLevel: 3, + RepLevel: 1, + RepeatedAncestorDefLevel: 2, + } + + wr := utils.NewFirstTimeBitmapWriter(out, 0, 4096) + v := defLevelsBatchToBitmap(defs, remaining, info, wr, true) + assert.EqualValues(t, 56, v) + assert.Equal(t, []byte{255, 255, 255, 255}, out[:4]) +} diff --git a/go/parquet/file/page_reader.go b/go/parquet/file/page_reader.go new file mode 100644 index 0000000000000..251499af21ce7 --- /dev/null +++ b/go/parquet/file/page_reader.go @@ -0,0 +1,620 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package file + +import ( + "bytes" + "io" + "sync" + + "github.com/JohnCGriffin/overflow" + "github.com/apache/arrow/go/arrow/ipc" + "github.com/apache/arrow/go/arrow/memory" + "github.com/apache/arrow/go/parquet" + "github.com/apache/arrow/go/parquet/compress" + "github.com/apache/arrow/go/parquet/internal/debug" + "github.com/apache/arrow/go/parquet/internal/encryption" + format "github.com/apache/arrow/go/parquet/internal/gen-go/parquet" + "github.com/apache/arrow/go/parquet/internal/thrift" + "github.com/apache/arrow/go/parquet/metadata" + "golang.org/x/xerrors" +) + +// PageReader is the interface used by the columnreader in order to read +// and handle DataPages and loop through them. +type PageReader interface { + // Set the maximum Page header size allowed to be read + SetMaxPageHeaderSize(int) + // Return the current page, or nil if there are no more + Page() Page + // Fetch the next page, returns false if there are no more pages + Next() bool + // if Next returns false, Err will return the error encountered or + // nil if there was no error and you just hit the end of the page + Err() error + // Reset allows reusing a page reader + Reset(r parquet.ReaderAtSeeker, nrows int64, compressType compress.Compression, ctx *CryptoContext) +} + +// Page is an interface for handling DataPages or Dictionary Pages +type Page interface { + // Returns which kind of page this is + Type() format.PageType + // Get the raw bytes of this page + Data() []byte + // return the encoding used for this page, Plain/RLE, etc. 
+ Encoding() format.Encoding + // get the number of values in this page + NumValues() int32 + // release this page object back into the page pool for re-use + Release() +} + +type page struct { + buf *memory.Buffer + typ format.PageType + + nvals int32 + encoding format.Encoding +} + +func (p *page) Type() format.PageType { return p.typ } +func (p *page) Data() []byte { return p.buf.Bytes() } +func (p *page) NumValues() int32 { return p.nvals } +func (p *page) Encoding() format.Encoding { return p.encoding } + +// DataPage is the base interface for both DataPageV1 and DataPageV2 of the +// parquet spec. +type DataPage interface { + Page + UncompressedSize() int64 + Statistics() metadata.EncodedStatistics +} + +// Create some pools to use for reusing the data page objects themselves so that +// we can avoid tight loops that are creating and destroying tons of individual +// objects. This combined with a Release function on the pages themselves +// which will put them back into the pool yields significant memory reduction +// and performance benefits + +var dataPageV1Pool = sync.Pool{ + New: func() interface{} { return (*DataPageV1)(nil) }, +} + +var dataPageV2Pool = sync.Pool{ + New: func() interface{} { return (*DataPageV2)(nil) }, +} + +var dictPagePool = sync.Pool{ + New: func() interface{} { return (*DictionaryPage)(nil) }, +} + +// DataPageV1 represents a DataPage version 1 from the parquet.thrift file +type DataPageV1 struct { + page + + defLvlEncoding format.Encoding + repLvlEncoding format.Encoding + uncompressedSize int64 + statistics metadata.EncodedStatistics +} + +// NewDataPageV1 returns a V1 data page with the given buffer as its data and the specified encoding information +// +// Will utilize objects that have been released back into the data page pool and +// re-use them if available as opposed to creating new objects. Calling Release on the +// data page object will release it back to the pool for re-use. +func NewDataPageV1(buffer *memory.Buffer, num int32, encoding, defEncoding, repEncoding parquet.Encoding, uncompressedSize int64) *DataPageV1 { + dp := dataPageV1Pool.Get().(*DataPageV1) + if dp == nil { + return &DataPageV1{ + page: page{buf: buffer, typ: format.PageType_DATA_PAGE, nvals: num, encoding: format.Encoding(encoding)}, + defLvlEncoding: format.Encoding(defEncoding), + repLvlEncoding: format.Encoding(repEncoding), + uncompressedSize: uncompressedSize, + } + } + + dp.buf, dp.nvals = buffer, num + dp.encoding = format.Encoding(encoding) + dp.defLvlEncoding, dp.repLvlEncoding = format.Encoding(defEncoding), format.Encoding(repEncoding) + dp.statistics.HasMax, dp.statistics.HasMin = false, false + dp.statistics.HasNullCount, dp.statistics.HasDistinctCount = false, false + dp.uncompressedSize = uncompressedSize + return dp +} + +// NewDataPageV1WithStats is the same as NewDataPageV1, but also allows adding the stat info into the created page +func NewDataPageV1WithStats(buffer *memory.Buffer, num int32, encoding, defEncoding, repEncoding parquet.Encoding, uncompressedSize int64, stats metadata.EncodedStatistics) *DataPageV1 { + ret := NewDataPageV1(buffer, num, encoding, defEncoding, repEncoding, uncompressedSize) + ret.statistics = stats + return ret +} + +// Release this page back into the DataPage object pool so that it can be reused. +// +// After calling this function, the object should not be utilized anymore, otherwise +// conflicts can arise. 
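To make the pooling contract above concrete, a hedged sketch of the intended page lifecycle follows before the Release implementation; buildPlainDataPage is a hypothetical helper, and the parquet.Encodings constants are assumed from the rest of the module rather than shown in this patch.

package file

import (
    "github.com/apache/arrow/go/arrow/memory"
    "github.com/apache/arrow/go/parquet"
)

// buildPlainDataPage constructs a V1 data page around raw bytes, consumes it,
// and hands the object back to the pool via Release.
func buildPlainDataPage(raw []byte, numValues int32) {
    buf := memory.NewBufferBytes(raw)
    pg := NewDataPageV1(buf, numValues,
        parquet.Encodings.Plain, // value encoding (assumed constant name)
        parquet.Encodings.RLE,   // definition level encoding
        parquet.Encodings.RLE,   // repetition level encoding
        int64(len(raw)))
    _ = pg.Data() // ... decode the page contents here ...
    pg.Release()  // returns the object to dataPageV1Pool; pg must not be used afterwards
}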
+func (d *DataPageV1) Release() { + d.buf.Release() + d.buf = nil + dataPageV1Pool.Put(d) +} + +// UncompressedSize returns the size of the data in this data page when uncompressed +func (d *DataPageV1) UncompressedSize() int64 { return d.uncompressedSize } + +// Statistics returns the encoded statistics on this data page +func (d *DataPageV1) Statistics() metadata.EncodedStatistics { return d.statistics } + +// DefinitionLevelEncoding returns the encoding utilized for the Definition Levels +func (d *DataPageV1) DefinitionLevelEncoding() parquet.Encoding { + return parquet.Encoding(d.defLvlEncoding) +} + +// RepetitionLevelEncoding returns the encoding utilized for the Repetition Levels +func (d *DataPageV1) RepetitionLevelEncoding() parquet.Encoding { + return parquet.Encoding(d.repLvlEncoding) +} + +// DataPageV2 is the representation of the V2 data page from the parquet.thrift spec +type DataPageV2 struct { + page + + nulls int32 + nrows int32 + defLvlByteLen int32 + repLvlByteLen int32 + compressed bool + uncompressedSize int64 + statistics metadata.EncodedStatistics +} + +// NewDataPageV2 constructs a new V2 data page with the provided information and a buffer of the raw data. +func NewDataPageV2(buffer *memory.Buffer, numValues, numNulls, numRows int32, encoding parquet.Encoding, defLvlsByteLen, repLvlsByteLen int32, uncompressed int64, isCompressed bool) *DataPageV2 { + dp := dataPageV2Pool.Get().(*DataPageV2) + if dp == nil { + return &DataPageV2{ + page: page{buf: buffer, typ: format.PageType_DATA_PAGE_V2, nvals: numValues, encoding: format.Encoding(encoding)}, + nulls: numNulls, + nrows: numRows, + defLvlByteLen: defLvlsByteLen, + repLvlByteLen: repLvlsByteLen, + compressed: isCompressed, + uncompressedSize: uncompressed, + } + } + + dp.buf, dp.nvals = buffer, numValues + dp.encoding = format.Encoding(encoding) + dp.nulls, dp.nrows = numNulls, numRows + dp.defLvlByteLen, dp.repLvlByteLen = defLvlsByteLen, repLvlsByteLen + dp.compressed, dp.uncompressedSize = isCompressed, uncompressed + dp.statistics.HasMax, dp.statistics.HasMin = false, false + dp.statistics.HasNullCount, dp.statistics.HasDistinctCount = false, false + return dp +} + +// NewDataPageV2WithStats is the same as NewDataPageV2 but allows providing the encoded stats with the page. +func NewDataPageV2WithStats(buffer *memory.Buffer, numValues, numNulls, numRows int32, encoding parquet.Encoding, defLvlsByteLen, repLvlsByteLen int32, uncompressed int64, isCompressed bool, stats metadata.EncodedStatistics) *DataPageV2 { + ret := NewDataPageV2(buffer, numValues, numNulls, numRows, encoding, defLvlsByteLen, repLvlsByteLen, uncompressed, isCompressed) + ret.statistics = stats + return ret +} + +// Release this page back into the DataPage object pool so that it can be reused. +// +// After calling this function, the object should not be utilized anymore, otherwise +// conflicts can arise. +func (d *DataPageV2) Release() { + d.buf.Release() + d.buf = nil + dataPageV2Pool.Put(d) +} + +// UncompressedSize is the size of the raw page when uncompressed. If `IsCompressed` is true, then +// the raw data in the buffer is expected to be compressed. 
+func (d *DataPageV2) UncompressedSize() int64 { return d.uncompressedSize } + +// Statistics are the encoded statistics in the data page +func (d *DataPageV2) Statistics() metadata.EncodedStatistics { return d.statistics } + +// NumNulls is the reported number of nulls in this datapage +func (d *DataPageV2) NumNulls() int32 { return d.nulls } + +// DefinitionLevelByteLen is the number of bytes in the buffer that are used to represent the definition levels +func (d *DataPageV2) DefinitionLevelByteLen() int32 { return d.defLvlByteLen } + +// RepetitionLevelByteLen is the number of bytes in the buffer which are used to represent the repetition Levels +func (d *DataPageV2) RepetitionLevelByteLen() int32 { return d.repLvlByteLen } + +// IsCompressed returns true if the data of this page is compressed +func (d *DataPageV2) IsCompressed() bool { return d.compressed } + +// DictionaryPage represents the a page of data that uses dictionary encoding +type DictionaryPage struct { + page + + sorted bool +} + +// NewDictionaryPage constructs a new dictionary page with the provided data buffer and number of values. +func NewDictionaryPage(buffer *memory.Buffer, nvals int32, encoding parquet.Encoding) *DictionaryPage { + dp := dictPagePool.Get().(*DictionaryPage) + if dp == nil { + return &DictionaryPage{ + page: page{ + buf: buffer, + typ: format.PageType_DICTIONARY_PAGE, + nvals: nvals, + encoding: format.Encoding(encoding), + }, + } + } + + dp.buf = buffer + dp.nvals = nvals + dp.encoding = format.Encoding(encoding) + dp.sorted = false + return dp +} + +// Release this page back into the DataPage object pool so that it can be reused. +// +// After calling this function, the object should not be utilized anymore, otherwise +// conflicts can arise. +func (d *DictionaryPage) Release() { + d.buf.Release() + d.buf = nil + dictPagePool.Put(d) +} + +// IsSorted returns whether the dictionary itself is sorted +func (d *DictionaryPage) IsSorted() bool { return d.sorted } + +type serializedPageReader struct { + r ipc.ReadAtSeeker + nrows int64 + rowsSeen int64 + mem memory.Allocator + codec compress.Codec + + curPageHdr *format.PageHeader + buf *memory.Buffer + pageOrd int16 + maxPageHeaderSize int + + curPage Page + cryptoCtx CryptoContext + dataPageAad string + dataPageHeaderAad string + + decompressBuffer bytes.Buffer + err error +} + +// NewPageReader returns a page reader for the data which can be read from the provided reader and compression. 
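Before the NewPageReader implementation below, a hedged sketch of how a PageReader is meant to be driven; iteratePages is a hypothetical helper, and only identifiers defined in this patch are referenced.

package file

import (
    "fmt"

    "github.com/apache/arrow/go/arrow/memory"
    "github.com/apache/arrow/go/parquet"
    "github.com/apache/arrow/go/parquet/compress"
    format "github.com/apache/arrow/go/parquet/internal/gen-go/parquet"
)

// iteratePages walks every page in an uncompressed column chunk and reports
// the page type and value count.
func iteratePages(rdr parquet.ReaderAtSeeker, nrows int64) error {
    pr, err := NewPageReader(rdr, nrows, compress.Codecs.Uncompressed, memory.DefaultAllocator, nil)
    if err != nil {
        return err
    }
    for pr.Next() {
        pg := pr.Page()
        if pg.Type() == format.PageType_DICTIONARY_PAGE {
            fmt.Println("dictionary page:", pg.NumValues(), "values")
            continue
        }
        fmt.Println("data page:", pg.NumValues(), "values")
        // Pages are pooled; the next call to Next releases pg, so do not retain it.
    }
    return pr.Err()
}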
+func NewPageReader(r parquet.ReaderAtSeeker, nrows int64, compressType compress.Compression, mem memory.Allocator, ctx *CryptoContext) (PageReader, error) { + if mem == nil { + mem = memory.NewGoAllocator() + } + + codec, err := compress.GetCodec(compressType) + if err != nil { + return nil, err + } + + rdr := &serializedPageReader{ + r: r, + maxPageHeaderSize: defaultMaxPageHeaderSize, + nrows: nrows, + mem: mem, + codec: codec, + buf: memory.NewResizableBuffer(mem), + } + rdr.decompressBuffer.Grow(defaultPageHeaderSize) + if ctx != nil { + rdr.cryptoCtx = *ctx + rdr.initDecryption() + } + return rdr, nil +} + +func (p *serializedPageReader) Reset(r parquet.ReaderAtSeeker, nrows int64, compressType compress.Compression, ctx *CryptoContext) { + p.rowsSeen, p.pageOrd = 0, 0 + p.curPageHdr, p.curPage, p.err = nil, nil, nil + p.r, p.nrows = r, nrows + + p.codec, p.err = compress.GetCodec(compressType) + if p.err != nil { + return + } + p.buf.ResizeNoShrink(0) + p.decompressBuffer.Reset() + if ctx != nil { + p.cryptoCtx = *ctx + p.initDecryption() + } else { + p.cryptoCtx = CryptoContext{} + p.dataPageAad = "" + p.dataPageHeaderAad = "" + } +} + +func (p *serializedPageReader) Err() error { return p.err } + +func (p *serializedPageReader) SetMaxPageHeaderSize(sz int) { + p.maxPageHeaderSize = sz +} + +func (p *serializedPageReader) initDecryption() { + if p.cryptoCtx.DataDecryptor != nil { + p.dataPageAad = encryption.CreateModuleAad(p.cryptoCtx.DataDecryptor.FileAad(), encryption.DataPageModule, + p.cryptoCtx.RowGroupOrdinal, p.cryptoCtx.ColumnOrdinal, -1) + } + if p.cryptoCtx.MetaDecryptor != nil { + p.dataPageHeaderAad = encryption.CreateModuleAad(p.cryptoCtx.MetaDecryptor.FileAad(), encryption.DataPageHeaderModule, + p.cryptoCtx.RowGroupOrdinal, p.cryptoCtx.ColumnOrdinal, -1) + } +} + +func (p *serializedPageReader) updateDecryption(decrypt encryption.Decryptor, moduleType int8, pageAad string) { + if p.cryptoCtx.StartDecryptWithDictionaryPage { + aad := encryption.CreateModuleAad(decrypt.FileAad(), moduleType, p.cryptoCtx.RowGroupOrdinal, p.cryptoCtx.ColumnOrdinal, -1) + decrypt.UpdateAad(aad) + } else { + pageaad := []byte(pageAad) + encryption.QuickUpdatePageAad(pageaad, p.pageOrd) + decrypt.UpdateAad(string(pageaad)) + } +} + +func (p *serializedPageReader) Page() Page { + return p.curPage +} + +func (p *serializedPageReader) decompress(lenCompressed int, buf []byte) ([]byte, error) { + p.decompressBuffer.Reset() + p.decompressBuffer.Grow(lenCompressed) + if _, err := io.CopyN(&p.decompressBuffer, p.r, int64(lenCompressed)); err != nil { + return nil, err + } + + data := p.decompressBuffer.Bytes() + if p.cryptoCtx.DataDecryptor != nil { + data = p.cryptoCtx.DataDecryptor.Decrypt(p.decompressBuffer.Bytes()) + } + + return p.codec.Decode(buf, data), nil +} + +type dataheader interface { + IsSetStatistics() bool + GetStatistics() *format.Statistics +} + +func extractStats(dataHeader dataheader) (pageStats metadata.EncodedStatistics) { + if dataHeader.IsSetStatistics() { + stats := dataHeader.GetStatistics() + if stats.IsSetMaxValue() { + pageStats.SetMax(stats.GetMaxValue()) + } else if stats.IsSetMax() { + pageStats.SetMax(stats.GetMax()) + } + if stats.IsSetMinValue() { + pageStats.SetMin(stats.GetMinValue()) + } else if stats.IsSetMin() { + pageStats.SetMin(stats.GetMin()) + } + + if stats.IsSetNullCount() { + pageStats.SetNullCount(stats.GetNullCount()) + } + if stats.IsSetDistinctCount() { + pageStats.SetDistinctCount(stats.GetDistinctCount()) + } + } + return +} + +func (p 
*serializedPageReader) Next() bool { + // Loop here because there may be unhandled page types that we skip until + // finding a page that we do know what to do with + if p.curPage != nil { + p.curPage.Release() + } + p.curPage = nil + p.curPageHdr = format.NewPageHeader() + p.err = nil + + for p.rowsSeen < p.nrows { + // headerSize := 0 + allowedPgSz := defaultPageHeaderSize + + start, _ := p.r.Seek(0, io.SeekCurrent) + p.decompressBuffer.Reset() + // Page headers can be very large because of page statistics + // We try to deserialize a larger buffer progressively + // until a maximum allowed header limit + for { + n, err := io.CopyN(&p.decompressBuffer, p.r, int64(allowedPgSz)) + // view, err := p.r.Peek(allowedPgSz) + if err != nil && err != io.EOF { + p.err = err + return false + } + + if n == 0 { + return false + } + + view := p.decompressBuffer.Bytes() + + extra := 0 + if p.cryptoCtx.MetaDecryptor != nil { + p.updateDecryption(p.cryptoCtx.MetaDecryptor, encryption.DictPageHeaderModule, p.dataPageHeaderAad) + view = p.cryptoCtx.MetaDecryptor.Decrypt(view) + extra = p.cryptoCtx.MetaDecryptor.CiphertextSizeDelta() + } + + remaining, err := thrift.DeserializeThrift(p.curPageHdr, view) + if err != nil { + allowedPgSz *= 2 + if allowedPgSz > p.maxPageHeaderSize { + p.err = xerrors.New("parquet: deserializing page header failed") + return false + } + continue + } + + p.r.Seek(start+int64(len(view)-int(remaining)+extra), io.SeekStart) + break + } + + lenCompressed := int(p.curPageHdr.GetCompressedPageSize()) + lenUncompressed := int(p.curPageHdr.GetUncompressedPageSize()) + if lenCompressed < 0 || lenUncompressed < 0 { + p.err = xerrors.New("parquet: invalid page header") + return false + } + + if p.cryptoCtx.DataDecryptor != nil { + p.updateDecryption(p.cryptoCtx.DataDecryptor, encryption.DictPageModule, p.dataPageAad) + } + + p.buf.ResizeNoShrink(lenUncompressed) + + switch p.curPageHdr.GetType() { + case format.PageType_DICTIONARY_PAGE: + p.cryptoCtx.StartDecryptWithDictionaryPage = false + dictHeader := p.curPageHdr.GetDictionaryPageHeader() + if dictHeader.GetNumValues() < 0 { + p.err = xerrors.New("parquet: invalid page header (negative number of values)") + return false + } + + data, err := p.decompress(lenCompressed, p.buf.Bytes()) + if err != nil { + p.err = err + return false + } + debug.Assert(len(data) == lenUncompressed, "len(data) != lenUncompressed") + + // p.buf.Resize(lenUncompressed) + // make dictionary page + p.curPage = &DictionaryPage{ + page: page{ + buf: memory.NewBufferBytes(data), + typ: p.curPageHdr.Type, + nvals: dictHeader.GetNumValues(), + encoding: dictHeader.GetEncoding(), + }, + sorted: dictHeader.IsSetIsSorted() && dictHeader.GetIsSorted(), + } + + case format.PageType_DATA_PAGE: + p.pageOrd++ + dataHeader := p.curPageHdr.GetDataPageHeader() + if dataHeader.GetNumValues() < 0 { + p.err = xerrors.New("parquet: invalid page header (negative number of values)") + return false + } + + p.rowsSeen += int64(dataHeader.GetNumValues()) + data, err := p.decompress(lenCompressed, p.buf.Bytes()) + if err != nil { + p.err = err + return false + } + debug.Assert(len(data) == lenUncompressed, "len(data) != lenUncompressed") + + // make datapagev1 + p.curPage = &DataPageV1{ + page: page{ + buf: memory.NewBufferBytes(data), + typ: p.curPageHdr.Type, + nvals: dataHeader.GetNumValues(), + encoding: dataHeader.GetEncoding(), + }, + defLvlEncoding: dataHeader.GetDefinitionLevelEncoding(), + repLvlEncoding: dataHeader.GetRepetitionLevelEncoding(), + uncompressedSize: 
int64(lenUncompressed), + statistics: extractStats(dataHeader), + } + case format.PageType_DATA_PAGE_V2: + p.pageOrd++ + dataHeader := p.curPageHdr.GetDataPageHeaderV2() + if dataHeader.GetNumValues() < 0 { + p.err = xerrors.New("parquet: invalid page header (negative number of values)") + return false + } + + if dataHeader.GetDefinitionLevelsByteLength() < 0 || dataHeader.GetRepetitionLevelsByteLength() < 0 { + p.err = xerrors.New("parquet: invalid page header (negative levels byte length)") + return false + } + + compressed := dataHeader.GetIsCompressed() + // extract stats + p.rowsSeen += int64(dataHeader.GetNumValues()) + levelsBytelen, ok := overflow.Add(int(dataHeader.GetDefinitionLevelsByteLength()), int(dataHeader.GetRepetitionLevelsByteLength())) + if !ok { + p.err = xerrors.New("parquet: levels size too large (corrupt file?)") + return false + } + + var data []byte + if compressed { + if levelsBytelen > 0 { + io.ReadFull(p.r, p.buf.Bytes()[:levelsBytelen]) + } + if data, p.err = p.decompress(lenCompressed-levelsBytelen, p.buf.Bytes()[levelsBytelen:]); p.err != nil { + return false + } + } else { + io.ReadFull(p.r, p.buf.Bytes()) + data = p.buf.Bytes() + } + debug.Assert(len(data) == lenUncompressed, "len(data) != lenUncompressed") + + // make datapage v2 + p.curPage = &DataPageV2{ + page: page{ + buf: memory.NewBufferBytes(data), + typ: p.curPageHdr.Type, + nvals: dataHeader.GetNumValues(), + encoding: dataHeader.GetEncoding(), + }, + nulls: dataHeader.GetNumNulls(), + nrows: dataHeader.GetNumRows(), + defLvlByteLen: dataHeader.GetDefinitionLevelsByteLength(), + repLvlByteLen: dataHeader.GetRepetitionLevelsByteLength(), + compressed: compressed, + uncompressedSize: int64(lenUncompressed), + statistics: extractStats(dataHeader), + } + default: + // we don't know this page type, we're allowed to skip non-data pages + continue + } + + p.buf = memory.NewResizableBuffer(p.mem) + return true + } + + return false +} diff --git a/go/parquet/file/row_group_reader.go b/go/parquet/file/row_group_reader.go new file mode 100644 index 0000000000000..9c74a25c11eca --- /dev/null +++ b/go/parquet/file/row_group_reader.go @@ -0,0 +1,130 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
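[Editor's note] The serializedPageReader introduced above is driven through the Next/Page/Err methods defined in this commit. The following is a minimal, hypothetical consumption sketch, not part of the patch: the function name and the src/numRows/codec placeholders are invented for illustration, and it assumes the parquet and compress packages of this module are imported; only identifiers that appear in this commit (NewPageReader, the page types and their accessors) are used. Note that Next releases the page returned by the previous iteration, so callers should not hold a Page across calls.

// sketch only: src, numRows and codec are supplied by the caller
func dumpPages(src parquet.ReaderAtSeeker, numRows int64, codec compress.Compression) error {
	rdr, err := NewPageReader(src, numRows, codec, nil /*default allocator*/, nil /*no decryption*/)
	if err != nil {
		return err
	}
	for rdr.Next() { // Next releases the page handed out on the previous iteration
		switch pg := rdr.Page().(type) {
		case *DictionaryPage:
			_ = pg.IsSorted()
		case *DataPageV2:
			_ = pg.NumNulls() // v2 pages also expose level byte lengths and IsCompressed
		case *DataPageV1:
			// plain v1 data page
		}
	}
	return rdr.Err() // reports whatever error, if any, stopped the iteration
}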
+ +package file + +import ( + "github.com/apache/arrow/go/arrow/ipc" + "github.com/apache/arrow/go/parquet" + "github.com/apache/arrow/go/parquet/internal/encryption" + "github.com/apache/arrow/go/parquet/internal/utils" + "github.com/apache/arrow/go/parquet/metadata" + "golang.org/x/xerrors" +) + +const ( + maxDictHeaderSize int64 = 100 +) + +// RowGroupReader is the primary interface for reading a single row group +type RowGroupReader struct { + r ipc.ReadAtSeeker + sourceSz int64 + fileMetadata *metadata.FileMetaData + rgMetadata *metadata.RowGroupMetaData + props *parquet.ReaderProperties + fileDecryptor encryption.FileDecryptor +} + +// MetaData returns the metadata of the current Row Group +func (r *RowGroupReader) MetaData() *metadata.RowGroupMetaData { return r.rgMetadata } + +// NumColumns returns the number of columns of data as defined in the metadata of this row group +func (r *RowGroupReader) NumColumns() int { return r.rgMetadata.NumColumns() } + +// NumRows returns the number of rows in just this row group +func (r *RowGroupReader) NumRows() int64 { return r.rgMetadata.NumRows() } + +// ByteSize returns the full byte size of this row group as defined in its metadata +func (r *RowGroupReader) ByteSize() int64 { return r.rgMetadata.TotalByteSize() } + +// Column returns a column reader for the requested (0-indexed) column +// +// panics if passed a column not in the range [0, NumColumns) +func (r *RowGroupReader) Column(i int) ColumnChunkReader { + if i >= r.NumColumns() || i < 0 { + panic(xerrors.Errorf("parquet: trying to read column index %d but row group metadata only has %d columns", i, r.rgMetadata.NumColumns())) + } + + descr := r.fileMetadata.Schema.Column(i) + pageRdr, err := r.GetColumnPageReader(i) + if err != nil { + panic(xerrors.Errorf("parquet: unable to initialize page reader: %w", err)) + } + return NewColumnReader(descr, pageRdr, r.props.Allocator()) +} + +func (r *RowGroupReader) GetColumnPageReader(i int) (PageReader, error) { + col, err := r.rgMetadata.ColumnChunk(i) + if err != nil { + return nil, err + } + + colStart := col.DataPageOffset() + if col.HasDictionaryPage() && col.DictionaryPageOffset() > 0 && colStart > col.DictionaryPageOffset() { + colStart = col.DictionaryPageOffset() + } + + colLen := col.TotalCompressedSize() + if r.fileMetadata.WriterVersion().LessThan(metadata.Parquet816FixedVersion) { + bytesRemain := r.sourceSz - (colStart + colLen) + padding := utils.Min(maxDictHeaderSize, bytesRemain) + colLen += padding + } + + stream, err := r.props.GetStream(r.r, colStart, colLen) + if err != nil { + return nil, err + } + + cryptoMetadata := col.CryptoMetadata() + if cryptoMetadata == nil { + return NewPageReader(stream, col.NumValues(), col.Compression(), r.props.Allocator(), nil) + } + + if r.fileDecryptor == nil { + return nil, xerrors.New("column in rowgroup is encrypted, but no file decryptor") + } + + const encryptedRowGroupsLimit = 32767 + if i > encryptedRowGroupsLimit { + return nil, xerrors.New("encrypted files cannot contain more than 32767 column chunks") + } + + if cryptoMetadata.IsSetENCRYPTION_WITH_FOOTER_KEY() { + ctx := CryptoContext{ + StartDecryptWithDictionaryPage: col.HasDictionaryPage(), + RowGroupOrdinal: r.rgMetadata.Ordinal(), + ColumnOrdinal: int16(i), + MetaDecryptor: r.fileDecryptor.GetFooterDecryptorForColumnMeta(""), + DataDecryptor: r.fileDecryptor.GetFooterDecryptorForColumnData(""), + } + return NewPageReader(stream, col.NumValues(), col.Compression(), r.props.Allocator(), &ctx) + } + + // column encrypted with 
it's own key + columnKeyMeta := cryptoMetadata.GetENCRYPTION_WITH_COLUMN_KEY().KeyMetadata + columnPath := cryptoMetadata.GetENCRYPTION_WITH_COLUMN_KEY().PathInSchema + + ctx := CryptoContext{ + StartDecryptWithDictionaryPage: col.HasDictionaryPage(), + RowGroupOrdinal: r.rgMetadata.Ordinal(), + ColumnOrdinal: int16(i), + MetaDecryptor: r.fileDecryptor.GetColumnMetaDecryptor(parquet.ColumnPath(columnPath).String(), string(columnKeyMeta), ""), + DataDecryptor: r.fileDecryptor.GetColumnDataDecryptor(parquet.ColumnPath(columnPath).String(), string(columnKeyMeta), ""), + } + return NewPageReader(stream, col.NumValues(), col.Compression(), r.props.Allocator(), &ctx) +} diff --git a/go/parquet/go.sum b/go/parquet/go.sum index cf7b6789c2a75..46b4f5a555aa9 100644 --- a/go/parquet/go.sum +++ b/go/parquet/go.sum @@ -75,6 +75,7 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= +github.com/stretchr/objx v0.1.0 h1:4G4v2dO3VZwixGIRoQ5Lfboy6nUhCyYzaqnIAPPhYs4= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= diff --git a/go/parquet/internal/bmi/bitmap_bmi2_noasm.go b/go/parquet/internal/bmi/bitmap_bmi2_noasm.go new file mode 100644 index 0000000000000..6dc4a39a60e5a --- /dev/null +++ b/go/parquet/internal/bmi/bitmap_bmi2_noasm.go @@ -0,0 +1,24 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
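[Editor's note] The one-line bmi.go change in the hunk below replaces `uint64(value << bitLen)` with `uint64(value) << bitLen`. The distinction matters because a Go shift is evaluated in the (narrow) type of its left operand before any conversion, so set bits were silently discarded once bitLen reached that type's width. A self-contained illustration follows; it assumes an 8-bit lookup-table entry, which is not shown in the hunk itself and is used here only to make the truncation visible.

package main

import "fmt"

func main() {
	var value uint8 = 0x01 // hypothetical pext lookup-table entry
	bitLen := 8

	// old expression: the shift happens in uint8, so the bit is lost before widening
	fmt.Println(uint64(value << bitLen)) // prints 0
	// fixed expression: widen to uint64 first, then shift
	fmt.Println(uint64(value) << bitLen) // prints 256
}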
+ +// +build noasm + +package bmi + +func init() { + funclist.extractBits = extractBitsGo + funclist.gtbitmap = greaterThanBitmapGo +} diff --git a/go/parquet/internal/bmi/bmi.go b/go/parquet/internal/bmi/bmi.go index ea0f6e374febe..a12af3e75d8e4 100644 --- a/go/parquet/internal/bmi/bmi.go +++ b/go/parquet/internal/bmi/bmi.go @@ -254,7 +254,7 @@ func extractBitsGo(bitmap, selectBitmap uint64) uint64 { for selectBitmap != 0 { maskLen := bits.OnesCount32(uint32(selectBitmap & lookupMask)) value := pextTable[selectBitmap&lookupMask][bitmap&lookupMask] - bitValue |= uint64(value << bitLen) + bitValue |= uint64(value) << bitLen bitLen += maskLen bitmap >>= lookupBits selectBitmap >>= lookupBits diff --git a/go/parquet/internal/encoding/boolean_decoder.go b/go/parquet/internal/encoding/boolean_decoder.go index a33b21a3181f6..bdf1fd56f9825 100644 --- a/go/parquet/internal/encoding/boolean_decoder.go +++ b/go/parquet/internal/encoding/boolean_decoder.go @@ -45,7 +45,7 @@ func (dec *PlainBooleanDecoder) Decode(out []bool) (int, error) { unalignedExtract := func(start, end, curBitOffset int) int { i := start - for ; curBitOffset < end; i, curBitOffset = i+1, curBitOffset+1 { + for ; curBitOffset < end && i < max; i, curBitOffset = i+1, curBitOffset+1 { out[i] = (dec.data[0] & byte(1<= 0 && int(min) < len(dc.dict) && int(max) >= 0 && int(max) < len(dc.dict) +} + +// Fill populates the slice passed in entirely with the value at dictionary index indicated by val +func (dc *Int96DictConverter) Fill(out interface{}, val utils.IndexType) error { + o := out.([]parquet.Int96) + if err := dc.ensure(val); err != nil { + return err + } + o[0] = dc.dict[val] + for i := 1; i < len(o); i *= 2 { + copy(o[i:], o[:i]) + } + return nil +} + +// FillZero populates the entire slice of out with the zero value for parquet.Int96 +func (dc *Int96DictConverter) FillZero(out interface{}) { + o := out.([]parquet.Int96) + o[0] = dc.zeroVal + for i := 1; i < len(o); i *= 2 { + copy(o[i:], o[:i]) + } +} + +// Copy populates the slice provided with the values in the dictionary at the indexes +// in the vals slice. +func (dc *Int96DictConverter) Copy(out interface{}, vals []utils.IndexType) error { + o := out.([]parquet.Int96) + for idx, val := range vals { + o[idx] = dc.dict[val] + } + return nil +} + // Float32Encoder is the interface for all encoding types that implement encoding // float32 values. 
type Float32Encoder interface { @@ -1385,6 +1535,8 @@ func NewDictConverter(dict TypedDecoder) utils.DictionaryConverter { return &Int32DictConverter{valueDecoder: dict.(Int32Decoder), dict: make([]int32, 0, dict.ValuesLeft())} case parquet.Types.Int64: return &Int64DictConverter{valueDecoder: dict.(Int64Decoder), dict: make([]int64, 0, dict.ValuesLeft())} + case parquet.Types.Int96: + return &Int96DictConverter{valueDecoder: dict.(Int96Decoder), dict: make([]parquet.Int96, 0, dict.ValuesLeft())} case parquet.Types.Float: return &Float32DictConverter{valueDecoder: dict.(Float32Decoder), dict: make([]float32, 0, dict.ValuesLeft())} case parquet.Types.Double: diff --git a/go/parquet/internal/encoding/typed_encoder.gen.go.tmpl b/go/parquet/internal/encoding/typed_encoder.gen.go.tmpl index d2ebbe423e0a7..14c1e9a46f50f 100644 --- a/go/parquet/internal/encoding/typed_encoder.gen.go.tmpl +++ b/go/parquet/internal/encoding/typed_encoder.gen.go.tmpl @@ -56,15 +56,15 @@ type {{.lower}}EncoderTraits struct{} // Encoder returns an encoder for {{.lower}} type data, using the specified encoding type and whether or not // it should be dictionary encoded. -{{- if or (eq .Name "Boolean") (eq .Name "Int96")}} +{{- if or (eq .Name "Boolean") }} // dictionary encoding does not exist for this type and Encoder will panic if useDict is true {{- end }} func ({{.lower}}EncoderTraits) Encoder(e format.Encoding, useDict bool, descr *schema.Column, mem memory.Allocator) TypedEncoder { if useDict { -{{- if or (eq .Name "Boolean") (eq .Name "Int96")}} +{{- if or (eq .Name "Boolean") }} panic("parquet: no {{.name}} dictionary encoding") {{- else}} - return &Dict{{.Name}}Encoder{newDictEncoderBase(descr, New{{if and (ne .Name "ByteArray") (ne .Name "FixedLenByteArray")}}{{.Name}}Dictionary(){{else}}BinaryDictionary(mem){{end}}, mem)} + return &Dict{{.Name}}Encoder{newDictEncoderBase(descr, New{{if and (ne .Name "Int96") (ne .Name "ByteArray") (ne .Name "FixedLenByteArray")}}{{.Name}}Dictionary(){{else}}BinaryDictionary(mem){{end}}, mem)} {{- end}} } @@ -105,7 +105,7 @@ func ({{.lower}}DecoderTraits) BytesRequired(n int) int { // Decoder returns a decoder for {{.lower}} typed data of the requested encoding type if available func ({{.lower}}DecoderTraits) Decoder(e parquet.Encoding, descr *schema.Column, useDict bool, mem memory.Allocator) TypedDecoder { if useDict { -{{- if and (ne .Name "Boolean") (ne .Name "Int96")}} +{{- if and (ne .Name "Boolean") }} return &Dict{{.Name}}Decoder{dictDecoder{decoder: newDecoderBase(format.Encoding_RLE_DICTIONARY, descr), mem: mem}} {{- else}} panic("dictionary decoding unimplemented for {{.lower}}") @@ -150,7 +150,7 @@ func ({{.lower}}DecoderTraits) Decoder(e parquet.Encoding, descr *schema.Column, } } -{{if and (ne .Name "Boolean") (ne .Name "Int96")}} +{{if and (ne .Name "Boolean") }} // Dict{{.Name}}Encoder is an encoder for {{.name}} data using dictionary encoding type Dict{{.Name}}Encoder struct { dictEncoder @@ -162,6 +162,12 @@ func (enc *Dict{{.Name}}Encoder) Type() parquet.Type { } {{if and (ne .Name "ByteArray") (ne .Name "FixedLenByteArray")}} +{{if (ne .Name "Int96")}} +// WriteDict populates the byte slice with the dictionary index +func (enc *Dict{{.Name}}Encoder) WriteDict(out []byte) { + enc.memo.CopyValues({{.prefix}}.{{.Name}}Traits.CastFromBytes(out)) +} + // Put encodes the values passed in, adding to the index as needed. 
func (enc *Dict{{.Name}}Encoder) Put(in []{{.name}}) { for _, val := range in { @@ -179,6 +185,34 @@ func (enc *Dict{{.Name}}Encoder) PutSpaced(in []{{.name}}, validBits []byte, val return nil }) } +{{else}} +// WriteDict populates the byte slice with the dictionary index +func (enc *DictInt96Encoder) WriteDict(out []byte) { + enc.memo.(BinaryMemoTable).CopyFixedWidthValues(0, parquet.Int96SizeBytes, out) +} + +// Put encodes the values passed in, adding to the index as needed +func (enc *DictInt96Encoder) Put(in []parquet.Int96) { + for _, v := range in { + memoIdx, found, err := enc.memo.GetOrInsert(v) + if err != nil { + panic(err) + } + if !found { + enc.dictEncodedSize += parquet.Int96SizeBytes + } + enc.addIndex(memoIdx) + } +} + +// PutSpaced is like Put but assumes space for nulls +func (enc *DictInt96Encoder) PutSpaced(in []parquet.Int96, validBits []byte, validBitsOffset int64) { + utils.VisitSetBitRuns(validBits, validBitsOffset, int64(len(in)), func(pos, length int64) error { + enc.Put(in[pos : pos+length]) + return nil + }) +} +{{end}} {{end}} // Dict{{.Name}}Decoder is a decoder for decoding dictionary encoded data for {{.name}} columns @@ -302,7 +336,7 @@ func (dc *{{.Name}}DictConverter) Copy(out interface{}, vals []utils.IndexType) // decoder as the decoder to decode the dictionary index. func NewDictConverter(dict TypedDecoder) utils.DictionaryConverter { switch dict.Type() { - {{ range .In }}{{ if and (ne .Name "Boolean") (ne .Name "Int96") -}} + {{ range .In }}{{ if and (ne .Name "Boolean") -}} case parquet.Types.{{if .physical }}{{.physical}}{{else}}{{.Name}}{{end}}: return &{{.Name}}DictConverter{valueDecoder: dict.({{.Name}}Decoder), dict: make([]{{.name}}, 0, dict.ValuesLeft())} {{ end }}{{ end -}} diff --git a/go/parquet/internal/testutils/pagebuilder.go b/go/parquet/internal/testutils/pagebuilder.go new file mode 100644 index 0000000000000..f742f1a561aaf --- /dev/null +++ b/go/parquet/internal/testutils/pagebuilder.go @@ -0,0 +1,297 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
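[Editor's note] The template changes above give Int96 the same dictionary-encoding entry points as the other fixed-width types: index values with Put, emit the index stream with FlushValues, then serialize the dictionary with DictEncodedSize/WriteDict. That is the sequence the DictionaryPageBuilder in the test utilities below relies on. A rough, hypothetical sketch using only calls shown in this commit; the helper name and its col/mem/values parameters are placeholders, and since the encoding package is internal such a helper could only live inside this module, as the test utilities do.

func buildInt96Dictionary(col *schema.Column, mem memory.Allocator, values []parquet.Int96) (dict, indices []byte, err error) {
	enc := encoding.NewEncoder(col.PhysicalType(), parquet.Encodings.Plain, true /*useDict*/, col, mem)
	enc.(encoding.Int96Encoder).Put(values) // grows the dictionary and buffers the indices

	idx, err := enc.FlushValues() // encoded dictionary indices: the data-page payload
	if err != nil {
		return nil, nil, err
	}

	dictEnc := enc.(encoding.DictEncoder)
	dict = make([]byte, dictEnc.DictEncodedSize())
	dictEnc.WriteDict(dict) // dictionary-page payload via the fixed-width binary memo table
	return dict, idx.Bytes(), nil
}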
+ +package testutils + +import ( + "encoding/binary" + "io" + "reflect" + + "github.com/apache/arrow/go/arrow/memory" + "github.com/apache/arrow/go/parquet" + "github.com/apache/arrow/go/parquet/compress" + "github.com/apache/arrow/go/parquet/file" + "github.com/apache/arrow/go/parquet/internal/encoding" + "github.com/apache/arrow/go/parquet/internal/utils" + "github.com/apache/arrow/go/parquet/schema" + "github.com/stretchr/testify/mock" +) + +type DataPageBuilder struct { + sink io.Writer + version parquet.DataPageVersion + + nvals int + encoding parquet.Encoding + defLvlEncoding parquet.Encoding + repLvlEncoding parquet.Encoding + defLvlBytesLen int + repLvlBytesLen int + hasDefLvls bool + hasRepLvls bool + hasValues bool +} + +var mem = memory.NewGoAllocator() + +func (d *DataPageBuilder) appendLevels(lvls []int16, maxLvl int16, e parquet.Encoding) int { + if e != parquet.Encodings.RLE { + panic("parquet: only rle encoding currently implemented") + } + + buf := encoding.NewBufferWriter(encoding.LevelEncodingMaxBufferSize(e, maxLvl, len(lvls)), memory.DefaultAllocator) + var enc encoding.LevelEncoder + enc.Init(e, maxLvl, buf) + enc.Encode(lvls) + + rleBytes := enc.Len() + if d.version == parquet.DataPageV1 { + if err := binary.Write(d.sink, binary.LittleEndian, int32(rleBytes)); err != nil { + panic(err) + } + } + + if _, err := d.sink.Write(buf.Bytes()[:rleBytes]); err != nil { + panic(err) + } + return rleBytes +} + +func (d *DataPageBuilder) AppendDefLevels(lvls []int16, maxLvl int16) { + d.defLvlBytesLen = d.appendLevels(lvls, maxLvl, parquet.Encodings.RLE) + + d.nvals = utils.MaxInt(len(lvls), d.nvals) + d.defLvlEncoding = parquet.Encodings.RLE + d.hasDefLvls = true +} + +func (d *DataPageBuilder) AppendRepLevels(lvls []int16, maxLvl int16) { + d.repLvlBytesLen = d.appendLevels(lvls, maxLvl, parquet.Encodings.RLE) + + d.nvals = utils.MaxInt(len(lvls), d.nvals) + d.repLvlEncoding = parquet.Encodings.RLE + d.hasRepLvls = true +} + +func (d *DataPageBuilder) AppendValues(desc *schema.Column, values interface{}, e parquet.Encoding) { + enc := encoding.NewEncoder(desc.PhysicalType(), e, false, desc, mem) + var sz int + switch v := values.(type) { + case []int32: + enc.(encoding.Int32Encoder).Put(v) + sz = len(v) + case []int64: + enc.(encoding.Int64Encoder).Put(v) + sz = len(v) + case []parquet.Int96: + enc.(encoding.Int96Encoder).Put(v) + sz = len(v) + case []float32: + enc.(encoding.Float32Encoder).Put(v) + sz = len(v) + case []float64: + enc.(encoding.Float64Encoder).Put(v) + sz = len(v) + case []parquet.ByteArray: + enc.(encoding.ByteArrayEncoder).Put(v) + sz = len(v) + } + buf, _ := enc.FlushValues() + _, err := d.sink.Write(buf.Bytes()) + if err != nil { + panic(err) + } + + d.nvals = utils.MaxInt(sz, d.nvals) + d.encoding = e + d.hasValues = true +} + +type DictionaryPageBuilder struct { + traits encoding.DictEncoder + numDictValues int32 + hasValues bool +} + +func NewDictionaryPageBuilder(d *schema.Column) *DictionaryPageBuilder { + return &DictionaryPageBuilder{ + encoding.NewEncoder(d.PhysicalType(), parquet.Encodings.Plain, true, d, mem).(encoding.DictEncoder), + 0, false} +} + +func (d *DictionaryPageBuilder) AppendValues(values interface{}) encoding.Buffer { + switch v := values.(type) { + case []int32: + d.traits.(encoding.Int32Encoder).Put(v) + case []int64: + d.traits.(encoding.Int64Encoder).Put(v) + case []parquet.Int96: + d.traits.(encoding.Int96Encoder).Put(v) + case []float32: + d.traits.(encoding.Float32Encoder).Put(v) + case []float64: + 
d.traits.(encoding.Float64Encoder).Put(v) + case []parquet.ByteArray: + d.traits.(encoding.ByteArrayEncoder).Put(v) + } + + d.numDictValues = int32(d.traits.NumEntries()) + d.hasValues = true + buf, _ := d.traits.FlushValues() + return buf +} + +func (d *DictionaryPageBuilder) WriteDict() *memory.Buffer { + buf := memory.NewBufferBytes(make([]byte, d.traits.DictEncodedSize())) + d.traits.WriteDict(buf.Bytes()) + return buf +} + +func (d *DictionaryPageBuilder) NumValues() int32 { + return d.numDictValues +} + +func MakeDataPage(dataPageVersion parquet.DataPageVersion, d *schema.Column, values interface{}, nvals int, e parquet.Encoding, indexBuffer encoding.Buffer, defLvls, repLvls []int16, maxDef, maxRep int16) file.Page { + num := 0 + + stream := encoding.NewBufferWriter(1024, mem) + builder := DataPageBuilder{sink: stream, version: dataPageVersion} + + if len(repLvls) > 0 { + builder.AppendRepLevels(repLvls, maxRep) + } + if len(defLvls) > 0 { + builder.AppendDefLevels(defLvls, maxDef) + } + + if e == parquet.Encodings.Plain { + builder.AppendValues(d, values, e) + num = builder.nvals + } else { + stream.Write(indexBuffer.Bytes()) + num = utils.MaxInt(builder.nvals, nvals) + } + + buf := stream.Finish() + if dataPageVersion == parquet.DataPageV1 { + return file.NewDataPageV1(buf, int32(num), e, builder.defLvlEncoding, builder.repLvlEncoding, int64(buf.Len())) + } + return file.NewDataPageV2(buf, int32(num), 0, int32(num), e, int32(builder.defLvlBytesLen), int32(builder.repLvlBytesLen), int64(buf.Len()), false) +} + +func MakeDictPage(d *schema.Column, values interface{}, valuesPerPage []int, e parquet.Encoding) (*file.DictionaryPage, []encoding.Buffer) { + bldr := NewDictionaryPageBuilder(d) + npages := len(valuesPerPage) + + ref := reflect.ValueOf(values) + valStart := 0 + + rleIndices := make([]encoding.Buffer, 0, npages) + for _, nvals := range valuesPerPage { + rleIndices = append(rleIndices, bldr.AppendValues(ref.Slice(valStart, valStart+nvals).Interface())) + valStart += nvals + } + + buffer := bldr.WriteDict() + return file.NewDictionaryPage(buffer, bldr.NumValues(), parquet.Encodings.Plain), rleIndices +} + +type MockPageReader struct { + mock.Mock + + curpage int +} + +func (m *MockPageReader) Err() error { + return m.Called().Error(0) +} + +func (m *MockPageReader) Reset(parquet.ReaderAtSeeker, int64, compress.Compression, *file.CryptoContext) { +} + +func (m *MockPageReader) SetMaxPageHeaderSize(int) {} + +func (m *MockPageReader) Page() file.Page { + return m.TestData().Get("pages").Data().([]file.Page)[m.curpage-1] +} + +func (m *MockPageReader) Next() bool { + pageList := m.TestData().Get("pages").Data().([]file.Page) + m.curpage++ + return len(pageList) >= m.curpage +} + +func PaginatePlain(version parquet.DataPageVersion, d *schema.Column, values reflect.Value, defLevels, repLevels []int16, + maxDef, maxRep int16, lvlsPerPage int, valuesPerPage []int, enc parquet.Encoding) []file.Page { + + var ( + npages = len(valuesPerPage) + defLvlStart = 0 + defLvlEnd = 0 + repLvlStart = 0 + repLvlEnd = 0 + valueStart = 0 + ) + + pageList := make([]file.Page, 0, npages) + for i := 0; i < npages; i++ { + if maxDef > 0 { + defLvlStart = i * lvlsPerPage + defLvlEnd = (i + 1) * lvlsPerPage + } + if maxRep > 0 { + repLvlStart = i * lvlsPerPage + repLvlEnd = (i + 1) * lvlsPerPage + } + + page := MakeDataPage(version, d, + values.Slice(valueStart, valueStart+valuesPerPage[i]).Interface(), + valuesPerPage[i], enc, nil, defLevels[defLvlStart:defLvlEnd], + repLevels[repLvlStart:repLvlEnd], 
maxDef, maxRep) + valueStart += valuesPerPage[i] + pageList = append(pageList, page) + } + return pageList +} + +func PaginateDict(version parquet.DataPageVersion, d *schema.Column, values reflect.Value, defLevels, repLevels []int16, maxDef, maxRep int16, lvlsPerPage int, valuesPerPage []int, enc parquet.Encoding) []file.Page { + var ( + npages = len(valuesPerPage) + pages = make([]file.Page, 0, npages) + defStart = 0 + defEnd = 0 + repStart = 0 + repEnd = 0 + ) + + dictPage, rleIndices := MakeDictPage(d, values.Interface(), valuesPerPage, enc) + pages = append(pages, dictPage) + for i := 0; i < npages; i++ { + if maxDef > 0 { + defStart = i * lvlsPerPage + defEnd = (i + 1) * lvlsPerPage + } + if maxRep > 0 { + repStart = i * lvlsPerPage + repEnd = (i + 1) * lvlsPerPage + } + page := MakeDataPage(version, d, nil, valuesPerPage[i], enc, rleIndices[i], + defLevels[defStart:defEnd], repLevels[repStart:repEnd], maxDef, maxRep) + pages = append(pages, page) + } + return pages +} diff --git a/go/parquet/reader_properties.go b/go/parquet/reader_properties.go index 92abae57dc1c2..7e99d9f68705a 100644 --- a/go/parquet/reader_properties.go +++ b/go/parquet/reader_properties.go @@ -20,7 +20,6 @@ import ( "bytes" "io" - "github.com/apache/arrow/go/arrow/ipc" "github.com/apache/arrow/go/arrow/memory" "golang.org/x/xerrors" ) @@ -61,7 +60,7 @@ func (r *ReaderProperties) Allocator() memory.Allocator { return r.alloc } // // If BufferedStreamEnabled is true, it creates an io.SectionReader, otherwise it will read the entire section // into a buffer in memory and return a bytes.NewReader for that buffer. -func (r *ReaderProperties) GetStream(source io.ReaderAt, start, nbytes int64) (ipc.ReadAtSeeker, error) { +func (r *ReaderProperties) GetStream(source io.ReaderAt, start, nbytes int64) (ReaderAtSeeker, error) { if r.BufferedStreamEnabled { return io.NewSectionReader(source, start, nbytes), nil } diff --git a/go/parquet/types.go b/go/parquet/types.go index e568984ebe39c..630244ca8e9df 100644 --- a/go/parquet/types.go +++ b/go/parquet/types.go @@ -18,6 +18,7 @@ package parquet import ( "encoding/binary" + "io" "reflect" "strings" "time" @@ -47,6 +48,15 @@ var ( FixedLenByteArraySizeBytes int = int(reflect.TypeOf(FixedLenByteArray{}).Size()) ) +// ReaderAtSeeker is a combination of the ReaderAt and ReadSeeker interfaces +// from the io package defining the only functionality that is required +// in order for a parquet file to be read by the file functions. We just need +// to be able to call ReadAt, Read, and Seek +type ReaderAtSeeker interface { + io.ReaderAt + io.ReadSeeker +} + // NewInt96 creates a new Int96 from the given 3 uint32 values. 
func NewInt96(v [3]uint32) (out Int96) { binary.LittleEndian.PutUint32(out[0:], v[0]) From e7fca7756063def4c1583b9109067a65ddcb7a53 Mon Sep 17 00:00:00 2001 From: Benson Muite Date: Sun, 24 Oct 2021 05:48:00 +0900 Subject: [PATCH 010/194] ARROW-14451: [Release][Ruby] The `--path` flag is deprecated Closes #11528 from bkmgit/ARROW-14451 Authored-by: Benson Muite Signed-off-by: Sutou Kouhei --- dev/release/verify-release-candidate.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 212547744c4e4..63561a12b87fc 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -444,7 +444,8 @@ test_ruby() { for module in ${modules}; do pushd ${module} - bundle install --path vendor/bundle + bundle config set --local path 'vendor/bundle' + bundle install bundle exec ruby test/run-test.rb popd done From e7158c62ae43cbcea3f90c11dcbb40ffbbc94484 Mon Sep 17 00:00:00 2001 From: Matthew Topol Date: Sat, 23 Oct 2021 17:21:15 -0400 Subject: [PATCH 011/194] ARROW-13984: [Go][Parquet] File readers Looks like I merged #11146 before it finished sync'ing to the apache mirror and was missing a few commits. Here's the missing ones. Closes #11530 from zeroshade/goparquet-file Authored-by: Matthew Topol Signed-off-by: Matthew Topol --- go/parquet/file/page_reader.go | 16 ++++++++++++---- go/parquet/file/row_group_reader.go | 10 ++++++++++ 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/go/parquet/file/page_reader.go b/go/parquet/file/page_reader.go index 251499af21ce7..5c36b338ba9f8 100644 --- a/go/parquet/file/page_reader.go +++ b/go/parquet/file/page_reader.go @@ -26,7 +26,6 @@ import ( "github.com/apache/arrow/go/arrow/memory" "github.com/apache/arrow/go/parquet" "github.com/apache/arrow/go/parquet/compress" - "github.com/apache/arrow/go/parquet/internal/debug" "github.com/apache/arrow/go/parquet/internal/encryption" format "github.com/apache/arrow/go/parquet/internal/gen-go/parquet" "github.com/apache/arrow/go/parquet/internal/thrift" @@ -512,7 +511,10 @@ func (p *serializedPageReader) Next() bool { p.err = err return false } - debug.Assert(len(data) == lenUncompressed, "len(data) != lenUncompressed") + if len(data) != lenUncompressed { + p.err = xerrors.Errorf("parquet: metadata said %d bytes uncompressed dictionary page, got %d bytes", lenUncompressed, len(data)) + return false + } // p.buf.Resize(lenUncompressed) // make dictionary page @@ -540,7 +542,10 @@ func (p *serializedPageReader) Next() bool { p.err = err return false } - debug.Assert(len(data) == lenUncompressed, "len(data) != lenUncompressed") + if len(data) != lenUncompressed { + p.err = xerrors.Errorf("parquet: metadata said %d bytes uncompressed data page, got %d bytes", lenUncompressed, len(data)) + return false + } // make datapagev1 p.curPage = &DataPageV1{ @@ -589,7 +594,10 @@ func (p *serializedPageReader) Next() bool { io.ReadFull(p.r, p.buf.Bytes()) data = p.buf.Bytes() } - debug.Assert(len(data) == lenUncompressed, "len(data) != lenUncompressed") + if len(data) != lenUncompressed { + p.err = xerrors.Errorf("parquet: metadata said %d bytes uncompressed data page, got %d bytes", lenUncompressed, len(data)) + return false + } // make datapage v2 p.curPage = &DataPageV2{ diff --git a/go/parquet/file/row_group_reader.go b/go/parquet/file/row_group_reader.go index 9c74a25c11eca..455144e266ac0 100644 --- a/go/parquet/file/row_group_reader.go +++ b/go/parquet/file/row_group_reader.go @@ 
-79,7 +79,17 @@ func (r *RowGroupReader) GetColumnPageReader(i int) (PageReader, error) { } colLen := col.TotalCompressedSize() + // PARQUET-816 workaround for old files created by older parquet-mr if r.fileMetadata.WriterVersion().LessThan(metadata.Parquet816FixedVersion) { + // The Parquet MR writer had a bug in 1.2.8 and below where it didn't include the + // dictionary page header size in total_compressed_size and total_uncompressed_size + // (see IMPALA-694). We add padding to compensate. + if colStart < 0 || colLen < 0 { + return nil, xerrors.Errorf("invalid column chunk metadata, offset (%d) and length (%d) should both be positive", colStart, colLen) + } + if colStart > r.sourceSz || colLen > r.sourceSz { + return nil, xerrors.Errorf("invalid column chunk metadata, offset (%d) and length (%d) must both be less than total source size (%d)", colStart, colLen, r.sourceSz) + } bytesRemain := r.sourceSz - (colStart + colLen) padding := utils.Min(maxDictHeaderSize, bytesRemain) colLen += padding From be665ef948cb2c6706c60053c5db918e948713e8 Mon Sep 17 00:00:00 2001 From: Eduardo Ponce Date: Mon, 25 Oct 2021 08:53:12 -0400 Subject: [PATCH 012/194] ARROW-13879: [C++] Mixed support for binary types in regex functions This PR extends variable-width binary types support for string functions: * find_substring[_regex] * count_substring[_regex] * match_substring[_regex] * split_pattern[_regex] * replace_substring[_regex] * match_like * starts/ends_with * extract_regex Also, updates several scalar string kernel/function registrations. Closes #11233 from edponce/ARROW-13879-Mixed-support-for-binary-types-in-regex- Authored-by: Eduardo Ponce Signed-off-by: David Li --- cpp/src/arrow/array/array_binary_test.cc | 12 +- .../arrow/compute/kernels/aggregate_test.cc | 4 +- .../arrow/compute/kernels/codegen_internal.h | 26 +- .../compute/kernels/scalar_if_else_test.cc | 8 +- .../compute/kernels/scalar_set_lookup_test.cc | 4 +- .../arrow/compute/kernels/scalar_string.cc | 603 ++++++++++-------- .../compute/kernels/scalar_string_test.cc | 455 ++++++++++--- cpp/src/arrow/compute/kernels/test_util.cc | 4 +- .../arrow/compute/kernels/vector_hash_test.cc | 2 +- .../compute/kernels/vector_replace_test.cc | 2 +- .../compute/kernels/vector_selection_test.cc | 6 +- cpp/src/arrow/testing/gtest_util.h | 4 +- docs/source/cpp/compute.rst | 52 +- 13 files changed, 759 insertions(+), 423 deletions(-) diff --git a/cpp/src/arrow/array/array_binary_test.cc b/cpp/src/arrow/array/array_binary_test.cc index 6892e5f0a91f7..7840c60f8974d 100644 --- a/cpp/src/arrow/array/array_binary_test.cc +++ b/cpp/src/arrow/array/array_binary_test.cc @@ -324,7 +324,7 @@ class TestStringArray : public ::testing::Test { std::shared_ptr strings_; }; -TYPED_TEST_SUITE(TestStringArray, BinaryArrowTypes); +TYPED_TEST_SUITE(TestStringArray, BaseBinaryArrowTypes); TYPED_TEST(TestStringArray, TestArrayBasics) { this->TestArrayBasics(); } @@ -661,7 +661,7 @@ class TestStringBuilder : public TestBuilder { std::shared_ptr result_; }; -TYPED_TEST_SUITE(TestStringBuilder, BinaryArrowTypes); +TYPED_TEST_SUITE(TestStringBuilder, BaseBinaryArrowTypes); TYPED_TEST(TestStringBuilder, TestScalarAppend) { this->TestScalarAppend(); } @@ -863,7 +863,7 @@ struct BinaryAppender { }; template -class TestBinaryDataVisitor : public ::testing::Test { +class TestBaseBinaryDataVisitor : public ::testing::Test { public: using TypeClass = T; @@ -891,10 +891,10 @@ class TestBinaryDataVisitor : public ::testing::Test { std::shared_ptr type_; }; 
-TYPED_TEST_SUITE(TestBinaryDataVisitor, BinaryArrowTypes); +TYPED_TEST_SUITE(TestBaseBinaryDataVisitor, BaseBinaryArrowTypes); -TYPED_TEST(TestBinaryDataVisitor, Basics) { this->TestBasics(); } +TYPED_TEST(TestBaseBinaryDataVisitor, Basics) { this->TestBasics(); } -TYPED_TEST(TestBinaryDataVisitor, Sliced) { this->TestSliced(); } +TYPED_TEST(TestBaseBinaryDataVisitor, Sliced) { this->TestSliced(); } } // namespace arrow diff --git a/cpp/src/arrow/compute/kernels/aggregate_test.cc b/cpp/src/arrow/compute/kernels/aggregate_test.cc index 992f73698648d..fe940006cb263 100644 --- a/cpp/src/arrow/compute/kernels/aggregate_test.cc +++ b/cpp/src/arrow/compute/kernels/aggregate_test.cc @@ -1647,7 +1647,7 @@ TEST(TestNullMinMaxKernel, Basics) { template class TestBaseBinaryMinMaxKernel : public ::testing::Test {}; -TYPED_TEST_SUITE(TestBaseBinaryMinMaxKernel, BinaryArrowTypes); +TYPED_TEST_SUITE(TestBaseBinaryMinMaxKernel, BaseBinaryArrowTypes); TYPED_TEST(TestBaseBinaryMinMaxKernel, Basics) { std::vector chunked_input1 = {R"(["cc", "", "aa", "b", "c"])", R"(["d", "", null, "b", "c"])"}; @@ -2249,7 +2249,7 @@ TYPED_TEST(TestBooleanIndexKernel, Basics) { template class TestStringIndexKernel : public TestIndexKernel {}; -TYPED_TEST_SUITE(TestStringIndexKernel, BinaryArrowTypes); +TYPED_TEST_SUITE(TestStringIndexKernel, BaseBinaryArrowTypes); TYPED_TEST(TestStringIndexKernel, Basics) { auto buffer = Buffer::FromString("foo"); auto value = std::make_shared(buffer); diff --git a/cpp/src/arrow/compute/kernels/codegen_internal.h b/cpp/src/arrow/compute/kernels/codegen_internal.h index 438362585b5ed..2a1167c48e273 100644 --- a/cpp/src/arrow/compute/kernels/codegen_internal.h +++ b/cpp/src/arrow/compute/kernels/codegen_internal.h @@ -1199,7 +1199,7 @@ ArrayKernelExec GenerateTypeAgnosticPrimitive(detail::GetTypeId get_id) { } } -// similar to GenerateTypeAgnosticPrimitive, but for variable types +// similar to GenerateTypeAgnosticPrimitive, but for base variable binary types template