Skip to content

Commit

Permalink
ARROW-6506: [C++] Fix validation of ExtensionArray with struct storage type
Browse files Browse the repository at this point in the history

https://issues.apache.org/jira/browse/ARROW-6506

Closes #5340 from jorisvandenbossche/ARROW-6506-nested-extension-type-validation and squashes the following commits:

1bce476 <Joris Van den Bossche> run clang-format
032fe77 <Joris Van den Bossche> lint error
5ad4a70 <Joris Van den Bossche> ARROW-6506:  Fix validation of ExtensionArray with struct storage type

Authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
  • Loading branch information
jorisvandenbossche authored and pitrou committed Sep 11, 2019
1 parent 3437c97 commit 1d27386
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 5 deletions.
12 changes: 7 additions & 5 deletions cpp/src/arrow/array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1447,11 +1447,13 @@ Status Array::Validate() const {
"of type ",
type.ToString(), ", got ", data.buffers.size());
}
if (data.child_data.size() != static_cast<size_t>(type.num_children())) {
return Status::Invalid("Expected ", type.num_children(),
" child arrays in array "
"of type ",
type.ToString(), ", got ", data.child_data.size());
if (type.id() != Type::EXTENSION) {
if (data.child_data.size() != static_cast<size_t>(type.num_children())) {
return Status::Invalid("Expected ", type.num_children(),
" child arrays in array "
"of type ",
type.ToString(), ", got ", data.child_data.size());
}
}
if (layout.has_dictionary && !data.dictionary) {
return Status::Invalid("Array of type ", type.ToString(),
Expand Down
59 changes: 59 additions & 0 deletions cpp/src/arrow/extension_type_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,44 @@ class Parametric2Type : public ExtensionType {
int32_t parameter_;
};

// An extension type with a non-primitive storage type.
//
// ExtStructArray is the array class that ExtStructType::MakeArray
// instantiates; it adds no members of its own and only inherits
// ExtensionArray's constructors.
class ExtStructArray : public ExtensionArray {
public:
using ExtensionArray::ExtensionArray;
};

class ExtStructType : public ExtensionType {
public:
ExtStructType()
: ExtensionType(struct_({field("a", int64()), field("b", float64())})) {}

std::string extension_name() const override { return "ext-struct-type"; }

bool ExtensionEquals(const ExtensionType& other) const override {
const auto& other_ext = static_cast<const ExtensionType&>(other);
if (other_ext.extension_name() != this->extension_name()) {
return false;
}
return true;
}

std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const override {
return std::make_shared<ExtStructArray>(data);
}

Status Deserialize(std::shared_ptr<DataType> storage_type,
const std::string& serialized,
std::shared_ptr<DataType>* out) const override {
if (serialized != "ext-struct-type-unique-code") {
return Status::Invalid("Type identifier did not match");
}
*out = std::make_shared<ExtStructType>();
return Status::OK();
}

std::string Serialize() const override { return "ext-struct-type-unique-code"; }
};

class TestExtensionType : public ::testing::Test {
public:
void SetUp() { ASSERT_OK(RegisterExtensionType(std::make_shared<UUIDType>())); }
Expand Down Expand Up @@ -340,4 +378,25 @@ TEST_F(TestExtensionType, ParametricEquals) {
ASSERT_EQ(p1_type->fingerprint(), "");
}

// Builds a two-element ExtStructType array by parsing the struct storage
// values from JSON and then re-tagging a copy of that array's data with the
// extension type.
std::shared_ptr<Array> ExampleStruct() {
  auto struct_ext_type = std::make_shared<ExtStructType>();
  auto storage_arr =
      ArrayFromJSON(struct_ext_type->storage_type(), "[[1, 0.1], [2, 0.2]]");

  // Copy the ArrayData so the storage array's own data is left untouched,
  // then swap in the extension type before wrapping it in an array.
  auto retyped_data = storage_arr->data()->Copy();
  retyped_data->type = struct_ext_type;
  return MakeArray(retyped_data);
}

// Validate() must succeed on the extension arrays produced by ExampleUUID,
// ExampleParametric and ExampleStruct (the last one has struct storage and
// therefore child arrays).
TEST_F(TestExtensionType, ValidateExtensionArray) {
  // Build every array first, then validate each in turn.
  auto uuid_arr = ExampleUUID();
  auto parametric_type = std::make_shared<Parametric1Type>(6);
  auto parametric_arr = ExampleParametric(parametric_type, "[null, 1, 2, 3]");
  auto struct_arr = ExampleStruct();

  ASSERT_OK(uuid_arr->Validate());
  ASSERT_OK(parametric_arr->Validate());
  ASSERT_OK(struct_arr->Validate());
}

} // namespace arrow

0 comments on commit 1d27386

Please sign in to comment.