New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ARROW-5155: [GLib][Ruby] Add support for building union arrays from data type #4127
Changes from 3 commits
c8793d5
5ad5572
606a04c
f1bfa07
d550dc9
f82ac3d
e625556
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -366,6 +366,54 @@ garrow_sparse_union_array_new(GArrowInt8Array *type_ids, | |
} | ||
} | ||
|
||
/** | ||
* garrow_sparse_union_array_new_data_type: | ||
* @data_type: The data type for the sparse array. | ||
* @type_ids: The field type IDs for each value as #GArrowInt8Array. | ||
* @fields: (element-type GArrowArray): The arrays for each field | ||
* as #GList of #GArrowArray. | ||
* @error: (nullable): Return location for a #GError or %NULL. | ||
* | ||
* Returns: (nullable): A newly created #GArrowSparseUnionArray | ||
* or %NULL on error. | ||
* | ||
* Since: 0.14.0 | ||
*/ | ||
GArrowSparseUnionArray *
garrow_sparse_union_array_new_data_type(GArrowSparseUnionDataType *data_type,
                                        GArrowInt8Array *type_ids,
                                        GList *fields,
                                        GError **error)
{
  /* Unwrap the GLib data type and view it as the Arrow union type so
   * that its children and type codes can be read. */
  auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type));
  auto arrow_union_data_type =
    std::static_pointer_cast<arrow::UnionType>(arrow_data_type);

  /* The field names handed to MakeSparse() are the names of the union
   * data type's child fields, in order. */
  std::vector<std::string> arrow_field_names;
  for (const auto &arrow_field : arrow_union_data_type->children()) {
    arrow_field_names.push_back(arrow_field->name());
  }

  auto arrow_type_ids = garrow_array_get_raw(GARROW_ARRAY(type_ids));

  /* Unwrap every GArrowArray stored in the GList. */
  std::vector<std::shared_ptr<arrow::Array>> arrow_fields;
  for (auto node = fields; node; node = node->next) {
    auto *field = GARROW_ARRAY(node->data);
    arrow_fields.push_back(garrow_array_get_raw(field));
  }

  /* The type codes are passed straight from the data type; there is
   * no need to copy them into a local vector first. */
  std::shared_ptr<arrow::Array> arrow_union_array;
  auto status =
    arrow::UnionArray::MakeSparse(*arrow_type_ids,
                                  arrow_fields,
                                  arrow_field_names,
                                  arrow_union_data_type->type_codes(),
                                  &arrow_union_array);
  if (garrow_error_check(error,
                         status,
                         "[sparse-union-array][new][data-type]")) {
    return GARROW_SPARSE_UNION_ARRAY(garrow_array_new_raw(&arrow_union_array));
  } else {
    return NULL;
  }
}
|
||
|
||
G_DEFINE_TYPE(GArrowDenseUnionArray, | ||
garrow_dense_union_array, | ||
|
@@ -420,6 +468,57 @@ garrow_dense_union_array_new(GArrowInt8Array *type_ids, | |
} | ||
} | ||
|
||
/** | ||
* garrow_dense_union_array_new_data_type: | ||
* @data_type: The data type for the dense array. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. "sparse" -> "dense" |
||
* @type_ids: The field type IDs for each value as #GArrowInt8Array. | ||
* @value_offsets: The value offsets for each value as #GArrowInt32Array. | ||
* Each offset is counted for each type. | ||
* @fields: (element-type GArrowArray): The arrays for each field | ||
* as #GList of #GArrowArray. | ||
* @error: (nullable): Return location for a #GError or %NULL. | ||
* | ||
* Returns: (nullable): A newly created #GArrowDenseUnionArray | ||
* or %NULL on error. | ||
* | ||
* Since: 0.14.0 | ||
*/ | ||
GArrowDenseUnionArray *
garrow_dense_union_array_new_data_type(GArrowDenseUnionDataType *data_type,
                                       GArrowInt8Array *type_ids,
                                       GArrowInt32Array *value_offsets,
                                       GList *fields,
                                       GError **error)
{
  /* Unwrap the GLib data type and view it as the Arrow union type so
   * that its children and type codes can be read. */
  auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type));
  auto arrow_union_data_type =
    std::static_pointer_cast<arrow::UnionType>(arrow_data_type);

  /* The field names handed to MakeDense() are the names of the union
   * data type's child fields, in order. */
  std::vector<std::string> arrow_field_names;
  for (const auto &arrow_field : arrow_union_data_type->children()) {
    arrow_field_names.push_back(arrow_field->name());
  }

  auto arrow_type_ids = garrow_array_get_raw(GARROW_ARRAY(type_ids));
  auto arrow_value_offsets = garrow_array_get_raw(GARROW_ARRAY(value_offsets));

  /* Unwrap every GArrowArray stored in the GList. */
  std::vector<std::shared_ptr<arrow::Array>> arrow_fields;
  for (auto node = fields; node; node = node->next) {
    auto *field = GARROW_ARRAY(node->data);
    arrow_fields.push_back(garrow_array_get_raw(field));
  }

  /* The type codes are passed straight from the data type; there is
   * no need to copy them into a local vector first. */
  std::shared_ptr<arrow::Array> arrow_union_array;
  auto status =
    arrow::UnionArray::MakeDense(*arrow_type_ids,
                                 *arrow_value_offsets,
                                 arrow_fields,
                                 arrow_field_names,
                                 arrow_union_data_type->type_codes(),
                                 &arrow_union_array);
  if (garrow_error_check(error, status, "[dense-union-array][new][data-type]")) {
    return GARROW_DENSE_UNION_ARRAY(garrow_array_new_raw(&arrow_union_array));
  } else {
    return NULL;
  }
}
|
||
|
||
G_DEFINE_TYPE(GArrowDictionaryArray, | ||
garrow_dictionary_array, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,33 +18,69 @@ | |
class TestDenseUnionArray < Test::Unit::TestCase | ||
include Helper::Buildable | ||
|
||
def setup | ||
type_ids = build_int8_array([0, 1, nil, 1, 1]) | ||
value_offsets = build_int32_array([0, 0, 0, 1, 2]) | ||
fields = [ | ||
build_int16_array([1]), | ||
build_string_array(["a", "b", "c"]), | ||
] | ||
@array = Arrow::DenseUnionArray.new(type_ids, value_offsets, fields) | ||
end | ||
sub_test_case(".new") do | ||
def setup | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could you put more sub test cases under ".new" sub test case? sub_test_case(".new") do
sub_test_case("default") do # or "no DataType"?
end
sub_test_case("DataType") do
end
end |
||
type_ids = build_int8_array([0, 1, nil, 1, 1]) | ||
value_offsets = build_int32_array([0, 0, 0, 1, 2]) | ||
fields = [ | ||
build_int16_array([1]), | ||
build_string_array(["a", "b", "c"]), | ||
] | ||
@array = Arrow::DenseUnionArray.new(type_ids, value_offsets, fields) | ||
end | ||
|
||
def test_value_data_type | ||
fields = [ | ||
Arrow::Field.new("0", Arrow::Int16DataType.new), | ||
Arrow::Field.new("1", Arrow::StringDataType.new), | ||
] | ||
assert_equal(Arrow::DenseUnionDataType.new(fields, [0, 1]), | ||
@array.value_data_type) | ||
end | ||
|
||
def test_value_data_type | ||
fields = [ | ||
Arrow::Field.new("0", Arrow::Int16DataType.new), | ||
Arrow::Field.new("1", Arrow::StringDataType.new), | ||
] | ||
assert_equal(Arrow::DenseUnionDataType.new(fields, [0, 1]), | ||
@array.value_data_type) | ||
def test_field | ||
assert_equal([ | ||
build_int16_array([1]), | ||
build_string_array(["a", "b", "c"]), | ||
], | ||
[ | ||
@array.get_field(0), | ||
@array.get_field(1), | ||
]) | ||
end | ||
end | ||
|
||
def test_field | ||
assert_equal([ | ||
build_int16_array([1]), | ||
build_string_array(["a", "b", "c"]), | ||
], | ||
[ | ||
@array.get_field(0), | ||
@array.get_field(1), | ||
]) | ||
sub_test_case("DataType") do | ||
def setup | ||
data_type_fields = [ | ||
Arrow::Field.new("number", Arrow::Int16DataType.new), | ||
Arrow::Field.new("text", Arrow::StringDataType.new), | ||
] | ||
type_codes = [11, 13] | ||
@data_type = Arrow::DenseUnionDataType.new(data_type_fields, type_codes) | ||
type_ids = build_int8_array([0, 1, nil, 1, 0]) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this right? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, type ids must be There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We should add element accessor to union arrays and test union array values later. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I understand it. |
||
value_offsets = build_int32_array([0, 0, 0, 1, 2]) | ||
fields = [ | ||
build_int16_array([1]), | ||
build_string_array(["a", "b", "c"]) | ||
] | ||
@array = Arrow::DenseUnionArray.new(@data_type, type_ids, value_offsets, fields) | ||
end | ||
|
||
def test_value_data_type | ||
assert_equal(@data_type, | ||
@array.value_data_type) | ||
end | ||
|
||
def test_field | ||
assert_equal([ | ||
build_int16_array([1]), | ||
build_string_array(["a", "b", "c"]), | ||
], | ||
[ | ||
@array.get_field(0), | ||
@array.get_field(1), | ||
]) | ||
end | ||
end | ||
end |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,32 +18,67 @@ | |
class TestSparseUnionArray < Test::Unit::TestCase | ||
include Helper::Buildable | ||
|
||
def setup | ||
type_ids = build_int8_array([0, 1, nil, 1, 0]) | ||
fields = [ | ||
build_int16_array([1, nil, nil, nil, 5]), | ||
build_string_array([nil, "b", nil, "d", nil]), | ||
] | ||
@array = Arrow::SparseUnionArray.new(type_ids, fields) | ||
end | ||
sub_test_case(".new") do | ||
def setup | ||
type_ids = build_int8_array([0, 1, nil, 1, 0]) | ||
fields = [ | ||
build_int16_array([1, nil, nil, nil, 5]), | ||
build_string_array([nil, "b", nil, "d", nil]), | ||
] | ||
@array = Arrow::SparseUnionArray.new(type_ids, fields) | ||
end | ||
|
||
def test_value_data_type | ||
fields = [ | ||
Arrow::Field.new("0", Arrow::Int16DataType.new), | ||
Arrow::Field.new("1", Arrow::StringDataType.new), | ||
] | ||
assert_equal(Arrow::SparseUnionDataType.new(fields, [0, 1]), | ||
@array.value_data_type) | ||
end | ||
|
||
def test_value_data_type | ||
fields = [ | ||
Arrow::Field.new("0", Arrow::Int16DataType.new), | ||
Arrow::Field.new("1", Arrow::StringDataType.new), | ||
] | ||
assert_equal(Arrow::SparseUnionDataType.new(fields, [0, 1]), | ||
@array.value_data_type) | ||
def test_field | ||
assert_equal([ | ||
build_int16_array([1, nil, nil, nil, 5]), | ||
build_string_array([nil, "b", nil, "d", nil]), | ||
], | ||
[ | ||
@array.get_field(0), | ||
@array.get_field(1), | ||
]) | ||
end | ||
end | ||
|
||
def test_field | ||
assert_equal([ | ||
build_int16_array([1, nil, nil, nil, 5]), | ||
build_string_array([nil, "b", nil, "d", nil]), | ||
], | ||
[ | ||
@array.get_field(0), | ||
@array.get_field(1), | ||
]) | ||
sub_test_case("DataType") do | ||
def setup | ||
data_type_fields = [ | ||
Arrow::Field.new("number", Arrow::Int16DataType.new), | ||
Arrow::Field.new("text", Arrow::StringDataType.new), | ||
] | ||
type_codes = [11, 13] | ||
@data_type = Arrow::SparseUnionDataType.new(data_type_fields, type_codes) | ||
type_ids = build_int8_array([0, 1, nil, 1, 0]) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this right? |
||
fields = [ | ||
build_int16_array([1, nil, nil, nil, 5]), | ||
build_string_array([nil, "b", nil, "d", nil]), | ||
] | ||
@array = Arrow::SparseUnionArray.new(@data_type, type_ids, fields) | ||
end | ||
|
||
def test_value_data_type | ||
assert_equal(@data_type, | ||
@array.value_data_type) | ||
end | ||
|
||
def test_field | ||
assert_equal([ | ||
build_int16_array([1, nil, nil, nil, 5]), | ||
build_string_array([nil, "b", nil, "d", nil]), | ||
], | ||
[ | ||
@array.get_field(0), | ||
@array.get_field(1), | ||
]) | ||
end | ||
end | ||
end |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we use
arrow_union_data_type->type_codes()
directly for arrow::UnionArray::MakeSparse()
instead of copying it?