Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ARROW-5155: [GLib][Ruby] Add support for building union arrays from data type #4127

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
97 changes: 97 additions & 0 deletions c_glib/arrow-glib/composite-array.cpp
Expand Up @@ -366,6 +366,53 @@ garrow_sparse_union_array_new(GArrowInt8Array *type_ids,
}
}

/**
 * garrow_sparse_union_array_new_data_type:
 * @data_type: The data type for the sparse array.
 * @type_ids: The field type IDs for each value as #GArrowInt8Array.
 * @fields: (element-type GArrowArray): The arrays for each field
 *   as #GList of #GArrowArray.
 * @error: (nullable): Return location for a #GError or %NULL.
 *
 * The field names and the type codes of the created array are
 * taken from @data_type.
 *
 * Returns: (nullable): A newly created #GArrowSparseUnionArray
 *   or %NULL on error.
 *
 * Since: 0.14.0
 */
GArrowSparseUnionArray *
garrow_sparse_union_array_new_data_type(GArrowSparseUnionDataType *data_type,
                                        GArrowInt8Array *type_ids,
                                        GList *fields,
                                        GError **error)
{
  const auto arrow_union_type =
    std::static_pointer_cast<arrow::UnionType>(
      garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)));

  // One name per child field of the union data type, in declaration order.
  std::vector<std::string> names;
  for (const auto &child : arrow_union_type->children()) {
    names.emplace_back(child->name());
  }

  const auto arrow_ids = garrow_array_get_raw(GARROW_ARRAY(type_ids));

  // Unwrap every GArrowArray stored in the GList.
  std::vector<std::shared_ptr<arrow::Array>> children;
  for (GList *cursor = fields; cursor != NULL; cursor = cursor->next) {
    children.emplace_back(garrow_array_get_raw(GARROW_ARRAY(cursor->data)));
  }

  std::shared_ptr<arrow::Array> arrow_array;
  const auto status =
    arrow::UnionArray::MakeSparse(*arrow_ids,
                                  children,
                                  names,
                                  arrow_union_type->type_codes(),
                                  &arrow_array);
  if (!garrow_error_check(error,
                          status,
                          "[sparse-union-array][new][data-type]")) {
    return NULL;
  }
  return GARROW_SPARSE_UNION_ARRAY(garrow_array_new_raw(&arrow_array));
}


G_DEFINE_TYPE(GArrowDenseUnionArray,
garrow_dense_union_array,
Expand Down Expand Up @@ -420,6 +467,56 @@ garrow_dense_union_array_new(GArrowInt8Array *type_ids,
}
}

/**
 * garrow_dense_union_array_new_data_type:
 * @data_type: The data type for the dense union array.
 * @type_ids: The field type IDs for each value as #GArrowInt8Array.
 * @value_offsets: The value offsets for each value as #GArrowInt32Array.
 *   Each offset is counted for each type.
 * @fields: (element-type GArrowArray): The arrays for each field
 *   as #GList of #GArrowArray.
 * @error: (nullable): Return location for a #GError or %NULL.
 *
 * The field names and the type codes of the created array are
 * taken from @data_type.
 *
 * Returns: (nullable): A newly created #GArrowDenseUnionArray
 *   or %NULL on error.
 *
 * Since: 0.14.0
 */
GArrowDenseUnionArray *
garrow_dense_union_array_new_data_type(GArrowDenseUnionDataType *data_type,
                                       GArrowInt8Array *type_ids,
                                       GArrowInt32Array *value_offsets,
                                       GList *fields,
                                       GError **error)
{
  auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type));
  auto arrow_union_data_type =
    std::static_pointer_cast<arrow::UnionType>(arrow_data_type);

  // One name per child field of the union data type, in declaration order.
  std::vector<std::string> arrow_field_names;
  for (const auto &arrow_field : arrow_union_data_type->children()) {
    arrow_field_names.push_back(arrow_field->name());
  }

  auto arrow_type_ids = garrow_array_get_raw(GARROW_ARRAY(type_ids));
  auto arrow_value_offsets = garrow_array_get_raw(GARROW_ARRAY(value_offsets));

  // Unwrap every GArrowArray stored in the GList.
  std::vector<std::shared_ptr<arrow::Array>> arrow_fields;
  for (auto node = fields; node; node = node->next) {
    auto *field = GARROW_ARRAY(node->data);
    arrow_fields.push_back(garrow_array_get_raw(field));
  }

  std::shared_ptr<arrow::Array> arrow_union_array;
  auto status = arrow::UnionArray::MakeDense(*arrow_type_ids,
                                             *arrow_value_offsets,
                                             arrow_fields,
                                             arrow_field_names,
                                             arrow_union_data_type->type_codes(),
                                             &arrow_union_array);
  if (!garrow_error_check(error,
                          status,
                          "[dense-union-array][new][data-type]")) {
    return NULL;
  }
  return GARROW_DENSE_UNION_ARRAY(garrow_array_new_raw(&arrow_union_array));
}


G_DEFINE_TYPE(GArrowDictionaryArray,
garrow_dictionary_array,
Expand Down
11 changes: 11 additions & 0 deletions c_glib/arrow-glib/composite-array.h
Expand Up @@ -108,6 +108,11 @@ GArrowSparseUnionArray *
garrow_sparse_union_array_new(GArrowInt8Array *type_ids,
GList *fields,
GError **error);
/* Creates a sparse union array from an explicit union data type, the
 * per-value type IDs and the per-field arrays. The result is NULL on
 * error. The gtk-doc for this function lives next to its definition in
 * composite-array.cpp. */
GArrowSparseUnionArray *
garrow_sparse_union_array_new_data_type(GArrowSparseUnionDataType *data_type,
GArrowInt8Array *type_ids,
GList *fields,
GError **error);


#define GARROW_TYPE_DENSE_UNION_ARRAY (garrow_dense_union_array_get_type())
Expand All @@ -126,6 +131,12 @@ garrow_dense_union_array_new(GArrowInt8Array *type_ids,
GArrowInt32Array *value_offsets,
GList *fields,
GError **error);
/* Creates a dense union array from an explicit union data type, the
 * per-value type IDs, the per-value offsets and the per-field arrays.
 * The result is NULL on error. The gtk-doc for this function lives next
 * to its definition in composite-array.cpp. */
GArrowDenseUnionArray *
garrow_dense_union_array_new_data_type(GArrowDenseUnionDataType *data_type,
GArrowInt8Array *type_ids,
GArrowInt32Array *value_offsets,
GList *fields,
GError **error);


#define GARROW_TYPE_DICTIONARY_ARRAY (garrow_dictionary_array_get_type())
Expand Down
90 changes: 64 additions & 26 deletions c_glib/test/test-dense-union-array.rb
Expand Up @@ -18,33 +18,71 @@
class TestDenseUnionArray < Test::Unit::TestCase
include Helper::Buildable

def setup
type_ids = build_int8_array([0, 1, nil, 1, 1])
value_offsets = build_int32_array([0, 0, 0, 1, 2])
fields = [
build_int16_array([1]),
build_string_array(["a", "b", "c"]),
]
@array = Arrow::DenseUnionArray.new(type_ids, value_offsets, fields)
end
sub_test_case(".new") do
sub_test_case("default") do
def setup
type_ids = build_int8_array([0, 1, nil, 1, 1])
value_offsets = build_int32_array([0, 0, 0, 1, 2])
fields = [
build_int16_array([1]),
build_string_array(["a", "b", "c"]),
]
@array = Arrow::DenseUnionArray.new(type_ids, value_offsets, fields)
end

def test_value_data_type
fields = [
Arrow::Field.new("0", Arrow::Int16DataType.new),
Arrow::Field.new("1", Arrow::StringDataType.new),
]
assert_equal(Arrow::DenseUnionDataType.new(fields, [0, 1]),
@array.value_data_type)
end
def test_value_data_type
fields = [
Arrow::Field.new("0", Arrow::Int16DataType.new),
Arrow::Field.new("1", Arrow::StringDataType.new),
]
assert_equal(Arrow::DenseUnionDataType.new(fields, [0, 1]),
@array.value_data_type)
end

def test_field
assert_equal([
build_int16_array([1]),
build_string_array(["a", "b", "c"]),
],
[
@array.get_field(0),
@array.get_field(1),
])
end
end

sub_test_case("DataType") do
def setup
data_type_fields = [
Arrow::Field.new("number", Arrow::Int16DataType.new),
Arrow::Field.new("text", Arrow::StringDataType.new),
]
type_codes = [11, 13]
@data_type = Arrow::DenseUnionDataType.new(data_type_fields, type_codes)
type_ids = build_int8_array([11, 13, nil, 13, 13])
value_offsets = build_int32_array([0, 0, 0, 1, 2])
fields = [
build_int16_array([1]),
build_string_array(["a", "b", "c"])
]
@array = Arrow::DenseUnionArray.new(@data_type, type_ids, value_offsets, fields)
end

def test_value_data_type
assert_equal(@data_type,
@array.value_data_type)
end

def test_field
assert_equal([
build_int16_array([1]),
build_string_array(["a", "b", "c"]),
],
[
@array.get_field(0),
@array.get_field(1),
])
def test_field
assert_equal([
build_int16_array([1]),
build_string_array(["a", "b", "c"]),
],
[
@array.get_field(0),
@array.get_field(1),
])
end
end
end
end
87 changes: 62 additions & 25 deletions c_glib/test/test-sparse-union-array.rb
Expand Up @@ -18,32 +18,69 @@
class TestSparseUnionArray < Test::Unit::TestCase
include Helper::Buildable

def setup
type_ids = build_int8_array([0, 1, nil, 1, 0])
fields = [
build_int16_array([1, nil, nil, nil, 5]),
build_string_array([nil, "b", nil, "d", nil]),
]
@array = Arrow::SparseUnionArray.new(type_ids, fields)
end
sub_test_case(".new") do
sub_test_case("default") do
def setup
type_ids = build_int8_array([0, 1, nil, 1, 0])
fields = [
build_int16_array([1, nil, nil, nil, 5]),
build_string_array([nil, "b", nil, "d", nil]),
]
@array = Arrow::SparseUnionArray.new(type_ids, fields)
end

def test_value_data_type
fields = [
Arrow::Field.new("0", Arrow::Int16DataType.new),
Arrow::Field.new("1", Arrow::StringDataType.new),
]
assert_equal(Arrow::SparseUnionDataType.new(fields, [0, 1]),
@array.value_data_type)
end
def test_value_data_type
fields = [
Arrow::Field.new("0", Arrow::Int16DataType.new),
Arrow::Field.new("1", Arrow::StringDataType.new),
]
assert_equal(Arrow::SparseUnionDataType.new(fields, [0, 1]),
@array.value_data_type)
end

def test_field
assert_equal([
build_int16_array([1, nil, nil, nil, 5]),
build_string_array([nil, "b", nil, "d", nil]),
],
[
@array.get_field(0),
@array.get_field(1),
])
end
end

sub_test_case("DataType") do
def setup
data_type_fields = [
Arrow::Field.new("number", Arrow::Int16DataType.new),
Arrow::Field.new("text", Arrow::StringDataType.new),
]
type_codes = [11, 13]
@data_type = Arrow::SparseUnionDataType.new(data_type_fields, type_codes)
type_ids = build_int8_array([11, 13, nil, 13, 11])
fields = [
build_int16_array([1, nil, nil, nil, 5]),
build_string_array([nil, "b", nil, "d", nil]),
]
@array = Arrow::SparseUnionArray.new(@data_type, type_ids, fields)
end

def test_value_data_type
assert_equal(@data_type,
@array.value_data_type)
end

def test_field
assert_equal([
build_int16_array([1, nil, nil, nil, 5]),
build_string_array([nil, "b", nil, "d", nil]),
],
[
@array.get_field(0),
@array.get_field(1),
])
def test_field
assert_equal([
build_int16_array([1, nil, nil, nil, 5]),
build_string_array([nil, "b", nil, "d", nil]),
],
[
@array.get_field(0),
@array.get_field(1),
])
end
end
end
end
Expand Up @@ -69,12 +69,8 @@ def build_record_batch(type, records)
offsets << (type_ids.count(type_id) - 1)
end
end
# TODO
# union_array = Arrow::DenseUnionArray.new(schema.fields[0].data_type,
# Arrow::Int8Array.new(type_ids),
# Arrow::Int32Array.new(offsets),
# arrays)
union_array = Arrow::DenseUnionArray.new(Arrow::Int8Array.new(type_ids),
union_array = Arrow::DenseUnionArray.new(schema.fields[0].data_type,
Arrow::Int8Array.new(type_ids),
Arrow::Int32Array.new(offsets),
arrays)
schema = Arrow::Schema.new(column: union_array.value_data_type)
Expand Down
Expand Up @@ -59,11 +59,8 @@ def build_record_batch(type, records)
type_ids << type_codes[1]
end
end
# TODO
# union_array = Arrow::SparseUnionArray.new(schema.fields[0].data_type,
# Arrow::Int8Array.new(type_ids),
# arrays)
union_array = Arrow::SparseUnionArray.new(Arrow::Int8Array.new(type_ids),
union_array = Arrow::SparseUnionArray.new(schema.fields[0].data_type,
Arrow::Int8Array.new(type_ids),
arrays)
schema = Arrow::Schema.new(column: union_array.value_data_type)
Arrow::RecordBatch.new(schema,
Expand Down