Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions c_glib/arrow-glib/reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#endif

#include <arrow-glib/column.hpp>
#include <arrow-glib/data-type.hpp>
#include <arrow-glib/error.hpp>
#include <arrow-glib/record-batch.hpp>
#include <arrow-glib/schema.hpp>
Expand Down Expand Up @@ -1276,6 +1277,73 @@ garrow_csv_read_options_new(void)
return GARROW_CSV_READ_OPTIONS(csv_read_options);
}

/**
 * garrow_csv_read_options_add_column_type:
 * @options: A #GArrowCSVReadOptions.
 * @name: The name of the column whose value type is being declared.
 * @data_type: The #GArrowDataType to associate with the column.
 *
 * Registers @data_type as the value type of the column named @name
 * in the conversion options held by @options.
 *
 * Since: 0.12.0
 */
void
garrow_csv_read_options_add_column_type(GArrowCSVReadOptions *options,
                                        const gchar *name,
                                        GArrowDataType *data_type)
{
  auto priv = GARROW_CSV_READ_OPTIONS_GET_PRIVATE(options);
  auto &column_types = priv->convert_options.column_types;
  /* Unwrap the GLib object into the underlying Arrow data type first,
   * then store it under the column name. */
  auto raw_data_type = garrow_data_type_get_raw(data_type);
  column_types[name] = raw_data_type;
}

/**
 * garrow_csv_read_options_add_schema:
 * @options: A #GArrowCSVReadOptions.
 * @schema: The #GArrowSchema that specifies columns and their types.
 *
 * Add value types for columns in the schema. Every field of @schema
 * is registered under its own name with the field's data type.
 *
 * Since: 0.12.0
 */
void
garrow_csv_read_options_add_schema(GArrowCSVReadOptions *options,
                                   GArrowSchema *schema)
{
  auto priv = GARROW_CSV_READ_OPTIONS_GET_PRIVATE(options);
  auto arrow_schema = garrow_schema_get_raw(schema);
  /* Bind each field by const reference: iterating by value would copy
   * the field handle on every iteration for no benefit. */
  for (const auto &field : arrow_schema->fields()) {
    priv->convert_options.column_types[field->name()] = field->type();
  }
}

/**
 * garrow_csv_read_options_get_column_types:
 * @options: A #GArrowCSVReadOptions.
 *
 * Builds a new hash table from the column types registered in
 * @options. Keys are duplicated column names and values are newly
 * created #GArrowDataType objects; the table frees both when an entry
 * is removed or the table is destroyed.
 *
 * Returns: (transfer full) (element-type gchar* GArrowDataType):
 *   The column name and value type mapping of the options.
 *
 * Since: 0.12.0
 */
GHashTable *
garrow_csv_read_options_get_column_types(GArrowCSVReadOptions *options)
{
  auto priv = GARROW_CSV_READ_OPTIONS_GET_PRIVATE(options);
  GHashTable *types = g_hash_table_new_full(g_str_hash,
                                            g_str_equal,
                                            g_free,
                                            g_object_unref);
  /* Iterate by const reference so neither the (name, type) pair nor
   * the name string is copied on each iteration. */
  for (const auto &entry : priv->convert_options.column_types) {
    const auto &arrow_name = entry.first;
    /* garrow_data_type_new_raw() is handed the address of a data type
     * handle, so keep one mutable local copy of the (const) map value. */
    auto arrow_data_type = entry.second;
    g_hash_table_insert(types,
                        g_strdup(arrow_name.c_str()),
                        garrow_data_type_new_raw(&arrow_data_type));
  }
  return types;
}


typedef struct GArrowCSVReaderPrivate_ {
std::shared_ptr<arrow::csv::TableReader> reader;
Expand Down
9 changes: 9 additions & 0 deletions c_glib/arrow-glib/reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,15 @@ struct _GArrowCSVReadOptionsClass
};

GArrowCSVReadOptions *garrow_csv_read_options_new(void);
/* Registers data_type as the value type of the column called name
 * in the given CSV read options. */
void
garrow_csv_read_options_add_column_type(GArrowCSVReadOptions *options,
const gchar *name,
GArrowDataType *data_type);
/* Registers the data type of every field in schema, keyed by the
 * field's name, in the given CSV read options. */
void
garrow_csv_read_options_add_schema(GArrowCSVReadOptions *options,
GArrowSchema *schema);
/* Returns a newly created hash table mapping column names to the
 * data types registered in the given CSV read options. */
GHashTable *
garrow_csv_read_options_get_column_types(GArrowCSVReadOptions *options);

#define GARROW_TYPE_CSV_READER (garrow_csv_reader_get_type())
G_DECLARE_DERIVABLE_TYPE(GArrowCSVReader,
Expand Down
64 changes: 50 additions & 14 deletions c_glib/test/test-csv-reader.rb
Original file line number Diff line number Diff line change
Expand Up @@ -40,20 +40,56 @@ def test_default
table.read)
end

def test_options
options = Arrow::CSVReadOptions.new
options.quoted = false
table = Arrow::CSVReader.new(open_input(<<-CSV), options)
message,count
"Start",2
"Shutdown",9
CSV
columns = {
"message" => build_string_array(["\"Start\"", "\"Shutdown\""]),
"count" => build_int64_array([2, 9]),
}
assert_equal(build_table(columns),
table.read)
sub_test_case("options") do
# Column types added one at a time must be the types of the columns
# in the table that is read back.
def test_add_column_type
  read_options = Arrow::CSVReadOptions.new
  read_options.add_column_type("count", Arrow::UInt8DataType.new)
  read_options.add_column_type("valid", Arrow::BooleanDataType.new)
  input = open_input(<<-CSV)
count,valid
2,1
9,0
  CSV
  reader = Arrow::CSVReader.new(input, read_options)
  expected_columns = {
    "count" => build_uint8_array([2, 9]),
    "valid" => build_boolean_array([true, false]),
  }
  expected_table = build_table(expected_columns)
  assert_equal(expected_table, reader.read)
end

# Column types supplied through a schema must be the types of the
# columns in the table that is read back.
def test_add_schema
  read_options = Arrow::CSVReadOptions.new
  count_field = Arrow::Field.new("count", Arrow::UInt8DataType.new)
  valid_field = Arrow::Field.new("valid", Arrow::BooleanDataType.new)
  read_options.add_schema(Arrow::Schema.new([count_field, valid_field]))
  input = open_input(<<-CSV)
count,valid
2,1
9,0
  CSV
  reader = Arrow::CSVReader.new(input, read_options)
  expected_columns = {
    "count" => build_uint8_array([2, 9]),
    "valid" => build_boolean_array([true, false]),
  }
  expected_table = build_table(expected_columns)
  assert_equal(expected_table, reader.read)
end

# The options must report back exactly the name => data type pairs
# that were added to them.
def test_column_types
  # Presumably gates the test on a minimum bindings version (3.3.1);
  # confirm against the helper's definition.
  require_gi_bindings(3, 3, 1)
  read_options = Arrow::CSVReadOptions.new
  read_options.add_column_type("count", Arrow::UInt8DataType.new)
  read_options.add_column_type("valid", Arrow::BooleanDataType.new)
  expected_types = {
    "count" => Arrow::UInt8DataType.new,
    "valid" => Arrow::BooleanDataType.new,
  }
  assert_equal(expected_types, read_options.column_types)
end
end
end
end