diff --git a/.gitignore b/.gitignore index 1b782f9d..8c7aedc6 100644 --- a/.gitignore +++ b/.gitignore @@ -45,6 +45,7 @@ *.bson *.capnproto *.cbor +*.csv *.json *.fb *.flexbuf diff --git a/CMakeLists.txt b/CMakeLists.txt index e811cb42..e73bd25a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,6 +9,7 @@ option(REFLECTCPP_AVRO "Enable AVRO support" ${REFLECTCPP_ALL_FORMATS}) option(REFLECTCPP_BSON "Enable BSON support" ${REFLECTCPP_ALL_FORMATS}) option(REFLECTCPP_CAPNPROTO "Enable Cap’n Proto support" ${REFLECTCPP_ALL_FORMATS}) option(REFLECTCPP_CBOR "Enable CBOR support" ${REFLECTCPP_ALL_FORMATS}) +option(REFLECTCPP_CSV "Enable CSV support" ${REFLECTCPP_ALL_FORMATS}) option(REFLECTCPP_FLEXBUFFERS "Enable flexbuffers support" ${REFLECTCPP_ALL_FORMATS}) option(REFLECTCPP_MSGPACK "Enable msgpack support" ${REFLECTCPP_ALL_FORMATS}) option(REFLECTCPP_PARQUET "Enable parquet support" ${REFLECTCPP_ALL_FORMATS}) @@ -55,8 +56,8 @@ endif() if (REFLECTCPP_BUILD_TESTS OR REFLECTCPP_BUILD_BENCHMARKS OR (REFLECTCPP_JSON AND NOT REFLECTCPP_USE_BUNDLED_DEPENDENCIES) OR REFLECTCPP_AVRO OR - REFLECTCPP_BSON OR REFLECTCPP_CAPNPROTO OR REFLECTCPP_CBOR OR REFLECTCPP_FLEXBUFFERS OR - REFLECTCPP_MSGPACK OR REFLECTCPP_PARQUET OR REFLECTCPP_XML OR + REFLECTCPP_BSON OR REFLECTCPP_CAPNPROTO OR REFLECTCPP_CBOR OR REFLECTCPP_CSV OR + REFLECTCPP_FLEXBUFFERS OR REFLECTCPP_MSGPACK OR REFLECTCPP_PARQUET OR REFLECTCPP_XML OR REFLECTCPP_TOML OR REFLECTCPP_UBJSON OR REFLECTCPP_YAML) # enable vcpkg per default if features other than JSON are required set(REFLECTCPP_USE_VCPKG_DEFAULT ON) @@ -95,6 +96,10 @@ if (REFLECTCPP_USE_VCPKG) list(APPEND VCPKG_MANIFEST_FEATURES "cbor") endif() + if (REFLECTCPP_CSV) + list(APPEND VCPKG_MANIFEST_FEATURES "csv") + endif() + if (NOT REFLECTCPP_USE_BUNDLED_DEPENDENCIES) list(APPEND VCPKG_MANIFEST_FEATURES "ctre") endif() @@ -246,6 +251,13 @@ if (REFLECTCPP_CBOR) include_directories(PUBLIC ${jsoncons_INCLUDE_DIRS}) endif () +if (REFLECTCPP_CSV) + if (NOT TARGET 
Arrow) + find_package(Arrow CONFIG REQUIRED) + endif() + target_link_libraries(reflectcpp PUBLIC "$,Arrow::arrow_static,Arrow::arrow_shared>") +endif () + if (REFLECTCPP_FLEXBUFFERS) list(APPEND REFLECT_CPP_SOURCES src/reflectcpp_flexbuf.cpp diff --git a/docs/supported_formats/csv.md b/docs/supported_formats/csv.md new file mode 100644 index 00000000..0065ea67 --- /dev/null +++ b/docs/supported_formats/csv.md @@ -0,0 +1,218 @@ +# csv + +For CSV support, include the header `<rfl/csv.hpp>` and link to the [Apache Arrow](https://arrow.apache.org/) library. +Furthermore, when compiling reflect-cpp, you need to pass `-DREFLECTCPP_CSV=ON` to cmake. + +CSV is a tabular text format. Like other tabular formats in reflect-cpp, CSV is designed for collections of flat records and has limitations for nested or variant types. + +## Reading and writing + +Suppose you have a struct like this: + +```cpp +struct Person { + std::string first_name; + std::string last_name = "Simpson"; + rfl::Timestamp<"%Y-%m-%d"> birthday; + unsigned int age; + rfl::Email email; +}; +``` + +Important: CSV is a tabular format that requires collections of records. You cannot serialize individual structs - you must use containers like `std::vector`, `std::deque`, etc. 
+ +Write a collection to a string (CSV bytes) like this: + +```cpp +const auto people = std::vector<Person>{ + Person{.first_name = "Bart", .birthday = "1987-04-19", .age = 10, .email = "bart@simpson.com"}, + Person{.first_name = "Lisa", .birthday = "1987-04-19", .age = 8, .email = "lisa@simpson.com"} +}; + +const std::string csv_text = rfl::csv::write(people); +``` + +Parse from a string or bytes view: + +```cpp +const rfl::Result<std::vector<Person>> result = rfl::csv::read<std::vector<Person>>(csv_text); +``` + +## Settings + +CSV behavior can be configured using `rfl::csv::Settings`: + +```cpp +const auto settings = rfl::csv::Settings{} + .with_delimiter(';') + .with_quoting(true) + .with_quote_char('"') + .with_null_string("n/a") + .with_double_quote(true) + .with_escaping(false) + .with_escape_char('\\') + .with_newlines_in_values(false) + .with_ignore_empty_lines(true) + .with_batch_size(1024); + +const std::string csv_text = rfl::csv::write(people, settings); +``` + +Key options: +- `batch_size` - Maximum number of rows processed per batch (performance tuning) +- `delimiter` - Field delimiter character +- `quoting` - Whether quoting is used (reading and writing) +- `quote_char` - Quote character used when reading +- `null_string` - String representation for null values +- `double_quote` - Whether a quote inside a value is double-quoted (reading) +- `escaping` - Whether escaping is used (reading) +- `escape_char` - Escape character (reading) +- `newlines_in_values` - Whether CR/LF are allowed inside values (reading) +- `ignore_empty_lines` - Whether empty lines are ignored (reading) + +## Loading and saving + +You can load from and save to disk: + +```cpp +const rfl::Result<std::vector<Person>> result = rfl::csv::load<std::vector<Person>>("/path/to/file.csv"); + +const auto people = std::vector<Person>{...}; +rfl::csv::save("/path/to/file.csv", people); +``` + +With custom settings: + +```cpp +const auto settings = rfl::csv::Settings{}.with_delimiter(';'); +rfl::csv::save("/path/to/file.csv", people, settings); +``` + +## Reading from and writing into streams + 
+You can read from any `std::istream` and write to any `std::ostream`: + +```cpp +const rfl::Result<std::vector<Person>> result = rfl::csv::read<std::vector<Person>>(my_istream); + +const auto people = std::vector<Person>{...}; +rfl::csv::write(people, my_ostream); +``` + +With custom settings: + +```cpp +const auto settings = rfl::csv::Settings{}.with_delimiter(';'); +rfl::csv::write(people, my_ostream, settings); +``` + +## Field name transformations + +Like other formats, CSV supports field name transformations via processors, e.g. `SnakeCaseToCamelCase`: + +```cpp +const auto people = std::vector<Person>{...}; +const auto result = rfl::csv::read<std::vector<Person>, rfl::SnakeCaseToCamelCase>(csv_text); +``` + +## Enums and validation + +CSV supports enums and validated types. Enums are written/read as strings: + +```cpp +enum class FirstName { Bart, Lisa, Maggie, Homer }; + +struct Person { + rfl::Rename<"firstName", FirstName> first_name; + rfl::Rename<"lastName", std::string> last_name; + rfl::Timestamp<"%Y-%m-%d"> birthday; + rfl::Validator<unsigned int, rfl::Minimum<0>, rfl::Maximum<130>> age; + rfl::Email email; +}; +``` + +## Limitations of tabular formats + +CSV, like other tabular formats, has limitations compared to hierarchical formats such as JSON or XML: + +### Collections requirement +You must serialize collections, not individual objects: +```cpp +std::vector<Person> people = {...}; // ✅ Correct +Person person = {...}; // ❌ Wrong - must be in a container +``` + +### No nested objects +Each field must be a primitive type, enum, or a simple validated type. 
Nested objects are not automatically flattened: +```cpp +// This would NOT work as expected - nested objects are not automatically flattened +struct Address { + std::string street; + std::string city; +}; + +struct Person { + std::string first_name; + std::string last_name; + Address address; // ❌ Will cause compilation errors for CSV +}; +``` + +### Using rfl::Flatten for nested objects +If you need to include nested objects, use `rfl::Flatten` to explicitly flatten them: +```cpp +struct Address { + std::string street; + std::string city; +}; + +struct Person { + std::string first_name; + std::string last_name; + rfl::Flatten<Address>
address; // ✅ This will flatten the Address fields +}; + +// The resulting CSV will have columns: first_name, last_name, street, city +``` + +### No variant types +Variant types like `std::variant`, `rfl::Variant`, or `rfl::TaggedUnion` cannot be serialized to CSV as separate columns: +```cpp +// ❌ This will NOT work +struct Person { + std::string first_name; + std::variant<std::string, int> status; // Variant - not supported + rfl::Variant<std::string, int> type; // rfl::Variant - not supported + rfl::TaggedUnion<"type", std::string, int> category; // TaggedUnion - not supported +}; +``` + +### No arrays (except bytestrings) +This CSV implementation does not support arrays (lists) of values in a single column. The only array-like field supported is binary data represented as bytestrings: +```cpp +// ❌ This will NOT work +struct Person { + std::string first_name; + std::vector<std::string> hobbies; // Array of strings - not supported + std::vector<int> scores; // Array of integers - not supported + std::vector<Address>
addresses; // Array of objects - not supported +}; + +// ✅ This works +struct Blob { + std::vector<char> binary_data; // Binary data supported as bytestring +}; +``` + +### Use cases +CSV is ideal for: +- Data exchange and interoperability +- Simple, flat data structures with consistent types +- Human-readable datasets + +CSV is less suitable for: +- Complex nested data structures +- Data with arrays or variant types +- Strict schemas with evolving types +- Very large datasets where binary columnar formats are preferred + diff --git a/docs/supported_formats/parquet.md b/docs/supported_formats/parquet.md index f259b3ce..0cba3e1b 100644 --- a/docs/supported_formats/parquet.md +++ b/docs/supported_formats/parquet.md @@ -1,3 +1,5 @@ +# parquet + For Parquet support, you must also include the header `<rfl/parquet.hpp>` and link to the [Apache Arrow](https://arrow.apache.org/) and [Apache Parquet](https://parquet.apache.org/) libraries. Furthermore, when compiling reflect-cpp, you need to pass `-DREFLECTCPP_PARQUET=ON` to cmake. diff --git a/include/rfl/Timestamp.hpp b/include/rfl/Timestamp.hpp index 9909c0f0..31eaa657 100644 --- a/include/rfl/Timestamp.hpp +++ b/include/rfl/Timestamp.hpp @@ -64,6 +64,12 @@ class Timestamp { return from_string(_str.c_str()); } + /// Returns a result containing the timestamp when successful or an Error + /// otherwise. + static Result make(const auto& _str) noexcept { + return from_string(_str); + } + /// Necessary for the serialization to work. 
ReflectionType reflection() const { char outstr[200]; diff --git a/include/rfl/csv.hpp b/include/rfl/csv.hpp new file mode 100644 index 00000000..fc657579 --- /dev/null +++ b/include/rfl/csv.hpp @@ -0,0 +1,10 @@ +#ifndef RFL_CSV_HPP_ +#define RFL_CSV_HPP_ + +#include "../rfl.hpp" +#include "csv/load.hpp" +#include "csv/read.hpp" +#include "csv/save.hpp" +#include "csv/write.hpp" + +#endif diff --git a/include/rfl/csv/Settings.hpp b/include/rfl/csv/Settings.hpp new file mode 100644 index 00000000..fa015213 --- /dev/null +++ b/include/rfl/csv/Settings.hpp @@ -0,0 +1,97 @@ +#ifndef RFL_CSV_SETTINGS_HPP_ +#define RFL_CSV_SETTINGS_HPP_ + +#include +#include + +#include "../Field.hpp" +#include "../replace.hpp" + +namespace rfl::csv { + +struct Settings { + /// Maximum number of rows processed at a time. + /// Data is processed in batches of N rows. This number + /// can impact performance. + int32_t batch_size = 1024; + + /// Field delimiter. + char delimiter = ','; + + /// Whether quoting is used. + bool quoting = true; + + /// Quoting character (if quoting is true). Only relevant for reading. + char quote_char = '"'; + + /// The string to be used for null values. Quotes are not allowed in this + /// string. + std::string null_string = "n/a"; + + /// Whether a quote inside a value is double-quoted. Only relevant for + /// reading. + bool double_quote = true; + + /// Whether escaping is used. Only relevant for reading. + bool escaping = false; + + /// Escaping character (if escaping is true). Only relevant for reading. + char escape_char = arrow::csv::kDefaultEscapeChar; + + /// Whether values are allowed to contain CR (0x0d) and LF (0x0a) + /// characters. Only relevant for reading. + bool newlines_in_values = false; + + /// Whether empty lines are ignored. + /// If false, an empty line represents a single empty value (assuming a + /// one-column CSV file). Only relevant for reading. 
+ bool ignore_empty_lines = true; + + Settings with_batch_size(const int32_t _batch_size) const noexcept { + return replace(*this, make_field<"batch_size">(_batch_size)); + } + + Settings with_delimiter(const char _delimiter) const noexcept { + return replace(*this, make_field<"delimiter">(_delimiter)); + } + + Settings with_quoting(const bool _quoting) const noexcept { + return replace(*this, make_field<"quoting">(_quoting)); + } + + Settings with_quote_char(const char _quote_char) const noexcept { + return replace(*this, make_field<"quote_char">(_quote_char)); + } + + Settings with_null_string(const std::string& _null_string) const noexcept { + return replace(*this, make_field<"null_string">(_null_string)); + } + + Settings with_double_quote(const bool _double_quote) const noexcept { + return replace(*this, make_field<"double_quote">(_double_quote)); + } + + Settings with_escaping(const bool _escaping) const noexcept { + return replace(*this, make_field<"escaping">(_escaping)); + } + + Settings with_escape_char(const char _escape_char) const noexcept { + return replace(*this, make_field<"escape_char">(_escape_char)); + } + + Settings with_newlines_in_values( + const bool _newlines_in_values) const noexcept { + return replace(*this, + make_field<"newlines_in_values">(_newlines_in_values)); + } + + Settings with_ignore_empty_lines( + const bool _ignore_empty_lines) const noexcept { + return replace(*this, + make_field<"ignore_empty_lines">(_ignore_empty_lines)); + } +}; + +} // namespace rfl::csv + +#endif diff --git a/include/rfl/csv/load.hpp b/include/rfl/csv/load.hpp new file mode 100644 index 00000000..a000070e --- /dev/null +++ b/include/rfl/csv/load.hpp @@ -0,0 +1,22 @@ +#ifndef RFL_CSV_CSV_HPP_ +#define RFL_CSV_CSV_HPP_ + +#include "../Result.hpp" +#include "../io/load_string.hpp" +#include "Settings.hpp" +#include "read.hpp" + +namespace rfl::csv { + +template +Result load(const std::string& _fname, + const Settings& _settings = Settings{}) { + const auto 
read_string = [&](const auto& _str) { + return read(_str, _settings); + }; + return rfl::io::load_string(_fname).and_then(read_string); +} + +} // namespace rfl::csv + +#endif diff --git a/include/rfl/csv/read.hpp b/include/rfl/csv/read.hpp new file mode 100644 index 00000000..178382f5 --- /dev/null +++ b/include/rfl/csv/read.hpp @@ -0,0 +1,91 @@ +#ifndef RFL_CSV_READ_HPP_ +#define RFL_CSV_READ_HPP_ + +#include +#include + +#include +#include +#include +#include + +#include "../Processors.hpp" +#include "../Result.hpp" +#include "../concepts.hpp" +#include "../internal/wrap_in_rfl_array_t.hpp" +#include "../parsing/tabular/ArrowReader.hpp" +#include "Settings.hpp" + +namespace rfl::csv { + +/// Parses an object from CSV using reflection. +template +Result> read( + const char* _str, const size_t _size, + const Settings& _settings = Settings{}) { + arrow::io::IOContext io_context = arrow::io::default_io_context(); + + const auto buffer = std::make_shared( + internal::ptr_cast(_str), _size); + + std::shared_ptr input = + std::make_shared(buffer); + + auto read_options = arrow::csv::ReadOptions::Defaults(); + auto convert_options = arrow::csv::ConvertOptions::Defaults(); + convert_options.null_values = + std::vector({_settings.null_string}); + convert_options.strings_can_be_null = true; + + auto parse_options = arrow::csv::ParseOptions::Defaults(); + parse_options.delimiter = _settings.delimiter; + parse_options.quoting = _settings.quoting; + parse_options.quote_char = _settings.quote_char; + parse_options.double_quote = _settings.double_quote; + parse_options.escaping = _settings.escaping; + parse_options.escape_char = _settings.escape_char; + parse_options.newlines_in_values = _settings.newlines_in_values; + parse_options.ignore_empty_lines = _settings.ignore_empty_lines; + + auto maybe_reader = arrow::csv::TableReader::Make( + io_context, input, read_options, parse_options, convert_options); + + if (!maybe_reader.ok()) { + return error("Could not construct CSV 
reader: " + + maybe_reader.status().message()); + } + + std::shared_ptr reader = *maybe_reader; + + auto maybe_table = reader->Read(); + if (!maybe_table.ok()) { + return error("Could not read table: " + maybe_table.status().message()); + } + + const std::shared_ptr table = *maybe_table; + + using ArrowReader = + parsing::tabular::ArrowReader; + + return ArrowReader::make(table).and_then( + [](const auto& _r) { return _r.read(); }); +} + +/// Parses an object from CSV using reflection. +template +auto read(const std::string_view _str, const Settings& _settings = Settings{}) { + return read(_str.data(), _str.size(), _settings); +} + +/// Parses an object from a stream. +template +auto read(std::istream& _stream, const Settings& _settings = Settings{}) { + std::istreambuf_iterator begin(_stream), end; + auto bytes = std::vector(begin, end); + return read(bytes.data(), bytes.size(), _settings); +} + +} // namespace rfl::csv + +#endif diff --git a/include/rfl/csv/save.hpp b/include/rfl/csv/save.hpp new file mode 100644 index 00000000..56dcd928 --- /dev/null +++ b/include/rfl/csv/save.hpp @@ -0,0 +1,26 @@ +#ifndef RFL_CSV_SAVE_HPP_ +#define RFL_CSV_SAVE_HPP_ + +#include +#include +#include + +#include "../Result.hpp" +#include "../io/save_string.hpp" +#include "Settings.hpp" +#include "write.hpp" + +namespace rfl::csv { + +template +Result save(const std::string& _fname, const auto& _obj, + const Settings& _settings = Settings{}) { + const auto write_func = [&](const auto& _obj, auto& _stream) -> auto& { + return write(_obj, _stream, _settings); + }; + return rfl::io::save_string(_fname, _obj, write_func); +} + +} // namespace rfl::csv + +#endif diff --git a/include/rfl/csv/write.hpp b/include/rfl/csv/write.hpp new file mode 100644 index 00000000..6f9bd931 --- /dev/null +++ b/include/rfl/csv/write.hpp @@ -0,0 +1,81 @@ +#ifndef RFL_CSV_WRITE_HPP_ +#define RFL_CSV_WRITE_HPP_ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include 
+#include + +#include "../Processors.hpp" +#include "../Ref.hpp" +#include "../parsing/tabular/ArrowWriter.hpp" +#include "Settings.hpp" + +namespace rfl::csv { + +/// Returns CSV bytes; throws std::runtime_error if an Arrow call fails. +template +Ref to_buffer(const auto& _arr, const Settings& _settings) { + using T = std::remove_cvref_t; + + const auto table = + parsing::tabular::ArrowWriter(_settings.batch_size) + .to_table(_arr); + + const auto output_buffer = arrow::io::BufferOutputStream::Create(); + + if (!output_buffer.ok()) { + throw std::runtime_error(output_buffer.status().message()); + } + + auto options = arrow::csv::WriteOptions::Defaults(); + options.batch_size = _settings.batch_size; + options.delimiter = _settings.delimiter; + options.null_string = _settings.null_string; + options.quoting_style = _settings.quoting ? arrow::csv::QuotingStyle::Needed + : arrow::csv::QuotingStyle::None; + + const auto status = + arrow::csv::WriteCSV(*table, options, output_buffer.ValueOrDie().get()); + + if (!status.ok()) { + throw std::runtime_error(status.message()); + } + + const auto buffer = output_buffer.ValueOrDie()->Finish(); + + if (!buffer.ok()) { + throw std::runtime_error(buffer.status().message()); + } + + return Ref::make(buffer.ValueOrDie()).value(); +} + +/// Returns the CSV text for _arr as a std::string. +template +std::string write(const auto& _arr, const Settings& _settings = Settings{}) { + const auto buffer = to_buffer(_arr, _settings); + const auto view = std::string_view(*buffer); + return std::string(view); +} + +/// Writes a CSV into an ostream. 
+template +std::ostream& write(const auto& _arr, std::ostream& _stream, + const Settings& _settings = Settings{}) { + auto buffer = to_buffer(_arr, _settings); + _stream << std::string_view(*buffer); + return _stream; +} + +} // namespace rfl::csv + +#endif diff --git a/include/rfl/parquet/read.hpp b/include/rfl/parquet/read.hpp index 68ff2e76..4da4d9f1 100644 --- a/include/rfl/parquet/read.hpp +++ b/include/rfl/parquet/read.hpp @@ -42,7 +42,8 @@ Result> read( return error("Could not read table: " + status.message()); } - using ArrowReader = parsing::tabular::ArrowReader; + using ArrowReader = parsing::tabular::ArrowReader< + T, parsing::tabular::SerializationType::parquet, Ps...>; return ArrowReader::make(table).and_then( [](const auto& _r) { return _r.read(); }); diff --git a/include/rfl/parquet/save.hpp b/include/rfl/parquet/save.hpp index 092abe64..93929f6e 100644 --- a/include/rfl/parquet/save.hpp +++ b/include/rfl/parquet/save.hpp @@ -7,14 +7,16 @@ #include "../Result.hpp" #include "../io/save_bytes.hpp" +#include "Settings.hpp" #include "write.hpp" namespace rfl::parquet { template -Result save(const std::string& _fname, const auto& _obj) { - const auto write_func = [](const auto& _obj, auto& _stream) -> auto& { - return write(_obj, _stream); +Result save(const std::string& _fname, const auto& _obj, + const Settings& _settings = Settings{}) { + const auto write_func = [&](const auto& _obj, auto& _stream) -> auto& { + return write(_obj, _stream, _settings); }; return rfl::io::save_bytes(_fname, _obj, write_func); } diff --git a/include/rfl/parquet/write.hpp b/include/rfl/parquet/write.hpp index 0dd397af..d39147e3 100644 --- a/include/rfl/parquet/write.hpp +++ b/include/rfl/parquet/write.hpp @@ -25,7 +25,9 @@ Ref to_buffer(const auto& _arr, const Settings& _settings) { using T = std::remove_cvref_t; const auto table = - parsing::tabular::ArrowWriter(_settings.chunksize) + parsing::tabular::ArrowWriter< + T, parsing::tabular::SerializationType::parquet, Ps...>( 
+ _settings.chunksize) .to_table(_arr); const auto props = ::parquet::WriterProperties::Builder() @@ -70,7 +72,7 @@ std::vector write(const auto& _arr, /// Writes a PARQUET into an ostream. template std::ostream& write(const auto& _arr, std::ostream& _stream, - const Settings& _settings = Settings{}) noexcept { + const Settings& _settings = Settings{}) { auto buffer = to_buffer(_arr, _settings); _stream << std::string_view(*buffer); return _stream; diff --git a/include/rfl/parsing/tabular/ArrowReader.hpp b/include/rfl/parsing/tabular/ArrowReader.hpp index 153778b7..5956f468 100644 --- a/include/rfl/parsing/tabular/ArrowReader.hpp +++ b/include/rfl/parsing/tabular/ArrowReader.hpp @@ -23,7 +23,7 @@ namespace rfl::parsing::tabular { -template +template class ArrowReader { static_assert(!Processors::add_tags_to_variants_, "rfl::AddTagsToVariants cannot be used for tabular data."); @@ -51,7 +51,8 @@ class ArrowReader { ~ArrowReader() = default; Result read() const noexcept { - return make_chunked_array_iterators>(table_) + return make_chunked_array_iterators, _s>( + table_) .and_then([&](auto chunked_array_iterators) -> Result { VecType result; while (!end(chunked_array_iterators)) { @@ -78,8 +79,11 @@ class ArrowReader { Result new_value(auto* _chunked_array_iterators) const noexcept { alignas(ValueType) unsigned char buf[sizeof(ValueType)]{}; auto ptr = internal::ptr_cast(&buf); + auto view = to_view(*ptr); + using ViewType = std::remove_cvref_t; + try { const auto set_one = [&](std::integral_constant) { using FieldType = tuple_element_t<_i, typename ViewType::Fields>; @@ -99,11 +103,11 @@ class ArrowReader { [&](std::integer_sequence) { (set_one(std::integral_constant{}), ...); }(std::make_integer_sequence()); - - return std::move(*ptr); } catch (const std::exception& e) { return error(e.what()); } + + return std::move(*ptr); } template diff --git a/include/rfl/parsing/tabular/ArrowTypes.hpp b/include/rfl/parsing/tabular/ArrowTypes.hpp index 2c7bee83..26acc3fc 100644 
--- a/include/rfl/parsing/tabular/ArrowTypes.hpp +++ b/include/rfl/parsing/tabular/ArrowTypes.hpp @@ -26,11 +26,17 @@ namespace rfl::parsing::tabular { -template +enum class SerializationType { csv, parquet }; + +template struct ArrowTypes; -template <> -struct ArrowTypes { +template +Result::ArrayType>> transform_numerical_array( + const std::shared_ptr& _arr) noexcept; + +template +struct ArrowTypes { using ArrayType = arrow::BooleanArray; using BuilderType = arrow::BooleanBuilder; @@ -43,6 +49,16 @@ struct ArrowTypes { } } + static Result> get_array( + const std::shared_ptr& _arr) { + if (_arr->type()->Equals(data_type())) { + return Ref::make(std::static_pointer_cast(_arr)); + } else { + return error("Expected boolean array, got " + _arr->type()->ToString() + + "."); + } + } + static Result get_value(const Ref& _chunk, const int64_t _ix) { return _chunk->Value(_ix); @@ -51,10 +67,11 @@ struct ArrowTypes { static auto make_builder() { return BuilderType(); } }; -template <> -struct ArrowTypes { +template +struct ArrowTypes { using ArrayType = arrow::UInt8Array; using BuilderType = arrow::UInt8Builder; + using T = uint8_t; static auto data_type() { return arrow::uint8(); } @@ -65,6 +82,11 @@ struct ArrowTypes { } } + static Result> get_array( + const std::shared_ptr& _arr) { + return transform_numerical_array(_arr); + } + static Result get_value(const Ref& _chunk, const int64_t _ix) { return _chunk->Value(_ix); @@ -73,10 +95,11 @@ struct ArrowTypes { static auto make_builder() { return BuilderType(); } }; -template <> -struct ArrowTypes { +template +struct ArrowTypes { using ArrayType = arrow::UInt16Array; using BuilderType = arrow::UInt16Builder; + using T = uint16_t; static auto data_type() { return arrow::uint16(); } @@ -87,6 +110,11 @@ struct ArrowTypes { } } + static Result> get_array( + const std::shared_ptr& _arr) { + return transform_numerical_array(_arr); + } + static Result get_value(const Ref& _chunk, const int64_t _ix) { return _chunk->Value(_ix); @@ 
-95,10 +123,11 @@ struct ArrowTypes { static auto make_builder() { return BuilderType(); } }; -template <> -struct ArrowTypes { +template +struct ArrowTypes { using ArrayType = arrow::UInt32Array; using BuilderType = arrow::UInt32Builder; + using T = uint32_t; static auto data_type() { return arrow::uint32(); } @@ -109,6 +138,11 @@ struct ArrowTypes { } } + static Result> get_array( + const std::shared_ptr& _arr) { + return transform_numerical_array(_arr); + } + static Result get_value(const Ref& _chunk, const int64_t _ix) { return _chunk->Value(_ix); @@ -117,10 +151,11 @@ struct ArrowTypes { static auto make_builder() { return BuilderType(); } }; -template <> -struct ArrowTypes { +template +struct ArrowTypes { using ArrayType = arrow::UInt64Array; using BuilderType = arrow::UInt64Builder; + using T = uint64_t; static auto data_type() { return arrow::uint64(); } @@ -131,6 +166,11 @@ struct ArrowTypes { } } + static Result> get_array( + const std::shared_ptr& _arr) { + return transform_numerical_array(_arr); + } + static Result get_value(const Ref& _chunk, const int64_t _ix) { return _chunk->Value(_ix); @@ -139,10 +179,11 @@ struct ArrowTypes { static auto make_builder() { return BuilderType(); } }; -template <> -struct ArrowTypes { +template +struct ArrowTypes { using ArrayType = arrow::Int8Array; using BuilderType = arrow::Int8Builder; + using T = int8_t; static auto data_type() { return arrow::int8(); } @@ -153,6 +194,11 @@ struct ArrowTypes { } } + static Result> get_array( + const std::shared_ptr& _arr) { + return transform_numerical_array(_arr); + } + static Result get_value(const Ref& _chunk, const int64_t _ix) { return _chunk->Value(_ix); @@ -161,10 +207,11 @@ struct ArrowTypes { static auto make_builder() { return BuilderType(); } }; -template <> -struct ArrowTypes { +template +struct ArrowTypes { using ArrayType = arrow::Int16Array; using BuilderType = arrow::Int16Builder; + using T = int16_t; static auto data_type() { return arrow::int16(); } @@ -175,6 
+222,11 @@ struct ArrowTypes { } } + static Result> get_array( + const std::shared_ptr& _arr) { + return transform_numerical_array(_arr); + } + static Result get_value(const Ref& _chunk, const int64_t _ix) { return _chunk->Value(_ix); @@ -183,10 +235,11 @@ struct ArrowTypes { static auto make_builder() { return BuilderType(); } }; -template <> -struct ArrowTypes { +template +struct ArrowTypes { using ArrayType = arrow::Int32Array; using BuilderType = arrow::Int32Builder; + using T = int32_t; static auto data_type() { return arrow::int32(); } @@ -197,6 +250,11 @@ struct ArrowTypes { } } + static Result> get_array( + const std::shared_ptr& _arr) { + return transform_numerical_array(_arr); + } + static Result get_value(const Ref& _chunk, const int64_t _ix) { return _chunk->Value(_ix); @@ -205,10 +263,11 @@ struct ArrowTypes { static auto make_builder() { return BuilderType(); } }; -template <> -struct ArrowTypes { +template +struct ArrowTypes { using ArrayType = arrow::Int64Array; using BuilderType = arrow::Int64Builder; + using T = int64_t; static auto data_type() { return arrow::int64(); } @@ -219,6 +278,11 @@ struct ArrowTypes { } } + static Result> get_array( + const std::shared_ptr& _arr) { + return transform_numerical_array(_arr); + } + static Result get_value(const Ref& _chunk, const int64_t _ix) { return _chunk->Value(_ix); @@ -227,10 +291,11 @@ struct ArrowTypes { static auto make_builder() { return BuilderType(); } }; -template <> -struct ArrowTypes { +template +struct ArrowTypes { using ArrayType = arrow::FloatArray; using BuilderType = arrow::FloatBuilder; + using T = float; static auto data_type() { return arrow::float32(); } @@ -241,6 +306,11 @@ struct ArrowTypes { } } + static Result> get_array( + const std::shared_ptr& _arr) { + return transform_numerical_array(_arr); + } + static Result get_value(const Ref& _chunk, const int64_t _ix) { return _chunk->Value(_ix); @@ -249,10 +319,11 @@ struct ArrowTypes { static auto make_builder() { return 
BuilderType(); } }; -template <> -struct ArrowTypes { +template +struct ArrowTypes { using ArrayType = arrow::DoubleArray; using BuilderType = arrow::DoubleBuilder; + using T = double; static auto data_type() { return arrow::float64(); } @@ -263,6 +334,11 @@ struct ArrowTypes { } } + static Result> get_array( + const std::shared_ptr& _arr) { + return transform_numerical_array(_arr); + } + static Result get_value(const Ref& _chunk, const int64_t _ix) { return _chunk->Value(_ix); @@ -271,8 +347,8 @@ struct ArrowTypes { static auto make_builder() { return BuilderType(); } }; -template <> -struct ArrowTypes { +template +struct ArrowTypes { using ArrayType = arrow::StringArray; using BuilderType = arrow::StringBuilder; @@ -285,6 +361,16 @@ struct ArrowTypes { } } + static Result> get_array( + const std::shared_ptr& _arr) { + if (_arr->type()->Equals(data_type())) { + return Ref::make(std::static_pointer_cast(_arr)); + } else { + return error("Expected string array, got " + _arr->type()->ToString() + + "."); + } + } + static Result get_value(const Ref& _chunk, const int64_t _ix) { return std::string(_chunk->Value(_ix)); @@ -293,9 +379,9 @@ struct ArrowTypes { static auto make_builder() { return BuilderType(); } }; -template +template requires enchantum::Enum -struct ArrowTypes { +struct ArrowTypes { using ArrayType = arrow::StringArray; using BuilderType = arrow::StringBuilder; @@ -308,6 +394,11 @@ struct ArrowTypes { } } + static Result> get_array( + const std::shared_ptr& _arr) { + return ArrowTypes::get_array(_arr); + } + static Result get_value(const Ref& _chunk, const int64_t _ix) { return string_to_enum(std::string(_chunk->Value(_ix))); } @@ -315,9 +406,9 @@ struct ArrowTypes { static auto make_builder() { return BuilderType(); } }; -template +template requires concepts::ContiguousByteContainer -struct ArrowTypes { +struct ArrowTypes { using ArrayType = arrow::BinaryArray; using BuilderType = arrow::BinaryBuilder; @@ -331,6 +422,21 @@ struct ArrowTypes { } } + 
static Result> get_array( + const std::shared_ptr& _arr) { + if (_arr->type()->Equals(data_type())) { + return Ref::make(std::static_pointer_cast(_arr)); + + } else if (_arr->type()->Equals(arrow::utf8())) { + return transform_string( + std::static_pointer_cast(_arr)); + + } else { + return error("Expected binary or string array, got " + + _arr->type()->ToString() + "."); + } + } + static Result get_value(const Ref& _chunk, const int64_t _ix) { const auto begin = internal::ptr_cast( _chunk->Value(_ix).data()); @@ -338,22 +444,92 @@ struct ArrowTypes { } static auto make_builder() { return BuilderType(); } + + static Result> transform_string( + const std::shared_ptr& _arr) noexcept { + if (!_arr) { + return error( + "transform_string: std::shared_ptr not set. This is a " + "bug, please report."); + } + + auto builder = arrow::BinaryBuilder(); + + for (int64_t i = 0; i < _arr->length(); ++i) { + if (_arr->IsNull(i)) { + const auto status = builder.AppendNull(); + if (!status.ok()) { + return error(status.message()); + } + } else { + const std::string_view s = _arr->Value(i); + const auto status = builder.Append( + internal::ptr_cast(s.data()), s.size()); + if (!status.ok()) { + return error(status.message()); + } + } + } + + std::shared_ptr res; + const auto status = builder.Finish(&res); + return Ref::make( + std::static_pointer_cast(res)); + } }; -template -struct ArrowTypes> { +template +struct ArrowTypes, _s> { + enum class TimeUnit { day, second, milli, micro, nano, string }; + using ArrayType = arrow::TimestampArray; using BuilderType = arrow::TimestampBuilder; - static auto data_type() { return arrow::timestamp(arrow::TimeUnit::SECOND); } + static auto data_type() { return arrow::timestamp(arrow::TimeUnit::MILLI); } static void add_to_builder(const auto& _val, BuilderType* _builder) { - const auto status = _builder->Append(_val.to_time_t()); + const auto status = _builder->Append(_val.to_time_t() * 1000); if (!status.ok()) { throw 
std::runtime_error(status.message()); } } + static Result> get_array( + const std::shared_ptr& _arr) { + if (_arr->type()->Equals(data_type())) { + return Ref::make(std::static_pointer_cast(_arr)); + + } else if (_arr->type()->Equals( + arrow::timestamp(arrow::TimeUnit::SECOND))) { + return transform_time_stamp( + std::static_pointer_cast(_arr)); + + } else if (_arr->type()->Equals(arrow::timestamp(arrow::TimeUnit::MICRO))) { + return transform_time_stamp( + std::static_pointer_cast(_arr)); + + } else if (_arr->type()->Equals(arrow::timestamp(arrow::TimeUnit::NANO))) { + return transform_time_stamp( + std::static_pointer_cast(_arr)); + + } else if (_arr->type()->Equals(arrow::date32())) { + return transform_time_stamp( + std::static_pointer_cast(_arr)); + + } else if (_arr->type()->Equals(arrow::date64())) { + return transform_time_stamp( + std::static_pointer_cast(_arr)); + + } else if (_arr->type()->Equals(arrow::utf8())) { + return transform_time_stamp( + std::static_pointer_cast(_arr)); + + } else { + return error("Expected timestamp, date32, date64 or string array, got " + + _arr->type()->ToString() + "."); + } + } + static Result> get_value(const Ref& _chunk, const int64_t _ix) { return Timestamp<_format>(_chunk->Value(_ix) / 1000); @@ -362,51 +538,159 @@ struct ArrowTypes> { static auto make_builder() { return BuilderType(data_type(), arrow::default_memory_pool()); } + + template + static Result> transform_time_stamp( + const std::shared_ptr& _arr) noexcept { + if (!_arr) { + return error( + "transform_time_stamp: std::shared_ptr not set. 
This is a " + "bug, please report."); + } + + auto builder = + arrow::TimestampBuilder(data_type(), arrow::default_memory_pool()); + + for (int64_t i = 0; i < _arr->length(); ++i) { + if (_arr->IsNull(i)) { + const auto status = builder.AppendNull(); + if (!status.ok()) { + return error(status.message()); + } + } else { + if constexpr (_unit == TimeUnit::day) { + const auto status = builder.Append( + static_cast(_arr->Value(i)) * 1000 * 24 * 60 * 60); + if (!status.ok()) { + return error(status.message()); + } + } else if constexpr (_unit == TimeUnit::second) { + const auto status = + builder.Append(static_cast(_arr->Value(i) * 1000)); + if (!status.ok()) { + return error(status.message()); + } + } else if constexpr (_unit == TimeUnit::milli) { + const auto status = + builder.Append(static_cast(_arr->Value(i))); + if (!status.ok()) { + return error(status.message()); + } + } else if constexpr (_unit == TimeUnit::micro) { + const auto status = + builder.Append(static_cast(_arr->Value(i) / 1000)); + if (!status.ok()) { + return error(status.message()); + } + } else if constexpr (_unit == TimeUnit::nano) { + const auto status = + builder.Append(static_cast(_arr->Value(i) / 1000000)); + if (!status.ok()) { + return error(status.message()); + } + } else if constexpr (_unit == TimeUnit::string) { + const auto ts = Timestamp<_format>::make(std::string(_arr->Value(i))); + if (!ts) { + return error(ts.error().what()); + } + const auto status = builder.Append(ts->to_time_t() * 1000); + if (!status.ok()) { + return error(status.message()); + } + } else { + static_assert(rfl::always_false_v, + "Unsupported time unit."); + } + } + } + + std::shared_ptr res; + const auto status = builder.Finish(&res); + return Ref::make( + std::static_pointer_cast(res)); + } }; -template +template +struct ArrowTypes, SerializationType::csv> { + using ArrayType = arrow::TimestampArray; + using BuilderType = arrow::StringBuilder; + + static auto data_type() { return 
arrow::timestamp(arrow::TimeUnit::MILLI); } + + static void add_to_builder(const Timestamp<_format>& _val, + BuilderType* _builder) { + const auto status = _builder->Append(_val.str()); + if (!status.ok()) { + throw std::runtime_error(status.message()); + } + } + + static Result> get_array( + const std::shared_ptr& _arr) { + return ArrowTypes, + SerializationType::parquet>::get_array(_arr); + } + + static Result> get_value(const Ref& _chunk, + const int64_t _ix) { + return ArrowTypes, + SerializationType::parquet>::get_value(_chunk, _ix); + } + + static auto make_builder() { return BuilderType(); } +}; + +template requires internal::has_reflection_type_v -struct ArrowTypes { - using ArrayType = typename ArrowTypes::ArrayType; +struct ArrowTypes { + using ArrayType = + typename ArrowTypes::ArrayType; using BuilderType = - typename ArrowTypes::BuilderType; + typename ArrowTypes::BuilderType; static auto data_type() { - return ArrowTypes::data_type(); + return ArrowTypes::data_type(); } static void add_to_builder(const auto& _val, BuilderType* _builder) { - ArrowTypes::add_to_builder(_val.reflection(), - _builder); + ArrowTypes::add_to_builder( + _val.reflection(), _builder); + } + + static Result> get_array( + const std::shared_ptr& _arr) { + return ArrowTypes::get_array(_arr); } static Result get_value(const Ref& _chunk, const int64_t _ix) { - return ArrowTypes>:: - get_value(_chunk, _ix) - .and_then([](const auto& _v) -> Result { - try { - return T(_v); - } catch (const std::exception& e) { - return error(e.what()); - } - }); + return ArrowTypes, + _s>::get_value(_chunk, _ix) + .and_then([](const auto& _v) -> Result { + try { + return T(_v); + } catch (const std::exception& e) { + return error(e.what()); + } + }); } static auto make_builder() { - return ArrowTypes::make_builder(); + return ArrowTypes::make_builder(); } }; -template -struct ArrowTypes> { - using ArrayType = typename ArrowTypes>::ArrayType; - using BuilderType = typename ArrowTypes>::BuilderType; 
+template +struct ArrowTypes, _s> { + using ArrayType = typename ArrowTypes, _s>::ArrayType; + using BuilderType = + typename ArrowTypes, _s>::BuilderType; - static auto data_type() { return ArrowTypes::data_type(); } + static auto data_type() { return ArrowTypes::data_type(); } static void add_to_builder(const auto& _val, BuilderType* _builder) { if (_val) { - ArrowTypes::add_to_builder(*_val, _builder); + ArrowTypes::add_to_builder(*_val, _builder); } else { const auto status = _builder->AppendNull(); if (!status.ok()) { @@ -415,24 +699,30 @@ struct ArrowTypes> { } } + static Result> get_array( + const std::shared_ptr& _arr) { + return ArrowTypes::get_array(_arr); + } + static auto get_value(const Ref& _chunk, const int64_t _ix) { - return ArrowTypes>::get_value(_chunk, _ix) + return ArrowTypes, _s>::get_value(_chunk, _ix) .transform([](const auto& _v) { return std::make_optional(_v); }); } - static auto make_builder() { return ArrowTypes::make_builder(); } + static auto make_builder() { return ArrowTypes::make_builder(); } }; -template -struct ArrowTypes> { - using ArrayType = typename ArrowTypes>::ArrayType; - using BuilderType = typename ArrowTypes>::BuilderType; +template +struct ArrowTypes, _s> { + using ArrayType = typename ArrowTypes, _s>::ArrayType; + using BuilderType = + typename ArrowTypes, _s>::BuilderType; - static auto data_type() { return ArrowTypes::data_type(); } + static auto data_type() { return ArrowTypes::data_type(); } static void add_to_builder(const auto& _val, BuilderType* _builder) { if (_val) { - ArrowTypes::add_to_builder(*_val, _builder); + ArrowTypes::add_to_builder(*_val, _builder); } else { const auto status = _builder->AppendNull(); if (!status.ok()) { @@ -441,24 +731,30 @@ struct ArrowTypes> { } } + static Result> get_array( + const std::shared_ptr& _arr) { + return ArrowTypes::get_array(_arr); + } + static auto get_value(const Ref& _chunk, const int64_t _ix) { - return ArrowTypes>::get_value(_chunk, _ix) + return ArrowTypes, 
_s>::get_value(_chunk, _ix) .transform([](const auto& _v) { return std::make_shared(_v); }); } - static auto make_builder() { return ArrowTypes::make_builder(); } + static auto make_builder() { return ArrowTypes::make_builder(); } }; -template -struct ArrowTypes> { - using ArrayType = typename ArrowTypes>::ArrayType; - using BuilderType = typename ArrowTypes>::BuilderType; +template +struct ArrowTypes, _s> { + using ArrayType = typename ArrowTypes, _s>::ArrayType; + using BuilderType = + typename ArrowTypes, _s>::BuilderType; - static auto data_type() { return ArrowTypes::data_type(); } + static auto data_type() { return ArrowTypes::data_type(); } static void add_to_builder(const auto& _val, BuilderType* _builder) { if (_val) { - ArrowTypes::add_to_builder(*_val, _builder); + ArrowTypes::add_to_builder(*_val, _builder); } else { const auto status = _builder->AppendNull(); if (!status.ok()) { @@ -467,71 +763,197 @@ struct ArrowTypes> { } } + static Result> get_array( + const std::shared_ptr& _arr) { + return ArrowTypes::get_array(_arr); + } + static auto get_value(const Ref& _chunk, const int64_t _ix) { - return ArrowTypes>::get_value(_chunk, _ix) + return ArrowTypes, _s>::get_value(_chunk, _ix) .transform([](const auto& _v) { return std::make_unique(_v); }); } - static auto make_builder() { return ArrowTypes::make_builder(); } + static auto make_builder() { return ArrowTypes::make_builder(); } }; -template -struct ArrowTypes> { - using ArrayType = typename ArrowTypes>::ArrayType; - using BuilderType = typename ArrowTypes>::BuilderType; +template +struct ArrowTypes, _s> { + using ArrayType = typename ArrowTypes, _s>::ArrayType; + using BuilderType = + typename ArrowTypes, _s>::BuilderType; - static auto data_type() { return ArrowTypes::data_type(); } + static auto data_type() { return ArrowTypes::data_type(); } static void add_to_builder(const auto& _val, BuilderType* _builder) { - ArrowTypes::add_to_builder(*_val, _builder); + ArrowTypes::add_to_builder(*_val, 
_builder); + } + + static Result> get_array( + const std::shared_ptr& _arr) { + return ArrowTypes::get_array(_arr); } static auto get_value(const Ref& _chunk, const int64_t _ix) { - return ArrowTypes>::get_value(_chunk, _ix) + return ArrowTypes, _s>::get_value(_chunk, _ix) .transform([](const auto& _v) { return Box::make(_v); }); } - static auto make_builder() { return ArrowTypes::make_builder(); } + static auto make_builder() { return ArrowTypes::make_builder(); } }; -template -struct ArrowTypes> { - using ArrayType = typename ArrowTypes>::ArrayType; - using BuilderType = typename ArrowTypes>::BuilderType; +template +struct ArrowTypes, _s> { + using ArrayType = typename ArrowTypes, _s>::ArrayType; + using BuilderType = + typename ArrowTypes, _s>::BuilderType; - static auto data_type() { return ArrowTypes::data_type(); } + static auto data_type() { return ArrowTypes::data_type(); } static void add_to_builder(const auto& _val, BuilderType* _builder) { - ArrowTypes::add_to_builder(*_val, _builder); + ArrowTypes::add_to_builder(*_val, _builder); + } + + static Result> get_array( + const std::shared_ptr& _arr) { + return ArrowTypes::get_array(_arr); } static auto get_value(const Ref& _chunk, const int64_t _ix) { - return ArrowTypes>::get_value(_chunk, _ix) + return ArrowTypes, _s>::get_value(_chunk, _ix) .transform([](const auto& _v) { return Ref::make(_v); }); } - static auto make_builder() { return ArrowTypes::make_builder(); } + static auto make_builder() { return ArrowTypes::make_builder(); } }; -template -struct ArrowTypes> { - using ArrayType = typename ArrowTypes>::ArrayType; - using BuilderType = typename ArrowTypes>::BuilderType; +template +struct ArrowTypes, _s> { + using ArrayType = typename ArrowTypes, _s>::ArrayType; + using BuilderType = + typename ArrowTypes, _s>::BuilderType; - static auto data_type() { return ArrowTypes::data_type(); } + static auto data_type() { return ArrowTypes::data_type(); } static void add_to_builder(const auto& _val, 
BuilderType* _builder) { - ArrowTypes::add_to_builder(_val.value(), _builder); + ArrowTypes::add_to_builder(_val.value(), _builder); + } + + static Result> get_array( + const std::shared_ptr& _arr) { + return ArrowTypes::get_array(_arr); } static auto get_value(const Ref& _chunk, const int64_t _ix) { - return ArrowTypes>::get_value(_chunk, _ix) + return ArrowTypes, _s>::get_value(_chunk, _ix) .transform([](const auto& _v) { return Rename<_name, T>(_v); }); } - static auto make_builder() { return ArrowTypes::make_builder(); } + static auto make_builder() { return ArrowTypes::make_builder(); } }; +template +Result::ArrayType>> +transform_numerical_array_impl( + const std::shared_ptr& _arr) noexcept { + if (!_arr) { + return error( + "transform_numerical_array_impl: std::shared_ptr not set. This is a " + "bug, please report."); + } + + auto builder = ArrowTypes::make_builder(); + + for (int64_t i = 0; i < _arr->length(); ++i) { + if (_arr->IsNull(i)) { + const auto status = builder.AppendNull(); + if (!status.ok()) { + return error(status.message()); + } + } else { + const auto status = builder.Append(static_cast(_arr->Value(i))); + if (!status.ok()) { + return error(status.message()); + } + } + } + + using TargetArrayType = typename ArrowTypes::ArrayType; + + std::shared_ptr res; + const auto status = builder.Finish(&res); + return Ref::make( + std::static_pointer_cast(res)); +} + +template +Result::ArrayType>> transform_numerical_array( + const std::shared_ptr& _arr) noexcept { + if (!_arr) { + return error( + "Could not transform the numerical array. 
std::shared_ptr not set."); + } + + using ArrayType = typename ArrowTypes::ArrayType; + + if (_arr->type()->Equals(ArrowTypes::data_type())) { + return Ref::make(std::static_pointer_cast(_arr)); + + } else if (_arr->type()->Equals(ArrowTypes::data_type())) { + return transform_numerical_array_impl( + std::static_pointer_cast::ArrayType>( + _arr)); + + } else if (_arr->type()->Equals(ArrowTypes::data_type())) { + return transform_numerical_array_impl( + std::static_pointer_cast::ArrayType>( + _arr)); + + } else if (_arr->type()->Equals(ArrowTypes::data_type())) { + return transform_numerical_array_impl( + std::static_pointer_cast::ArrayType>( + _arr)); + + } else if (_arr->type()->Equals(ArrowTypes::data_type())) { + return transform_numerical_array_impl( + std::static_pointer_cast::ArrayType>( + _arr)); + + } else if (_arr->type()->Equals(ArrowTypes::data_type())) { + return transform_numerical_array_impl( + std::static_pointer_cast::ArrayType>( + _arr)); + + } else if (_arr->type()->Equals(ArrowTypes::data_type())) { + return transform_numerical_array_impl( + std::static_pointer_cast::ArrayType>( + _arr)); + + } else if (_arr->type()->Equals(ArrowTypes::data_type())) { + return transform_numerical_array_impl( + std::static_pointer_cast::ArrayType>( + _arr)); + + } else if (_arr->type()->Equals(ArrowTypes::data_type())) { + return transform_numerical_array_impl( + std::static_pointer_cast::ArrayType>( + _arr)); + + } else if (_arr->type()->Equals(ArrowTypes::data_type())) { + return transform_numerical_array_impl( + std::static_pointer_cast::ArrayType>( + _arr)); + + } else if (_arr->type()->Equals(ArrowTypes::data_type())) { + return transform_numerical_array_impl( + std::static_pointer_cast::ArrayType>( + _arr)); + + } else { + return error("Expected numerical array, got " + _arr->type()->ToString() + + "."); + } +} + } // namespace rfl::parsing::tabular #endif diff --git a/include/rfl/parsing/tabular/ArrowWriter.hpp b/include/rfl/parsing/tabular/ArrowWriter.hpp 
index 5deae33d..43c274e0 100644 --- a/include/rfl/parsing/tabular/ArrowWriter.hpp +++ b/include/rfl/parsing/tabular/ArrowWriter.hpp @@ -20,7 +20,7 @@ namespace rfl::parsing::tabular { -template +template class ArrowWriter { static_assert(!Processors::add_tags_to_variants_, "rfl::AddTagsToVariants cannot be used for tabular data."); @@ -43,7 +43,7 @@ class ArrowWriter { std::shared_ptr to_table(const VecType& _data) const { return arrow::Table::Make( - make_arrow_schema>(), + make_arrow_schema, _s>(), to_chunked_arrays(_data)); } @@ -55,13 +55,13 @@ class ArrowWriter { size_t chunksize_; }; -template +template std::vector> -ArrowWriter::to_chunked_arrays(const VecType& _data) const { +ArrowWriter::to_chunked_arrays(const VecType& _data) const { using ValueType = typename VecType::value_type; auto builders = - make_arrow_builders>(); + make_arrow_builders, _s>(); constexpr size_t size = tuple_size_v; @@ -78,7 +78,7 @@ ArrowWriter::to_chunked_arrays(const VecType& _data) const { [&](const auto& _v, auto* _b, std::integer_sequence) { - (add_to_builder(*get<_is>(_v), &(_b->template get<_is>())), ...); + (add_to_builder<_s>(*get<_is>(_v), &(_b->template get<_is>())), ...); }(view, &builders, std::make_integer_sequence()); } @@ -103,7 +103,7 @@ ArrowWriter::to_chunked_arrays(const VecType& _data) const { } } - const auto data_types = make_arrow_data_types(); + const auto data_types = make_arrow_data_types(); return [&](std::integer_sequence) { return std::vector>( diff --git a/include/rfl/parsing/tabular/ChunkedArrayIterator.hpp b/include/rfl/parsing/tabular/ChunkedArrayIterator.hpp index 68b229bf..776756ea 100644 --- a/include/rfl/parsing/tabular/ChunkedArrayIterator.hpp +++ b/include/rfl/parsing/tabular/ChunkedArrayIterator.hpp @@ -16,13 +16,13 @@ namespace rfl::parsing::tabular { -template +template class ChunkedArrayIterator { public: using difference_type = std::ptrdiff_t; using value_type = Result; - using ArrayType = array_t; + using ArrayType = array_t; static 
ChunkedArrayIterator make(const Ref& _arr) { return ChunkedArrayIterator(_arr); @@ -48,14 +48,12 @@ class ChunkedArrayIterator { } return current_chunk_.and_then( - [&](const auto& _c) { return ArrowTypes::get_value(_c, ix_); }); + [&](const auto& _c) { return ArrowTypes::get_value(_c, ix_); }); } - bool end() const noexcept { - return !current_chunk_ || (chunk_ix_ >= arr_->num_chunks()); - } + bool end() const noexcept { return chunk_ix_ >= arr_->num_chunks(); } - ChunkedArrayIterator& operator++() noexcept { + ChunkedArrayIterator& operator++() noexcept { if (!current_chunk_) { return *this; } @@ -74,8 +72,7 @@ class ChunkedArrayIterator { static Result> get_chunk(const Ref& _arr, const int _chunk_ix) noexcept { if (_chunk_ix < _arr->num_chunks()) { - return Ref::make( - std::static_pointer_cast(_arr->chunk(_chunk_ix))); + return ArrowTypes::get_array(_arr->chunk(_chunk_ix)); } else { return error("chunk_ix out of bounds."); } diff --git a/include/rfl/parsing/tabular/add_to_builder.hpp b/include/rfl/parsing/tabular/add_to_builder.hpp index 1d42a22f..d353cbc5 100644 --- a/include/rfl/parsing/tabular/add_to_builder.hpp +++ b/include/rfl/parsing/tabular/add_to_builder.hpp @@ -8,9 +8,10 @@ namespace rfl::parsing::tabular { -template +template inline void add_to_builder(const ValueType& _val, BuilderType* _builder) { - ArrowTypes>::add_to_builder(_val, _builder); + ArrowTypes, _s>::add_to_builder(_val, + _builder); } } // namespace rfl::parsing::tabular diff --git a/include/rfl/parsing/tabular/array_t.hpp b/include/rfl/parsing/tabular/array_t.hpp index 6d083af8..2e8cd7e5 100644 --- a/include/rfl/parsing/tabular/array_t.hpp +++ b/include/rfl/parsing/tabular/array_t.hpp @@ -5,8 +5,8 @@ namespace rfl::parsing::tabular { -template -using array_t = typename ArrowTypes>::ArrayType; +template +using array_t = typename ArrowTypes, _s>::ArrayType; } // namespace rfl::parsing::tabular diff --git a/include/rfl/parsing/tabular/make_arrow_builders.hpp 
b/include/rfl/parsing/tabular/make_arrow_builders.hpp index 501694ee..33955f34 100644 --- a/include/rfl/parsing/tabular/make_arrow_builders.hpp +++ b/include/rfl/parsing/tabular/make_arrow_builders.hpp @@ -16,40 +16,42 @@ namespace rfl::parsing::tabular { -template -using arrow_builder_t = typename ArrowTypes< - std::remove_cvref_t>>::BuilderType; +template +using arrow_builder_t = + typename ArrowTypes>, + _s>::BuilderType; -template +template struct ArrowBuildersType; -template -struct ArrowBuildersType> { - using Type = Tuple...>; +template +struct ArrowBuildersType, _s> { + using Type = Tuple...>; static auto data_types() { return [&](std::integer_sequence) { return std::array, sizeof...(FieldTypes)>( - {ArrowTypes::data_type()...}); + {ArrowTypes::data_type()...}); }(std::make_integer_sequence()); } static Type make_builders() { - return Type(ArrowTypes::make_builder()...); + return Type(ArrowTypes::make_builder()...); } static auto schema() { - const auto fields = std::vector>( - {arrow::field(typename FieldTypes::Name().str(), - ArrowTypes::data_type())...}); + const auto fields = + std::vector>({arrow::field( + typename FieldTypes::Name().str(), + ArrowTypes::data_type())...}); return arrow::schema(fields); } }; -template +template auto make_arrow_builders() { - return ArrowBuildersType>::make_builders(); + return ArrowBuildersType, _s>::make_builders(); } } // namespace rfl::parsing::tabular diff --git a/include/rfl/parsing/tabular/make_arrow_data_types.hpp b/include/rfl/parsing/tabular/make_arrow_data_types.hpp index 0fb237bd..d153fcfc 100644 --- a/include/rfl/parsing/tabular/make_arrow_data_types.hpp +++ b/include/rfl/parsing/tabular/make_arrow_data_types.hpp @@ -8,9 +8,10 @@ namespace rfl::parsing::tabular { -template +template inline auto make_arrow_data_types() { - return ArrowBuildersType>>::data_types(); + return ArrowBuildersType>, + _s>::data_types(); } } // namespace rfl::parsing::tabular diff --git 
a/include/rfl/parsing/tabular/make_arrow_schema.hpp b/include/rfl/parsing/tabular/make_arrow_schema.hpp index b9c6268a..8139b451 100644 --- a/include/rfl/parsing/tabular/make_arrow_schema.hpp +++ b/include/rfl/parsing/tabular/make_arrow_schema.hpp @@ -8,9 +8,9 @@ namespace rfl::parsing::tabular { -template +template inline auto make_arrow_schema() { - return ArrowBuildersType>>::schema(); + return ArrowBuildersType>, _s>::schema(); } } // namespace rfl::parsing::tabular diff --git a/include/rfl/parsing/tabular/make_chunked_array_iterators.hpp b/include/rfl/parsing/tabular/make_chunked_array_iterators.hpp index 697b8b87..3bd6dc9e 100644 --- a/include/rfl/parsing/tabular/make_chunked_array_iterators.hpp +++ b/include/rfl/parsing/tabular/make_chunked_array_iterators.hpp @@ -15,17 +15,17 @@ #include "../../Ref.hpp" #include "../../Result.hpp" #include "../../Tuple.hpp" -#include "ArrowTypes.hpp" #include "ChunkedArrayIterator.hpp" namespace rfl::parsing::tabular { -template +template struct MakeChunkedArrayIterators; -template -struct MakeChunkedArrayIterators> { - using TupleType = Tuple...>; +template +struct MakeChunkedArrayIterators, _s> { + using TupleType = + Tuple...>; Result operator()(const Ref& _table) const { const auto get_column = @@ -40,7 +40,8 @@ struct MakeChunkedArrayIterators> { try { return TupleType( get_column(typename FieldTypes::Name().str()) - .transform(ChunkedArrayIterator::make) + .transform( + ChunkedArrayIterator::make) .value()...); } catch (const std::exception& e) { return error(e.what()); @@ -48,9 +49,9 @@ struct MakeChunkedArrayIterators> { } }; -template +template const auto make_chunked_array_iterators = - MakeChunkedArrayIterators{}; + MakeChunkedArrayIterators{}; } // namespace rfl::parsing::tabular diff --git a/mkdocs.yaml b/mkdocs.yaml index ae77201f..38aefa22 100644 --- a/mkdocs.yaml +++ b/mkdocs.yaml @@ -96,9 +96,11 @@ nav: - BSON: supported_formats/bson.md - Cap'n Proto: supported_formats/capnproto.md - CBOR: 
supported_formats/cbor.md + - CSV: supported_formats/csv.md - FlexBuffers: supported_formats/flexbuffers.md - JSON: supported_formats/json.md - MessagePack: supported_formats/msgpack.md + - Parquet: supported_formats/parquet.md - TOML: supported_formats/toml.md - UBJSON: supported_formats/ubjson.md - XML: supported_formats/xml.md diff --git a/reflectcpp-config.cmake.in b/reflectcpp-config.cmake.in index c38da966..128a89c7 100644 --- a/reflectcpp-config.cmake.in +++ b/reflectcpp-config.cmake.in @@ -4,8 +4,10 @@ set(REFLECTCPP_JSON @REFLECTCPP_JSON@) set(REFLECTCPP_BSON @REFLECTCPP_BSON@) set(REFLECTCPP_CAPNPROTO @REFLECTCPP_CAPNPROTO@) set(REFLECTCPP_CBOR @REFLECTCPP_CBOR@) +set(REFLECTCPP_CSV @REFLECTCPP_CSV@) set(REFLECTCPP_FLEXBUFFERS @REFLECTCPP_FLEXBUFFERS@) set(REFLECTCPP_MSGPACK @REFLECTCPP_MSGPACK@) +set(REFLECTCPP_PARQUET @REFLECTCPP_PARQUET@) set(REFLECTCPP_TOML @REFLECTCPP_TOML@) set(REFLECTCPP_UBJSON @REFLECTCPP_UBJSON@) set(REFLECTCPP_XML @REFLECTCPP_XML@) @@ -38,6 +40,10 @@ if (REFLECTCPP_CBOR OR REFLECTCPP_UBJSON) find_dependency(jsoncons) endif () +if (REFLECTCPP_CSV) + find_dependency(Arrow) +endif() + if (REFLECTCPP_FLEXBUFFERS) find_dependency(flatbuffers) endif () @@ -46,6 +52,11 @@ if (REFLECTCPP_MSGPACK) find_dependency(msgpack-c) endif() +if (REFLECTCPP_PARQUET) + find_dependency(Arrow) + find_dependency(Parquet) +endif() + if (REFLECTCPP_TOML) find_dependency(tomlplusplus) endif() diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 9e559b73..bfcab240 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -28,6 +28,10 @@ if (REFLECTCPP_CBOR) add_subdirectory(cbor) endif() +if (REFLECTCPP_CSV) + add_subdirectory(csv) +endif() + if (REFLECTCPP_FLEXBUFFERS) add_subdirectory(flexbuffers) endif() diff --git a/tests/csv/CMakeLists.txt b/tests/csv/CMakeLists.txt new file mode 100644 index 00000000..a435a433 --- /dev/null +++ b/tests/csv/CMakeLists.txt @@ -0,0 +1,21 @@ +project(reflect-cpp-csv-tests) + +file(GLOB_RECURSE SOURCES 
CONFIGURE_DEPENDS "*.cpp") + +add_executable( + reflect-cpp-csv-tests + ${SOURCES} +) +target_precompile_headers(reflect-cpp-csv-tests PRIVATE [["rfl.hpp"]] ) + +target_include_directories(reflect-cpp-csv-tests SYSTEM PRIVATE "${VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/include") + +target_link_libraries( + reflect-cpp-csv-tests + PRIVATE + "${REFLECT_CPP_GTEST_LIB}" +) + +find_package(GTest) +gtest_discover_tests(reflect-cpp-csv-tests) + diff --git a/tests/csv/test_boolean.cpp b/tests/csv/test_boolean.cpp new file mode 100644 index 00000000..6d9b01c1 --- /dev/null +++ b/tests/csv/test_boolean.cpp @@ -0,0 +1,47 @@ +#include +#include +#include +#include + +#include "write_and_read.hpp" + +namespace test_boolean { + +using Age = rfl::Validator, rfl::Maximum<130>>; + +struct Person { + rfl::Rename<"firstName", std::string> first_name; + rfl::Rename<"lastName", std::string> last_name = "Simpson"; + std::string town = "Springfield"; + rfl::Timestamp<"%Y-%m-%d"> birthday; + bool is_child; + Age age; + rfl::Email email; +}; + +TEST(csv, test_boolean) { + const auto people = + std::vector({Person{.first_name = "Bart", + .birthday = "1987-04-19", + .is_child = true, + .age = 10, + .email = "bart@simpson.com"}, + Person{.first_name = "Lisa", + .birthday = "1987-04-19", + .is_child = true, + .age = 8, + .email = "lisa@simpson.com"}, + Person{.first_name = "Maggie", + .birthday = "1987-04-19", + .is_child = true, + .age = 0, + .email = "maggie@simpson.com"}, + Person{.first_name = "Homer", + .birthday = "1987-04-19", + .is_child = false, + .age = 45, + .email = "homer@simpson.com"}}); + + write_and_read(people); +} +} // namespace test_boolean diff --git a/tests/csv/test_box.cpp b/tests/csv/test_box.cpp new file mode 100644 index 00000000..1470b416 --- /dev/null +++ b/tests/csv/test_box.cpp @@ -0,0 +1,47 @@ +#include +#include +#include +#include +#include + +#include "write_and_read.hpp" + +namespace test_box { + +using Age = rfl::Validator, rfl::Maximum<130>>; + +struct 
Person { + rfl::Rename<"firstName", std::string> first_name; + rfl::Rename<"lastName", std::string> last_name = "Simpson"; + std::string town = "Springfield"; + rfl::Timestamp<"%Y-%m-%d"> birthday; + Age age; + rfl::Box email; +}; + +TEST(csv, test_box) { + auto people = std::vector(); + people.emplace_back( + Person{.first_name = "Bart", + .birthday = "1987-04-19", + .age = 10, + .email = rfl::make_box("bart@simpson.com")}); + people.emplace_back( + Person{.first_name = "Lisa", + .birthday = "1987-04-19", + .age = 8, + .email = rfl::make_box("lisa@simpson.com")}); + people.emplace_back( + Person{.first_name = "Maggie", + .birthday = "1987-04-19", + .age = 0, + .email = rfl::make_box("maggie@simpson.com")}); + people.emplace_back( + Person{.first_name = "Homer", + .birthday = "1987-04-19", + .age = 45, + .email = rfl::make_box("homer@simpson.com")}); + + write_and_read(people); +} +} // namespace test_box diff --git a/tests/csv/test_bytestring.cpp b/tests/csv/test_bytestring.cpp new file mode 100644 index 00000000..abd7d09c --- /dev/null +++ b/tests/csv/test_bytestring.cpp @@ -0,0 +1,24 @@ +#include +#include +#include +#include + +#include "write_and_read.hpp" + +namespace test_bytestring { + +struct TestStruct { + rfl::Bytestring bytestring; +}; + +TEST(csv, test_bytestring) { + const auto test_struct = + TestStruct{.bytestring = rfl::Bytestring({std::byte{13}, std::byte{14}, + std::byte{15}, std::byte{16}})}; + + const auto test_structs = std::vector( + {test_struct, test_struct, test_struct, test_struct}); + + write_and_read(test_structs); +} +} // namespace test_bytestring diff --git a/tests/csv/test_camel_case.cpp b/tests/csv/test_camel_case.cpp new file mode 100644 index 00000000..6d72baaf --- /dev/null +++ b/tests/csv/test_camel_case.cpp @@ -0,0 +1,42 @@ +#include +#include +#include +#include + +#include "write_and_read.hpp" + +namespace test_camel_case { + +using Age = rfl::Validator, rfl::Maximum<130>>; + +struct Person { + std::string first_name; + 
std::string last_name = "Simpson"; + std::string town = "Springfield"; + rfl::Timestamp<"%Y-%m-%d"> birthday; + Age age; + rfl::Email email; +}; + +TEST(csv, test_camel_case) { + const auto people = + std::vector({Person{.first_name = "Bart", + .birthday = "1987-04-19", + .age = 10, + .email = "bart@simpson.com"}, + Person{.first_name = "Lisa", + .birthday = "1987-04-19", + .age = 8, + .email = "lisa@simpson.com"}, + Person{.first_name = "Maggie", + .birthday = "1987-04-19", + .age = 0, + .email = "maggie@simpson.com"}, + Person{.first_name = "Homer", + .birthday = "1987-04-19", + .age = 45, + .email = "homer@simpson.com"}}); + + write_and_read(people); +} +} // namespace test_camel_case diff --git a/tests/csv/test_deque.cpp b/tests/csv/test_deque.cpp new file mode 100644 index 00000000..d863ae69 --- /dev/null +++ b/tests/csv/test_deque.cpp @@ -0,0 +1,42 @@ +#include +#include +#include +#include + +#include "write_and_read.hpp" + +namespace test_deque { + +using Age = rfl::Validator, rfl::Maximum<130>>; + +struct Person { + rfl::Rename<"firstName", std::string> first_name; + rfl::Rename<"lastName", std::string> last_name = "Simpson"; + std::string town = "Springfield"; + rfl::Timestamp<"%Y-%m-%d"> birthday; + Age age; + rfl::Email email; +}; + +TEST(csv, test_deque) { + const auto people = + std::deque({Person{.first_name = "Bart", + .birthday = "1987-04-19", + .age = 10, + .email = "bart@simpson.com"}, + Person{.first_name = "Lisa", + .birthday = "1987-04-19", + .age = 8, + .email = "lisa@simpson.com"}, + Person{.first_name = "Maggie", + .birthday = "1987-04-19", + .age = 0, + .email = "maggie@simpson.com"}, + Person{.first_name = "Homer", + .birthday = "1987-04-19", + .age = 45, + .email = "homer@simpson.com"}}); + + write_and_read(people); +} +} // namespace test_deque diff --git a/tests/csv/test_enums.cpp b/tests/csv/test_enums.cpp new file mode 100644 index 00000000..17af9078 --- /dev/null +++ b/tests/csv/test_enums.cpp @@ -0,0 +1,44 @@ +#include +#include 
+#include +#include + +#include "write_and_read.hpp" + +namespace test_enums { + +using Age = rfl::Validator, rfl::Maximum<130>>; + +enum class FirstName { Bart, Lisa, Maggie, Homer }; + +struct Person { + rfl::Rename<"firstName", FirstName> first_name; + rfl::Rename<"lastName", std::string> last_name = "Simpson"; + std::string town = "Springfield"; + rfl::Timestamp<"%Y-%m-%d"> birthday; + Age age; + rfl::Email email; +}; + +TEST(csv, test_enums) { + const auto people = + std::vector({Person{.first_name = FirstName::Bart, + .birthday = "1987-04-19", + .age = 10, + .email = "bart@simpson.com"}, + Person{.first_name = FirstName::Lisa, + .birthday = "1987-04-19", + .age = 8, + .email = "lisa@simpson.com"}, + Person{.first_name = FirstName::Maggie, + .birthday = "1987-04-19", + .age = 0, + .email = "maggie@simpson.com"}, + Person{.first_name = FirstName::Homer, + .birthday = "1987-04-19", + .age = 45, + .email = "homer@simpson.com"}}); + + write_and_read(people); +} +} // namespace test_enums diff --git a/tests/csv/test_flatten.cpp b/tests/csv/test_flatten.cpp new file mode 100644 index 00000000..2e0ff859 --- /dev/null +++ b/tests/csv/test_flatten.cpp @@ -0,0 +1,54 @@ +#include +#include +#include +#include + +#include "write_and_read.hpp" + +namespace test_flatten { + +using Age = rfl::Validator, rfl::Maximum<130>>; + +struct Address { + std::string street; + std::string city; +}; + +struct Person { + rfl::Rename<"firstName", std::string> first_name; + rfl::Rename<"lastName", std::string> last_name = "Simpson"; + std::string town = "Springfield"; + rfl::Timestamp<"%Y-%m-%d"> birthday; + Age age; + rfl::Email email; + rfl::Flatten
address; +}; + +TEST(csv, test_flatten) { + const auto address = + Address{.street = "Evergreen Terrace", .city = "Springfield"}; + + const auto people = std::vector({Person{.first_name = "Bart", + .birthday = "1987-04-19", + .age = 10, + .email = "bart@simpson.com", + .address = address}, + Person{.first_name = "Lisa", + .birthday = "1987-04-19", + .age = 8, + .email = "lisa@simpson.com", + .address = address}, + Person{.first_name = "Maggie", + .birthday = "1987-04-19", + .age = 0, + .email = "maggie@simpson.com", + .address = address}, + Person{.first_name = "Homer", + .birthday = "1987-04-19", + .age = 45, + .email = "homer@simpson.com", + .address = address}}); + + write_and_read(people); +} +} // namespace test_flatten diff --git a/tests/csv/test_literal.cpp b/tests/csv/test_literal.cpp new file mode 100644 index 00000000..a5f74397 --- /dev/null +++ b/tests/csv/test_literal.cpp @@ -0,0 +1,42 @@ +#include +#include +#include +#include + +#include "write_and_read.hpp" + +namespace test_literal { + +using Age = rfl::Validator, rfl::Maximum<130>>; + +struct Person { + std::string first_name; + rfl::Literal<"Simpson"> last_name; + std::string town = "Springfield"; + rfl::Timestamp<"%Y-%m-%d"> birthday; + Age age; + rfl::Email email; +}; + +TEST(csv, test_literal) { + const auto people = + std::vector({Person{.first_name = "Bart", + .birthday = "1987-04-19", + .age = 10, + .email = "bart@simpson.com"}, + Person{.first_name = "Lisa", + .birthday = "1987-04-19", + .age = 8, + .email = "lisa@simpson.com"}, + Person{.first_name = "Maggie", + .birthday = "1987-04-19", + .age = 0, + .email = "maggie@simpson.com"}, + Person{.first_name = "Homer", + .birthday = "1987-04-19", + .age = 45, + .email = "homer@simpson.com"}}); + + write_and_read(people); +} +} // namespace test_literal diff --git a/tests/csv/test_optionals.cpp b/tests/csv/test_optionals.cpp new file mode 100644 index 00000000..3c99df9f --- /dev/null +++ b/tests/csv/test_optionals.cpp @@ -0,0 +1,40 @@ +#include 
+#include +#include +#include +#include + +#include "write_and_read.hpp" + +namespace test_optionals { + +using Age = rfl::Validator, rfl::Maximum<130>>; + +struct Person { + rfl::Rename<"firstName", std::string> first_name; + rfl::Rename<"lastName", std::string> last_name = "Simpson"; + std::string town = "Springfield"; + rfl::Timestamp<"%Y-%m-%d"> birthday; + Age age; + std::optional email; +}; + +TEST(csv, test_optionals) { + const auto people = std::vector( + {Person{.first_name = "Bart", + .birthday = "1987-04-19", + .age = 10, + .email = "bart@simpson.com"}, + Person{.first_name = "Lisa", + .birthday = "1987-04-19", + .age = 8, + .email = "lisa@simpson.com"}, + Person{.first_name = "Maggie", .birthday = "1987-04-19", .age = 0}, + Person{.first_name = "Homer", + .birthday = "1987-04-19", + .age = 45, + .email = "homer@simpson.com"}}); + + write_and_read(people); +} +} // namespace test_optionals diff --git a/tests/csv/test_readme_example.cpp b/tests/csv/test_readme_example.cpp new file mode 100644 index 00000000..9bd29186 --- /dev/null +++ b/tests/csv/test_readme_example.cpp @@ -0,0 +1,42 @@ +#include +#include +#include +#include + +#include "write_and_read.hpp" + +namespace test_readme_example { + +using Age = rfl::Validator, rfl::Maximum<130>>; + +struct Person { + std::string first_name; + std::string last_name = "Simpson"; + std::string town = "Springfield"; + rfl::Timestamp<"%Y-%m-%d"> birthday; + Age age; + rfl::Email email; +}; + +TEST(csv, test_readme_example) { + const auto people = + std::vector({Person{.first_name = "Bart", + .birthday = "1987-04-19", + .age = 10, + .email = "bart@simpson.com"}, + Person{.first_name = "Lisa", + .birthday = "1987-04-19", + .age = 8, + .email = "lisa@simpson.com"}, + Person{.first_name = "Maggie", + .birthday = "1987-04-19", + .age = 0, + .email = "maggie@simpson.com"}, + Person{.first_name = "Homer", + .birthday = "1987-04-19", + .age = 45, + .email = "homer@simpson.com"}}); + + write_and_read(people); +} +} // 
namespace test_readme_example diff --git a/tests/csv/test_ref.cpp b/tests/csv/test_ref.cpp new file mode 100644 index 00000000..2bf21901 --- /dev/null +++ b/tests/csv/test_ref.cpp @@ -0,0 +1,47 @@ +#include +#include +#include +#include +#include + +#include "write_and_read.hpp" + +namespace test_ref { + +using Age = rfl::Validator, rfl::Maximum<130>>; + +struct Person { + rfl::Rename<"firstName", std::string> first_name; + rfl::Rename<"lastName", std::string> last_name = "Simpson"; + std::string town = "Springfield"; + rfl::Timestamp<"%Y-%m-%d"> birthday; + Age age; + rfl::Ref email; +}; + +TEST(csv, test_ref) { + auto people = std::vector(); + people.emplace_back( + Person{.first_name = "Bart", + .birthday = "1987-04-19", + .age = 10, + .email = rfl::make_ref("bart@simpson.com")}); + people.emplace_back( + Person{.first_name = "Lisa", + .birthday = "1987-04-19", + .age = 8, + .email = rfl::make_ref("lisa@simpson.com")}); + people.emplace_back( + Person{.first_name = "Maggie", + .birthday = "1987-04-19", + .age = 0, + .email = rfl::make_ref("maggie@simpson.com")}); + people.emplace_back( + Person{.first_name = "Homer", + .birthday = "1987-04-19", + .age = 45, + .email = rfl::make_ref("homer@simpson.com")}); + + write_and_read(people); +} +} // namespace test_ref diff --git a/tests/csv/test_save_load.cpp b/tests/csv/test_save_load.cpp new file mode 100644 index 00000000..5f1cd69b --- /dev/null +++ b/tests/csv/test_save_load.cpp @@ -0,0 +1,55 @@ +#include + +#include +#include +#include +#include +#include +#include + +namespace test_save_load { + +using Age = + rfl::Validator, rfl::Maximum<130>>>; + +struct Person { + std::string first_name; + std::string last_name = "Simpson"; + std::string town = "Springfield"; + rfl::Timestamp<"%Y-%m-%d"> birthday; + Age age; + rfl::Email email; +}; + +TEST(csv, test_save_load) { + const auto people1 = + std::vector({Person{.first_name = "Bart", + .birthday = "1987-04-19", + .age = 10, + .email = "bart@simpson.com"}, + 
Person{.first_name = "Lisa", + .birthday = "1987-04-19", + .age = 8, + .email = "lisa@simpson.com"}, + Person{.first_name = "Maggie", + .birthday = "1987-04-19", + .age = 0, + .email = "maggie@simpson.com"}, + Person{.first_name = "Homer", + .birthday = "1987-04-19", + .age = 45, + .email = "homer@simpson.com"}}); + + const auto settings = rfl::csv::Settings{}.with_delimiter(';'); + + rfl::csv::save("people.csv", people1, settings); + + const auto people2 = + rfl::csv::load>("people.csv", settings).value(); + + const auto bytes1 = rfl::csv::write(people1); + const auto bytes2 = rfl::csv::write(people2); + + EXPECT_EQ(bytes1, bytes2); +} +} // namespace test_save_load diff --git a/tests/csv/test_settings.cpp b/tests/csv/test_settings.cpp new file mode 100644 index 00000000..cc228cde --- /dev/null +++ b/tests/csv/test_settings.cpp @@ -0,0 +1,44 @@ +#include +#include +#include +#include + +#include "write_and_read.hpp" + +namespace test_settings { + +using Age = rfl::Validator, rfl::Maximum<130>>; + +struct Person { + std::string first_name; + std::string last_name = "Simpson"; + std::string town = "Springfield"; + rfl::Timestamp<"%Y-%m-%d"> birthday; + Age age; + rfl::Email email; +}; + +TEST(csv, test_settings) { + const auto people = + std::vector({Person{.first_name = "Bart", + .birthday = "1987-04-19", + .age = 10, + .email = "bart@simpson.com"}, + Person{.first_name = "Lisa", + .birthday = "1987-04-19", + .age = 8, + .email = "lisa@simpson.com"}, + Person{.first_name = "Maggie", + .birthday = "1987-04-19", + .age = 0, + .email = "maggie@simpson.com"}, + Person{.first_name = "Homer", + .birthday = "1987-04-19", + .age = 45, + .email = "homer@simpson.com"}}); + + const auto settings = rfl::csv::Settings{}.with_delimiter(';'); + + write_and_read(people, settings); +} +} // namespace test_settings diff --git a/tests/csv/test_shared_ptr.cpp b/tests/csv/test_shared_ptr.cpp new file mode 100644 index 00000000..72cab308 --- /dev/null +++ b/tests/csv/test_shared_ptr.cpp 
@@ -0,0 +1,44 @@ +#include +#include +#include +#include +#include + +#include "write_and_read.hpp" + +namespace test_shared_ptr { + +using Age = rfl::Validator, rfl::Maximum<130>>; + +struct Person { + rfl::Rename<"firstName", std::string> first_name; + rfl::Rename<"lastName", std::string> last_name = "Simpson"; + std::string town = "Springfield"; + rfl::Timestamp<"%Y-%m-%d"> birthday; + Age age; + std::shared_ptr email; +}; + +TEST(csv, test_shared_ptr) { + auto people = std::vector(); + people.emplace_back( + Person{.first_name = "Bart", + .birthday = "1987-04-19", + .age = 10, + .email = std::make_shared("bart@simpson.com")}); + people.emplace_back( + Person{.first_name = "Lisa", + .birthday = "1987-04-19", + .age = 8, + .email = std::make_shared("lisa@simpson.com")}); + people.emplace_back( + Person{.first_name = "Maggie", .birthday = "1987-04-19", .age = 0}); + people.emplace_back( + Person{.first_name = "Homer", + .birthday = "1987-04-19", + .age = 45, + .email = std::make_shared("homer@simpson.com")}); + + write_and_read(people); +} +} // namespace test_shared_ptr diff --git a/tests/csv/test_unique_ptr.cpp b/tests/csv/test_unique_ptr.cpp new file mode 100644 index 00000000..b41cb1fa --- /dev/null +++ b/tests/csv/test_unique_ptr.cpp @@ -0,0 +1,44 @@ +#include +#include +#include +#include +#include + +#include "write_and_read.hpp" + +namespace test_unique_ptr { + +using Age = rfl::Validator, rfl::Maximum<130>>; + +struct Person { + rfl::Rename<"firstName", std::string> first_name; + rfl::Rename<"lastName", std::string> last_name = "Simpson"; + std::string town = "Springfield"; + rfl::Timestamp<"%Y-%m-%d"> birthday; + Age age; + std::unique_ptr email; +}; + +TEST(csv, test_unique_ptr) { + auto people = std::vector(); + people.emplace_back( + Person{.first_name = "Bart", + .birthday = "1987-04-19", + .age = 10, + .email = std::make_unique("bart@simpson.com")}); + people.emplace_back( + Person{.first_name = "Lisa", + .birthday = "1987-04-19", + .age = 8, + 
.email = std::make_unique("lisa@simpson.com")}); + people.emplace_back( + Person{.first_name = "Maggie", .birthday = "1987-04-19", .age = 0}); + people.emplace_back( + Person{.first_name = "Homer", + .birthday = "1987-04-19", + .age = 45, + .email = std::make_unique("homer@simpson.com")}); + + write_and_read(people); +} +} // namespace test_unique_ptr diff --git a/tests/csv/write_and_read.hpp b/tests/csv/write_and_read.hpp new file mode 100644 index 00000000..4640e962 --- /dev/null +++ b/tests/csv/write_and_read.hpp @@ -0,0 +1,22 @@ +#ifndef WRITE_AND_READ_ +#define WRITE_AND_READ_ + +#include + +#include +#include +#include + +template +void write_and_read(const auto& _vec, const rfl::csv::Settings& _settings = + rfl::csv::Settings{}) { + using T = std::remove_cvref_t; + const auto serialized1 = rfl::csv::write(_vec, _settings); + const auto res = rfl::csv::read(serialized1, _settings); + EXPECT_TRUE(res && true) << "Test failed on read. Error: " + << res.error().what(); + const auto serialized2 = rfl::csv::write(res.value(), _settings); + EXPECT_EQ(serialized1, serialized2); +} + +#endif diff --git a/vcpkg.json b/vcpkg.json index 635d4e09..79c2900b 100644 --- a/vcpkg.json +++ b/vcpkg.json @@ -74,6 +74,16 @@ } ] }, + "csv": { + "description": "Enable CSV support", + "dependencies": [ + { + "name": "arrow", + "version>=": "21.0.0", + "features": ["csv"] + } + ] + }, "ctre": { "description": "Install CTRE using vcpkg instead of using the bundled version", "dependencies": [