getml · liuzicheng1987 · Sep 7, 2025 · Sep 3, 2025 · Sep 6, 2025 · Sep 6, 2025
diff --git a/.gitignore b/.gitignore
@@ -45,6 +45,7 @@
 *.bson
 *.capnproto
 *.cbor
+*.csv
 *.json
 *.fb
 *.flexbuf

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -9,6 +9,7 @@ option(REFLECTCPP_AVRO "Enable AVRO support" ${REFLECTCPP_ALL_FORMATS})
 option(REFLECTCPP_BSON "Enable BSON support" ${REFLECTCPP_ALL_FORMATS})
 option(REFLECTCPP_CAPNPROTO "Enable Cap’n Proto support" ${REFLECTCPP_ALL_FORMATS})
 option(REFLECTCPP_CBOR "Enable CBOR support" ${REFLECTCPP_ALL_FORMATS})
+option(REFLECTCPP_CSV "Enable CSV support" ${REFLECTCPP_ALL_FORMATS})  # Defaults to REFLECTCPP_ALL_FORMATS; when ON, reflectcpp is linked against Apache Arrow.
 option(REFLECTCPP_FLEXBUFFERS "Enable flexbuffers support" ${REFLECTCPP_ALL_FORMATS})
 option(REFLECTCPP_MSGPACK "Enable msgpack support" ${REFLECTCPP_ALL_FORMATS})
 option(REFLECTCPP_PARQUET "Enable parquet support" ${REFLECTCPP_ALL_FORMATS})
@@ -55,8 +56,8 @@ endif()
 
 if (REFLECTCPP_BUILD_TESTS OR REFLECTCPP_BUILD_BENCHMARKS OR
     (REFLECTCPP_JSON AND NOT REFLECTCPP_USE_BUNDLED_DEPENDENCIES) OR REFLECTCPP_AVRO OR
-    REFLECTCPP_BSON OR REFLECTCPP_CAPNPROTO OR REFLECTCPP_CBOR OR REFLECTCPP_FLEXBUFFERS OR 
-    REFLECTCPP_MSGPACK OR REFLECTCPP_PARQUET OR REFLECTCPP_XML OR 
+    REFLECTCPP_BSON OR REFLECTCPP_CAPNPROTO OR REFLECTCPP_CBOR OR REFLECTCPP_CSV OR
+    REFLECTCPP_FLEXBUFFERS OR REFLECTCPP_MSGPACK OR REFLECTCPP_PARQUET OR REFLECTCPP_XML OR 
     REFLECTCPP_TOML OR REFLECTCPP_UBJSON OR REFLECTCPP_YAML)
     # enable vcpkg per default if features other than JSON are required
     set(REFLECTCPP_USE_VCPKG_DEFAULT ON)
@@ -95,6 +96,10 @@ if (REFLECTCPP_USE_VCPKG)
         list(APPEND VCPKG_MANIFEST_FEATURES "cbor")
     endif()
 
+    if (REFLECTCPP_CSV)  # Request the "csv" vcpkg manifest feature when CSV support is enabled.
+        list(APPEND VCPKG_MANIFEST_FEATURES "csv")
+    endif()
+
     if (NOT REFLECTCPP_USE_BUNDLED_DEPENDENCIES)
         list(APPEND VCPKG_MANIFEST_FEATURES "ctre")
     endif()
@@ -246,6 +251,13 @@ if (REFLECTCPP_CBOR)
     include_directories(PUBLIC ${jsoncons_INCLUDE_DIRS})
 endif ()
 
+if (REFLECTCPP_CSV)  # CSV support is built on Apache Arrow, so reflectcpp must link against it.
+    if (NOT TARGET Arrow::arrow_shared AND NOT TARGET Arrow::arrow_static)  # Test the namespaced targets that are actually linked below; a bare "Arrow" target is never defined.
+        find_package(Arrow CONFIG REQUIRED)
+    endif()
+    target_link_libraries(reflectcpp PUBLIC "$<IF:$<BOOL:${ARROW_BUILD_STATIC}>,Arrow::arrow_static,Arrow::arrow_shared>")  # Pick the static or shared Arrow target according to ARROW_BUILD_STATIC.
+endif ()
+
 if (REFLECTCPP_FLEXBUFFERS)
     list(APPEND REFLECT_CPP_SOURCES
         src/reflectcpp_flexbuf.cpp

diff --git a/docs/supported_formats/csv.md b/docs/supported_formats/csv.md
@@ -0,0 +1,218 @@
+# csv
+
+For CSV support, include the header `<rfl/csv.hpp>` and link to the [Apache Arrow](https://arrow.apache.org/) library.
+Furthermore, when compiling reflect-cpp, you need to pass `-DREFLECTCPP_CSV=ON` to cmake.
+
+CSV is a tabular text format. Like other tabular formats in reflect-cpp, CSV is designed for collections of flat records and has limitations for nested or variant types.
+
+## Reading and writing
+
+Suppose you have a struct like this:
+
+```cpp
+struct Person {
+    std::string first_name;
+    std::string last_name = "Simpson";
+    rfl::Timestamp<"%Y-%m-%d"> birthday;
+    unsigned int age;
+    rfl::Email email;
+};
+```
+
+Important: CSV is a tabular format that requires collections of records. You cannot serialize individual structs - you must use containers like `std::vector<Person>`, `std::deque<Person>`, etc.
+
+Write a collection to a string (CSV bytes) like this:
+
+```cpp
+const auto people = std::vector<Person>{
+    Person{.first_name = "Bart", .birthday = "1987-04-19", .age = 10, .email = "bart@simpson.com"},
+    Person{.first_name = "Lisa", .birthday = "1987-04-19", .age = 8, .email = "lisa@simpson.com"}
+};
+
+const std::string csv_text = rfl::csv::write(people);
+```
+
+Parse from a string or bytes view:
+
+```cpp
+const rfl::Result<std::vector<Person>> result = rfl::csv::read<std::vector<Person>>(csv_text);
+```
+
+## Settings
+
+CSV behavior can be configured using `rfl::csv::Settings`:
+
+```cpp
+const auto settings = rfl::csv::Settings{}
+    .with_delimiter(';')
+    .with_quoting(true)
+    .with_quote_char('"')
+    .with_null_string("n/a")
+    .with_double_quote(true)
+    .with_escaping(false)
+    .with_escape_char('\\')
+    .with_newlines_in_values(false)
+    .with_ignore_empty_lines(true)
+    .with_batch_size(1024);
+
+const std::string csv_text = rfl::csv::write(people, settings);
+```
+
+Key options:
+- `batch_size` - Maximum number of rows processed per batch (performance tuning)
+- `delimiter` - Field delimiter character
+- `quoting` - Whether quoting is used (on write, and on read together with `quote_char`)
+- `quote_char` - Quote character used when reading
+- `null_string` - String representation for null values
+- `double_quote` - Whether a quote inside a value is double-quoted (reading)
+- `escaping` - Whether escaping is used (reading)
+- `escape_char` - Escape character (reading)
+- `newlines_in_values` - Whether CR/LF are allowed inside values (reading)
+- `ignore_empty_lines` - Whether empty lines are ignored (reading)
+
+## Loading and saving
+
+You can load from and save to disk:
+
+```cpp
+const rfl::Result<std::vector<Person>> result = rfl::csv::load<std::vector<Person>>("/path/to/file.csv");
+
+const auto people = std::vector<Person>{...};
+rfl::csv::save("/path/to/file.csv", people);
+```
+
+With custom settings:
+
+```cpp
+const auto settings = rfl::csv::Settings{}.with_delimiter(';');
+rfl::csv::save("/path/to/file.csv", people, settings);
+```
+
+## Reading from and writing into streams
+
+You can read from any `std::istream` and write to any `std::ostream`:
+
+```cpp
+const rfl::Result<std::vector<Person>> result = rfl::csv::read<std::vector<Person>>(my_istream);
+
+const auto people = std::vector<Person>{...};
+rfl::csv::write(people, my_ostream);
+```
+
+With custom settings:
+
+```cpp
+const auto settings = rfl::csv::Settings{}.with_delimiter(';');
+rfl::csv::write(people, my_ostream, settings);
+```
+
+## Field name transformations
+
+Like other formats, CSV supports field name transformations via processors, e.g. `SnakeCaseToCamelCase`:
+
+```cpp
+const auto people = std::vector<Person>{...};
+const auto result = rfl::csv::read<std::vector<Person>, rfl::SnakeCaseToCamelCase>(csv_text);
+```
+
+## Enums and validation
+
+CSV supports enums and validated types. Enums are written/read as strings:
+
+```cpp
+enum class FirstName { Bart, Lisa, Maggie, Homer };
+
+struct Person {
+    rfl::Rename<"firstName", FirstName> first_name;
+    rfl::Rename<"lastName", std::string> last_name;
+    rfl::Timestamp<"%Y-%m-%d"> birthday;
+    rfl::Validator<unsigned int, rfl::Minimum<0>, rfl::Maximum<130>> age;
+    rfl::Email email;
+};
+```
+
+## Limitations of tabular formats
+
+CSV, like other tabular formats, has limitations compared to hierarchical formats such as JSON or XML:
+
+### Collections requirement
+You must serialize collections, not individual objects:
+```cpp
+std::vector<Person> people = {...};  // ✅ Correct
+Person person = {...};               // ❌ Wrong - must be in a container
+```
+
+### No nested objects
+Each field must be a primitive type, enum, or a simple validated type. Nested objects are not automatically flattened:
+```cpp
+// This will NOT compile for CSV - nested objects are not automatically flattened
+struct Address {
+    std::string street;
+    std::string city;
+};
+
+struct Person {
+    std::string first_name;
+    std::string last_name;
+    Address address;  // ❌ Will cause compilation errors for CSV
+};
+```
+
+### Using rfl::Flatten for nested objects
+If you need to include nested objects, use `rfl::Flatten` to explicitly flatten them:
+```cpp
+struct Address {
+    std::string street;
+    std::string city;
+};
+
+struct Person {
+    std::string first_name;
+    std::string last_name;
+    rfl::Flatten<Address> address;  // ✅ This will flatten the Address fields
+};
+
+// The resulting CSV will have columns: first_name, last_name, street, city
+```
+
+### No variant types
+Variant types like `std::variant`, `rfl::Variant`, or `rfl::TaggedUnion` cannot be serialized to CSV as separate columns:
+```cpp
+// ❌ This will NOT work
+struct Person {
+    std::string first_name;
+    std::variant<std::string, int> status;  // Variant - not supported
+    rfl::Variant<std::string, int> type;    // rfl::Variant - not supported
+    rfl::TaggedUnion<"type", std::string, int> category;  // TaggedUnion - not supported
+};
+```
+
+### No arrays (except bytestrings)
+The CSV format in reflect-cpp does not support arrays (lists) of values in a single column. The only array-like field supported is binary data represented as bytestrings:
+```cpp
+// ❌ This will NOT work
+struct Person {
+    std::string first_name;
+    std::vector<std::string> hobbies;  // Array of strings - not supported
+    std::vector<int> scores;           // Array of integers - not supported
+    std::vector<Address> addresses;    // Array of objects - not supported
+};
+
+// ✅ This works
+struct Blob {
+    std::vector<char> binary_data;      // Binary data supported as bytestring
+};
+```
+
+### Use cases
+CSV is ideal for:
+- Data exchange and interoperability
+- Simple, flat data structures with consistent types
+- Human-readable datasets
+
+CSV is less suitable for:
+- Complex nested data structures
+- Data with arrays or variant types
+- Strict schemas with evolving types
+- Very large datasets where binary columnar formats are preferred
+
diff --git a/docs/supported_formats/parquet.md b/docs/supported_formats/parquet.md
@@ -1,3 +1,5 @@
+# parquet
+
 For Parquet support, you must also include the header `<rfl/parquet.hpp>` and link to the [Apache Arrow](https://arrow.apache.org/) and [Apache Parquet](https://parquet.apache.org/) libraries.
 Furthermore, when compiling reflect-cpp, you need to pass `-DREFLECTCPP_PARQUET=ON` to cmake.
 

diff --git a/include/rfl/Timestamp.hpp b/include/rfl/Timestamp.hpp
@@ -64,6 +64,12 @@ class Timestamp {
     return from_string(_str.c_str());
   }
 
+  /// Forwards _str to from_string() and returns its result: a Result holding
+  /// the parsed Timestamp on success or an Error otherwise.
+  static Result<Timestamp> make(const auto& _str) noexcept {
+    return from_string(_str);
+  }
+
   /// Necessary for the serialization to work.
   ReflectionType reflection() const {
     char outstr[200];

diff --git a/include/rfl/csv.hpp b/include/rfl/csv.hpp
@@ -0,0 +1,10 @@
+#ifndef RFL_CSV_HPP_
+#define RFL_CSV_HPP_
+
+#include "../rfl.hpp"
+#include "csv/load.hpp"
+#include "csv/read.hpp"
+#include "csv/save.hpp"
+#include "csv/write.hpp"
+
+#endif
diff --git a/include/rfl/csv/Settings.hpp b/include/rfl/csv/Settings.hpp
@@ -0,0 +1,97 @@
+#ifndef RFL_CSV_SETTINGS_HPP_
+#define RFL_CSV_SETTINGS_HPP_
+
+#include <arrow/csv/api.h>
+#include <arrow/io/api.h>
+
+#include "../Field.hpp"
+#include "../replace.hpp"
+
+namespace rfl::csv {
+
+struct Settings {  // Options for rfl::csv; every with_*() method is const and returns a new Settings by value.
+  /// Maximum number of rows processed at a time.
+  /// Data is processed in batches of N rows. This number
+  /// can impact performance.
+  int32_t batch_size = 1024;
+
+  /// Field delimiter character.
+  char delimiter = ',';
+
+  /// Whether quoting is used.
+  bool quoting = true;
+
+  /// Quoting character (if quoting is true). Only relevant for reading.
+  char quote_char = '"';
+
+  /// The string to be used for null values. Quotes are not allowed in this
+  /// string.
+  std::string null_string = "n/a";
+
+  /// Whether a quote inside a value is double-quoted. Only relevant for
+  /// reading.
+  bool double_quote = true;
+
+  /// Whether escaping is used. Only relevant for reading.
+  bool escaping = false;
+
+  /// Escaping character (if escaping is true). Only relevant for reading.
+  char escape_char = arrow::csv::kDefaultEscapeChar;  // Default taken from Arrow's CSV API.
+
+  /// Whether values are allowed to contain CR (0x0d) and LF (0x0a)
+  /// characters. Only relevant for reading.
+  bool newlines_in_values = false;
+
+  /// Whether empty lines are ignored.
+  /// If false, an empty line represents a single empty value (assuming a
+  /// one-column CSV file). Only relevant for reading.
+  bool ignore_empty_lines = true;
+
+  Settings with_batch_size(const int32_t _batch_size) const noexcept {  // Copy with batch_size replaced (via replace()).
+    return replace(*this, make_field<"batch_size">(_batch_size));
+  }
+
+  Settings with_delimiter(const char _delimiter) const noexcept {  // Copy with delimiter replaced.
+    return replace(*this, make_field<"delimiter">(_delimiter));
+  }
+
+  Settings with_quoting(const bool _quoting) const noexcept {  // Copy with quoting replaced.
+    return replace(*this, make_field<"quoting">(_quoting));
+  }
+
+  Settings with_quote_char(const char _quote_char) const noexcept {  // Copy with quote_char replaced.
+    return replace(*this, make_field<"quote_char">(_quote_char));
+  }
+
+  Settings with_null_string(const std::string& _null_string) const noexcept {  // Copy with null_string replaced.
+    return replace(*this, make_field<"null_string">(_null_string));
+  }
+
+  Settings with_double_quote(const bool _double_quote) const noexcept {  // Copy with double_quote replaced.
+    return replace(*this, make_field<"double_quote">(_double_quote));
+  }
+
+  Settings with_escaping(const bool _escaping) const noexcept {  // Copy with escaping replaced.
+    return replace(*this, make_field<"escaping">(_escaping));
+  }
+
+  Settings with_escape_char(const char _escape_char) const noexcept {  // Copy with escape_char replaced.
+    return replace(*this, make_field<"escape_char">(_escape_char));
+  }
+
+  Settings with_newlines_in_values(
+      const bool _newlines_in_values) const noexcept {  // Copy with newlines_in_values replaced.
+    return replace(*this,
+                   make_field<"newlines_in_values">(_newlines_in_values));
+  }
+
+  Settings with_ignore_empty_lines(
+      const bool _ignore_empty_lines) const noexcept {  // Copy with ignore_empty_lines replaced.
+    return replace(*this,
+                   make_field<"ignore_empty_lines">(_ignore_empty_lines));
+  }
+};
+
+}  // namespace rfl::csv
+
+#endif
-Original file line number
+Diff line change
@@ Expand Up / @@ -45,6 +45,7 @@ @@
     *.bson
     *.capnproto
     *.cbor
+    *.csv
     *.json
     *.fb
     *.flexbuf
@@ Expand Down @@