Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
*.bson
*.capnproto
*.cbor
*.csv
*.json
*.fb
*.flexbuf
Expand Down
16 changes: 14 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ option(REFLECTCPP_AVRO "Enable AVRO support" ${REFLECTCPP_ALL_FORMATS})
option(REFLECTCPP_BSON "Enable BSON support" ${REFLECTCPP_ALL_FORMATS})
option(REFLECTCPP_CAPNPROTO "Enable Cap’n Proto support" ${REFLECTCPP_ALL_FORMATS})
option(REFLECTCPP_CBOR "Enable CBOR support" ${REFLECTCPP_ALL_FORMATS})
option(REFLECTCPP_CSV "Enable CSV support" ${REFLECTCPP_ALL_FORMATS})
option(REFLECTCPP_FLEXBUFFERS "Enable flexbuffers support" ${REFLECTCPP_ALL_FORMATS})
option(REFLECTCPP_MSGPACK "Enable msgpack support" ${REFLECTCPP_ALL_FORMATS})
option(REFLECTCPP_PARQUET "Enable parquet support" ${REFLECTCPP_ALL_FORMATS})
Expand Down Expand Up @@ -55,8 +56,8 @@ endif()

if (REFLECTCPP_BUILD_TESTS OR REFLECTCPP_BUILD_BENCHMARKS OR
(REFLECTCPP_JSON AND NOT REFLECTCPP_USE_BUNDLED_DEPENDENCIES) OR REFLECTCPP_AVRO OR
REFLECTCPP_BSON OR REFLECTCPP_CAPNPROTO OR REFLECTCPP_CBOR OR REFLECTCPP_FLEXBUFFERS OR
REFLECTCPP_MSGPACK OR REFLECTCPP_PARQUET OR REFLECTCPP_XML OR
REFLECTCPP_BSON OR REFLECTCPP_CAPNPROTO OR REFLECTCPP_CBOR OR REFLECTCPP_CSV OR
REFLECTCPP_FLEXBUFFERS OR REFLECTCPP_MSGPACK OR REFLECTCPP_PARQUET OR REFLECTCPP_XML OR
REFLECTCPP_TOML OR REFLECTCPP_UBJSON OR REFLECTCPP_YAML)
# enable vcpkg per default if features other than JSON are required
set(REFLECTCPP_USE_VCPKG_DEFAULT ON)
Expand Down Expand Up @@ -95,6 +96,10 @@ if (REFLECTCPP_USE_VCPKG)
list(APPEND VCPKG_MANIFEST_FEATURES "cbor")
endif()

# When CSV support is enabled, add "csv" to the list of vcpkg manifest
# features (VCPKG_MANIFEST_FEATURES).
if (REFLECTCPP_CSV)
list(APPEND VCPKG_MANIFEST_FEATURES "csv")
endif()

if (NOT REFLECTCPP_USE_BUNDLED_DEPENDENCIES)
list(APPEND VCPKG_MANIFEST_FEATURES "ctre")
endif()
Expand Down Expand Up @@ -246,6 +251,13 @@ if (REFLECTCPP_CBOR)
include_directories(PUBLIC ${jsoncons_INCLUDE_DIRS})
endif ()

# CSV support: locate Apache Arrow and link it publicly into reflectcpp.
if (REFLECTCPP_CSV)
# NOTE(review): this guard checks for a target literally named "Arrow", whereas
# the targets linked below are Arrow::arrow_static / Arrow::arrow_shared.
# Confirm the guard can ever be false; otherwise find_package always runs.
if (NOT TARGET Arrow)
find_package(Arrow CONFIG REQUIRED)
endif()
# ${ARROW_BUILD_STATIC} is substituted into the generator expression, which
# selects Arrow::arrow_static when the value is truthy and Arrow::arrow_shared
# otherwise.
target_link_libraries(reflectcpp PUBLIC "$<IF:$<BOOL:${ARROW_BUILD_STATIC}>,Arrow::arrow_static,Arrow::arrow_shared>")
endif ()

if (REFLECTCPP_FLEXBUFFERS)
list(APPEND REFLECT_CPP_SOURCES
src/reflectcpp_flexbuf.cpp
Expand Down
218 changes: 218 additions & 0 deletions docs/supported_formats/csv.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
# csv

For CSV support, include the header `<rfl/csv.hpp>` and link to the [Apache Arrow](https://arrow.apache.org/) library.
Furthermore, when compiling reflect-cpp, you need to pass `-DREFLECTCPP_CSV=ON` to cmake.

CSV is a tabular text format. Like other tabular formats in reflect-cpp, CSV is designed for collections of flat records and has limitations for nested or variant types.

## Reading and writing

Suppose you have a struct like this:

```cpp
struct Person {
std::string first_name;
std::string last_name = "Simpson";
rfl::Timestamp<"%Y-%m-%d"> birthday;
unsigned int age;
rfl::Email email;
};
```

Important: CSV is a tabular format that requires collections of records. You cannot serialize individual structs - you must use containers like `std::vector<Person>`, `std::deque<Person>`, etc.

Write a collection to a string (CSV bytes) like this:

```cpp
const auto people = std::vector<Person>{
Person{.first_name = "Bart", .birthday = "1987-04-19", .age = 10, .email = "bart@simpson.com"},
Person{.first_name = "Lisa", .birthday = "1987-04-19", .age = 8, .email = "lisa@simpson.com"}
};

const std::string csv_text = rfl::csv::write(people);
```

Parse from a string or bytes view:

```cpp
const rfl::Result<std::vector<Person>> result = rfl::csv::read<std::vector<Person>>(csv_text);
```

## Settings

CSV behavior can be configured using `rfl::csv::Settings`:

```cpp
const auto settings = rfl::csv::Settings{}
.with_delimiter(';')
.with_quoting(true)
.with_quote_char('"')
.with_null_string("n/a")
.with_double_quote(true)
.with_escaping(false)
.with_escape_char('\\')
.with_newlines_in_values(false)
.with_ignore_empty_lines(true)
.with_batch_size(1024);

const std::string csv_text = rfl::csv::write(people, settings);
```

Key options:
- `batch_size` - Maximum number of rows processed per batch (performance tuning)
- `delimiter` - Field delimiter character
- `quoting` - Whether to use quoting when writing
- `quote_char` - Quote character used when reading
- `null_string` - String representation for null values
- `double_quote` - Whether a quote inside a value is double-quoted (reading)
- `escaping` - Whether escaping is used (reading)
- `escape_char` - Escape character (reading)
- `newlines_in_values` - Whether CR/LF are allowed inside values (reading)
- `ignore_empty_lines` - Whether empty lines are ignored (reading)

## Loading and saving

You can load from and save to disk:

```cpp
const rfl::Result<std::vector<Person>> result = rfl::csv::load<std::vector<Person>>("/path/to/file.csv");

const auto people = std::vector<Person>{...};
rfl::csv::save("/path/to/file.csv", people);
```

With custom settings:

```cpp
const auto settings = rfl::csv::Settings{}.with_delimiter(';');
rfl::csv::save("/path/to/file.csv", people, settings);
```

## Reading from and writing into streams

You can read from any `std::istream` and write to any `std::ostream`:

```cpp
const rfl::Result<std::vector<Person>> result = rfl::csv::read<std::vector<Person>>(my_istream);

const auto people = std::vector<Person>{...};
rfl::csv::write(people, my_ostream);
```

With custom settings:

```cpp
const auto settings = rfl::csv::Settings{}.with_delimiter(';');
rfl::csv::write(people, my_ostream, settings);
```

## Field name transformations

Like other formats, CSV supports field name transformations via processors, e.g. `SnakeCaseToCamelCase`:

```cpp
const auto people = std::vector<Person>{...};
const auto result = rfl::csv::read<std::vector<Person>, rfl::SnakeCaseToCamelCase>(csv_text);
```

## Enums and validation

CSV supports enums and validated types. Enums are written/read as strings:

```cpp
enum class FirstName { Bart, Lisa, Maggie, Homer };

struct Person {
rfl::Rename<"firstName", FirstName> first_name;
rfl::Rename<"lastName", std::string> last_name;
rfl::Timestamp<"%Y-%m-%d"> birthday;
rfl::Validator<unsigned int, rfl::Minimum<0>, rfl::Maximum<130>> age;
rfl::Email email;
};
```

## Limitations of tabular formats

CSV, like other tabular formats, has limitations compared to hierarchical formats such as JSON or XML:

### Collections requirement
You must serialize collections, not individual objects:
```cpp
std::vector<Person> people = {...}; // ✅ Correct
Person person = {...}; // ❌ Wrong - must be in a container
```

### No nested objects
Each field must be a primitive type, enum, or a simple validated type. Nested objects are not automatically flattened:
```cpp
// This would NOT work as expected - nested objects are not automatically flattened
struct Address {
std::string street;
std::string city;
};

struct Person {
std::string first_name;
std::string last_name;
Address address; // ❌ Will cause compilation errors for CSV
};
```

### Using rfl::Flatten for nested objects
If you need to include nested objects, use `rfl::Flatten` to explicitly flatten them:
```cpp
struct Address {
std::string street;
std::string city;
};

struct Person {
std::string first_name;
std::string last_name;
rfl::Flatten<Address> address; // ✅ This will flatten the Address fields
};

// The resulting CSV will have columns: first_name, last_name, street, city
```

### No variant types
Variant types like `std::variant`, `rfl::Variant`, or `rfl::TaggedUnion` cannot be serialized to CSV as separate columns:
```cpp
// ❌ This will NOT work
struct Person {
std::string first_name;
std::variant<std::string, int> status; // Variant - not supported
rfl::Variant<std::string, int> type; // rfl::Variant - not supported
rfl::TaggedUnion<"type", std::string, int> category; // TaggedUnion - not supported
};
```

### No arrays (except bytestrings)
The CSV format in reflect-cpp does not support arrays (lists) of values in a single column. The only array-like field supported is binary data represented as bytestrings:
```cpp
// ❌ This will NOT work
struct Person {
std::string first_name;
std::vector<std::string> hobbies; // Array of strings - not supported
std::vector<int> scores; // Array of integers - not supported
std::vector<Address> addresses; // Array of objects - not supported
};

// ✅ This works
struct Blob {
std::vector<char> binary_data; // Binary data supported as bytestring
};
```

### Use cases
CSV is ideal for:
- Data exchange and interoperability
- Simple, flat data structures with consistent types
- Human-readable datasets

CSV is less suitable for:
- Complex nested data structures
- Data with arrays or variant types
- Data whose schema must be strictly enforced or evolves over time
- Very large datasets where binary columnar formats are preferred

2 changes: 2 additions & 0 deletions docs/supported_formats/parquet.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# parquet

For Parquet support, include the header `<rfl/parquet.hpp>` and link to the [Apache Arrow](https://arrow.apache.org/) and [Apache Parquet](https://parquet.apache.org/) libraries.
Furthermore, when compiling reflect-cpp, you need to pass `-DREFLECTCPP_PARQUET=ON` to cmake.

Expand Down
6 changes: 6 additions & 0 deletions include/rfl/Timestamp.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,12 @@ class Timestamp {
return from_string(_str.c_str());
}

/// Returns a result containing the timestamp when successful or an Error
/// otherwise.
/// Generic overload: the argument is passed unchanged to from_string and
/// the result of that call is returned.
/// NOTE(review): from_string is not visible in this hunk; presumably _str
/// must be of a type from_string accepts (e.g. a C string) - confirm.
static Result<Timestamp> make(const auto& _str) noexcept {
return from_string(_str);
}

/// Necessary for the serialization to work.
ReflectionType reflection() const {
char outstr[200];
Expand Down
10 changes: 10 additions & 0 deletions include/rfl/csv.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#ifndef RFL_CSV_HPP_
#define RFL_CSV_HPP_

#include "../rfl.hpp"
#include "csv/load.hpp"
#include "csv/read.hpp"
#include "csv/save.hpp"
#include "csv/write.hpp"

#endif
97 changes: 97 additions & 0 deletions include/rfl/csv/Settings.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
#ifndef RFL_CSV_SETTINGS_HPP_
#define RFL_CSV_SETTINGS_HPP_

#include <arrow/csv/api.h>
#include <arrow/io/api.h>

#include "../Field.hpp"
#include "../replace.hpp"

namespace rfl::csv {

/// Options that control how CSV data is read and written.
///
/// The struct is a plain aggregate with defaults for every option. Each
/// with_* member returns a modified copy and leaves the object it was called
/// on unchanged, so calls can be chained.
struct Settings {
  /// Upper bound on the number of rows handled in one batch. Rows are
  /// processed batch by batch, so this value can affect performance.
  int32_t batch_size = 1024;

  /// Character separating two fields.
  char delimiter = ',';

  /// Toggles the use of quoting.
  bool quoting = true;

  /// Character used for quoting (applies when quoting is true). Only
  /// relevant for reading.
  char quote_char = '"';

  /// Text that stands for a null value. Must not contain quotes.
  std::string null_string = "n/a";

  /// Whether a quote occurring inside a value is double-quoted. Only
  /// relevant for reading.
  bool double_quote = true;

  /// Toggles the use of escaping. Only relevant for reading.
  bool escaping = false;

  /// Character used for escaping (applies when escaping is true). Only
  /// relevant for reading.
  char escape_char = arrow::csv::kDefaultEscapeChar;

  /// Whether CR (0x0d) and LF (0x0a) characters may appear inside values.
  /// Only relevant for reading.
  bool newlines_in_values = false;

  /// Whether empty lines are skipped. When false, an empty line stands for
  /// a single empty value (assuming a one-column CSV file). Only relevant
  /// for reading.
  bool ignore_empty_lines = true;

  /// Returns a copy of these settings with batch_size set to _batch_size.
  Settings with_batch_size(const int32_t _batch_size) const noexcept {
    Settings updated = *this;
    updated.batch_size = _batch_size;
    return updated;
  }

  /// Returns a copy of these settings with delimiter set to _delimiter.
  Settings with_delimiter(const char _delimiter) const noexcept {
    Settings updated = *this;
    updated.delimiter = _delimiter;
    return updated;
  }

  /// Returns a copy of these settings with quoting set to _quoting.
  Settings with_quoting(const bool _quoting) const noexcept {
    Settings updated = *this;
    updated.quoting = _quoting;
    return updated;
  }

  /// Returns a copy of these settings with quote_char set to _quote_char.
  Settings with_quote_char(const char _quote_char) const noexcept {
    Settings updated = *this;
    updated.quote_char = _quote_char;
    return updated;
  }

  /// Returns a copy of these settings with null_string set to _null_string.
  Settings with_null_string(const std::string& _null_string) const noexcept {
    Settings updated = *this;
    updated.null_string = _null_string;
    return updated;
  }

  /// Returns a copy of these settings with double_quote set to
  /// _double_quote.
  Settings with_double_quote(const bool _double_quote) const noexcept {
    Settings updated = *this;
    updated.double_quote = _double_quote;
    return updated;
  }

  /// Returns a copy of these settings with escaping set to _escaping.
  Settings with_escaping(const bool _escaping) const noexcept {
    Settings updated = *this;
    updated.escaping = _escaping;
    return updated;
  }

  /// Returns a copy of these settings with escape_char set to _escape_char.
  Settings with_escape_char(const char _escape_char) const noexcept {
    Settings updated = *this;
    updated.escape_char = _escape_char;
    return updated;
  }

  /// Returns a copy of these settings with newlines_in_values set to
  /// _newlines_in_values.
  Settings with_newlines_in_values(
      const bool _newlines_in_values) const noexcept {
    Settings updated = *this;
    updated.newlines_in_values = _newlines_in_values;
    return updated;
  }

  /// Returns a copy of these settings with ignore_empty_lines set to
  /// _ignore_empty_lines.
  Settings with_ignore_empty_lines(
      const bool _ignore_empty_lines) const noexcept {
    Settings updated = *this;
    updated.ignore_empty_lines = _ignore_empty_lines;
    return updated;
  }
};

} // namespace rfl::csv

#endif
Loading
Loading