Skip to content
37 changes: 24 additions & 13 deletions docs/supported_formats/csv.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,24 +40,35 @@ const rfl::Result<std::vector<Person>> result = rfl::csv::read<std::vector<Perso

## Settings

CSV behavior can be configured using `rfl::csv::Settings`:
CSV behavior can be configured using `rfl::csv::Settings`. Each option is set
through a single `with<>()` accessor that takes either a pointer-to-member or
the field name as a template argument and returns a new copy of `Settings`
with the chosen field replaced:

```cpp
const auto settings = rfl::csv::Settings{}
.with_delimiter(';')
.with_quoting(true)
.with_quote_char('"')
.with_null_string("n/a")
.with_double_quote(true)
.with_escaping(false)
.with_escape_char('\\')
.with_newlines_in_values(false)
.with_ignore_empty_lines(true)
.with_batch_size(1024);
.with<&rfl::csv::Settings::delimiter>(';')
.with<&rfl::csv::Settings::quoting>(true)
.with<&rfl::csv::Settings::quote_char>('"')
.with<&rfl::csv::Settings::null_string>("n/a")
.with<&rfl::csv::Settings::double_quote>(true)
.with<&rfl::csv::Settings::escaping>(false)
.with<&rfl::csv::Settings::escape_char>('\\')
.with<&rfl::csv::Settings::newlines_in_values>(false)
.with<&rfl::csv::Settings::ignore_empty_lines>(true)
.with<&rfl::csv::Settings::batch_size>(1024);

const std::string csv_text = rfl::csv::write(people, settings);
```

The same call is also available with a string literal naming the field:

```cpp
const auto settings = rfl::csv::Settings{}
.with<"delimiter">(';')
.with<"quoting">(true);
```

Key options:
- `batch_size` - Maximum number of rows processed per batch (performance tuning)
- `delimiter` - Field delimiter character
Expand All @@ -84,7 +95,7 @@ rfl::csv::save("/path/to/file.csv", people);
With custom settings:

```cpp
const auto settings = rfl::csv::Settings{}.with_delimiter(';');
const auto settings = rfl::csv::Settings{}.with<"delimiter">(';');
rfl::csv::save("/path/to/file.csv", people, settings);
```

Expand All @@ -102,7 +113,7 @@ rfl::csv::write(people, my_ostream);
With custom settings:

```cpp
const auto settings = rfl::csv::Settings{}.with_delimiter(';');
const auto settings = rfl::csv::Settings{}.with<"delimiter">(';');
rfl::csv::write(people, my_ostream, settings);
```

Expand Down
29 changes: 20 additions & 9 deletions docs/supported_formats/parquet.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,16 +39,27 @@ const rfl::Result<std::vector<Person>> result = rfl::parquet::read<std::vector<P

## Settings and compression

Parquet supports various compression algorithms and chunk sizes. You can configure these using the `Settings` struct:
Parquet supports various compression algorithms and chunk sizes. You can
configure these using the `Settings` struct. Each option is set through a
single `with<>()` accessor that takes either a pointer-to-member or the
field name as a template argument and returns a new copy of `Settings` with
the chosen field replaced:

```cpp
const auto settings = rfl::parquet::Settings{}
.with_compression(rfl::parquet::Compression::GZIP)
.with_chunksize(1000);
.with<&rfl::parquet::Settings::compression>(rfl::parquet::Compression::GZIP)
.with<&rfl::parquet::Settings::chunksize>(1000);

const std::vector<char> bytes = rfl::parquet::write(people, settings);
```

The same call is also available with a string literal naming the field:

```cpp
const auto settings = rfl::parquet::Settings{}
.with<"compression">(rfl::parquet::Compression::GZIP);
```

Available compression options include:

- `UNCOMPRESSED` - No compression, fastest read/write but largest file size
Expand All @@ -65,16 +76,16 @@ Available compression options include:
```cpp
// Examples of different compression settings
const auto snappy_settings = rfl::parquet::Settings{}
.with_compression(rfl::parquet::Compression::SNAPPY);
.with<&rfl::parquet::Settings::compression>(rfl::parquet::Compression::SNAPPY);

const auto gzip_settings = rfl::parquet::Settings{}
.with_compression(rfl::parquet::Compression::GZIP);
.with<&rfl::parquet::Settings::compression>(rfl::parquet::Compression::GZIP);

const auto zstd_settings = rfl::parquet::Settings{}
.with_compression(rfl::parquet::Compression::ZSTD);
.with<&rfl::parquet::Settings::compression>(rfl::parquet::Compression::ZSTD);

const auto uncompressed_settings = rfl::parquet::Settings{}
.with_compression(rfl::parquet::Compression::UNCOMPRESSED);
.with<&rfl::parquet::Settings::compression>(rfl::parquet::Compression::UNCOMPRESSED);
```

## Loading and saving
Expand All @@ -92,7 +103,7 @@ With custom settings:

```cpp
const auto settings = rfl::parquet::Settings{}
.with_compression(rfl::parquet::Compression::GZIP);
.with<&rfl::parquet::Settings::compression>(rfl::parquet::Compression::GZIP);
rfl::parquet::save("/path/to/file.parquet", people, settings);
```

Expand All @@ -111,7 +122,7 @@ With custom settings:

```cpp
const auto settings = rfl::parquet::Settings{}
.with_compression(rfl::parquet::Compression::GZIP);
.with<&rfl::parquet::Settings::compression>(rfl::parquet::Compression::GZIP);
rfl::parquet::write(people, my_ostream, settings);
```

Expand Down
1 change: 1 addition & 0 deletions include/rfl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@
#include "rfl/patterns.hpp"
#include "rfl/remove_fields.hpp"
#include "rfl/replace.hpp"
#include "rfl/Settings.hpp"
#include "rfl/to_generic.hpp"
#include "rfl/to_named_tuple.hpp"
#include "rfl/to_view.hpp"
Expand Down
103 changes: 103 additions & 0 deletions include/rfl/Settings.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
#ifndef RFL_SETTINGS_HPP_
#define RFL_SETTINGS_HPP_

#include <cstddef>
#include <type_traits>
#include <utility>

#include "Field.hpp"
#include "internal/StringLiteral.hpp"
#include "internal/field_index_by_name.hpp"
#include "internal/field_index_from_ptm.hpp"
#include "internal/get_field_names.hpp"
#include "internal/get_ith_field_from_fake_object.hpp"
#include "replace.hpp"

namespace rfl::internal {

/// Compile-time predicate over a non-type template argument.
/// This primary template is the fallback for every argument that does not
/// match the pointer-to-member specialization below; it always yields false.
template <auto MemberPtr, class ExpectedOwner>
struct member_ptr_traits : std::bool_constant<false> {};

/// Selected when the argument has the form `FieldType Owner::*`.
/// Yields true only if the class named in the pointer type is exactly
/// ExpectedOwner and the pointed-to member type is const-qualified.
template <class Owner, class FieldType, FieldType Owner::* MemberPtr,
          class ExpectedOwner>
struct member_ptr_traits<MemberPtr, ExpectedOwner>
    : std::conjunction<std::is_same<Owner, ExpectedOwner>,
                       std::is_const<FieldType>> {};

/// Concept form of member_ptr_traits: satisfied when
/// member_ptr_traits<FieldPtr, ExpectedOwner>::value is true, i.e. when
/// FieldPtr is a pointer to a member whose class type is exactly
/// ExpectedOwner and whose member type is const-qualified.
template <auto FieldPtr, class ExpectedOwner>
concept const_member_of = member_ptr_traits<FieldPtr, ExpectedOwner>::value;

/// Produces a modified copy of `_obj` in which the member addressed by
/// `FieldPtr` holds `_value`; `_obj` itself is taken by const reference and
/// is not changed. This is the implementation behind the with<&T::field>()
/// method generated by RFL_SETTINGS_OPS.
///
/// `FieldPtr` must satisfy const_member_of<FieldPtr, T>. `_value` is taken by
/// value, typed as the member's type with reference and const removed, and is
/// moved into the replacement field.
///
/// The field name is recovered through the fake-object path that the rest of
/// reflect-cpp uses, so MSVC parses it correctly.
template <class T, auto FieldPtr>
  requires const_member_of<FieldPtr, T>
T settings_with_replaced(
    const T& _obj,
    std::remove_const_t<
        std::remove_reference_t<decltype(std::declval<T>().*FieldPtr)>>
        _value) {
  // Index of the designated member as reported by field_index_v; the
  // all-ones value is treated as "not found".
  constexpr std::size_t not_found = static_cast<std::size_t>(-1);
  constexpr std::size_t field_pos = field_index_v<T, FieldPtr>;
  static_assert(
      field_pos != not_found,
      "FieldPtr does not refer to a non-static data member of T. "
      "Make sure you pass &T::field, where field is declared inside T.");

  // Turn the index into the field's name so rfl::replace can address it.
  constexpr auto field_name =
      get_field_name_str_lit<T,
                             fake_field_ptr_for_name_lookup<T, field_pos>()>();
  return rfl::replace(_obj, rfl::make_field<field_name>(std::move(_value)));
}

/// Returns a copy of `_obj` with the field with the given Name replaced by
/// `_value`. Used as the body of the with<"name">() overload generated by
/// RFL_SETTINGS_OPS.
///
/// `_obj` is taken by const reference and is not modified. `_value` is taken
/// by value, typed via field_value_type_at_t for the index that
/// field_index_by_name_v reports for Name, and is moved into the replacement
/// field.
///
/// A static_assert rejects the case where field_index_by_name_v yields
/// static_cast<std::size_t>(-1), i.e. no field with that name.
template <class T, StringLiteral Name>
T settings_with_replaced_by_name(
const T& _obj,
field_value_type_at_t<T, field_index_by_name_v<T, Name>> _value) {
constexpr std::size_t I = field_index_by_name_v<T, Name>;
static_assert(I != static_cast<std::size_t>(-1),
"No field with the given name exists in T.");
// NOTE(review): the const-ness check below is only compiled when _MSC_VER is
// defined, so other compilers do not enforce it in this overload, whereas the
// pointer-to-member overload is constrained by const_member_of on every
// compiler. Confirm that restricting the check to MSVC is intentional.
#ifdef _MSC_VER
static_assert(ith_field_is_const<T, static_cast<int>(I)>(),
"Fields in Settings structs must be const.");
#endif
return rfl::replace(_obj, rfl::make_field<Name>(std::move(_value)));
Comment on lines +58 to +65
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The with<"name">() overload doesn't enforce that fields in a Settings struct must be const, unlike the pointer-to-member overload with<&T::field>(). This could lead to mutable settings fields being used, which goes against the design goal of this PR.

To ensure consistency and enforce immutability for all Settings fields, I suggest adding a static_assert to check that the field is const.

  constexpr std::size_t I = field_index_by_name_v<T, Name>;
  static_assert(I != static_cast<std::size_t>(-1),
                "No field with the given name exists in T.");
  using FieldTypeWithCv = std::remove_pointer_t<
      decltype(get_ith_field_from_fake_object<T, static_cast<int>(I)>())>;
  static_assert(std::is_const_v<FieldTypeWithCv>,
                "Fields in Settings structs must be const.");
  return rfl::replace(_obj, rfl::make_field<Name>(std::move(_value)));

}

} // namespace rfl::internal

/// Defines the standard with<&T::field>(value) and with<"field">(value)
/// accessors inside a settings struct. The struct must be a flat aggregate
/// (no base classes). Fields should be declared const so the only way to
/// mutate them is via with(), which returns a new copy with the chosen field
/// replaced. Place the macro at the end of the struct body, after all data
/// members.
///
/// `Derived` is the type of the struct the macro is placed in. The macro
/// expands to two const member function templates named `with`:
///   - with<FieldPtr>(value): constrained by
///     ::rfl::internal::const_member_of<FieldPtr, Derived>, so FieldPtr must
///     be a pointer to a const-qualified member whose class type is exactly
///     Derived. The value parameter has the member's type with reference and
///     const removed. Forwards to ::rfl::internal::settings_with_replaced.
///   - with<Name>(value): takes the field name as a StringLiteral template
///     argument. The value parameter type comes from field_value_type_at_t
///     for the index reported by field_index_by_name_v. Forwards to
///     ::rfl::internal::settings_with_replaced_by_name.
/// Both overloads pass *this and the moved value on and return a `Derived`
/// by value.
///
/// Usage:
///   struct MySettings {
///     const int some_option = 42;
///     RFL_SETTINGS_OPS(MySettings)
///   };
///
///   auto a = MySettings{}.with<&MySettings::some_option>(100);
///   auto b = MySettings{}.with<"some_option">(100);
#define RFL_SETTINGS_OPS(Derived) \
template <auto FieldPtr> \
requires ::rfl::internal::const_member_of<FieldPtr, Derived> \
Derived with(std::remove_const_t<std::remove_reference_t< \
decltype(std::declval<Derived>().*FieldPtr)>> \
_value) const { \
return ::rfl::internal::settings_with_replaced<Derived, FieldPtr>( \
*this, std::move(_value)); \
} \
template <::rfl::internal::StringLiteral Name> \
Derived with(::rfl::internal::field_value_type_at_t< \
Derived, \
::rfl::internal::field_index_by_name_v<Derived, Name>> \
_value) const { \
return ::rfl::internal::settings_with_replaced_by_name<Derived, Name>( \
*this, std::move(_value)); \
}

#endif
36 changes: 24 additions & 12 deletions include/rfl/csv/Settings.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,87 +4,99 @@
#include <arrow/csv/api.h>
#include <arrow/io/api.h>

#include "../Field.hpp"
#include "../replace.hpp"
#include "../Settings.hpp"
#include "../internal/deprecated_with.hpp"

namespace rfl::csv {

struct Settings {
/// Maximum number of rows processed at a time.
/// Data is processed in batches of N rows. This number
/// can impact performance.
int32_t batch_size = 1024;
const int32_t batch_size = 1024;

/// Field delimiter.
char delimiter = ',';
const char delimiter = ',';

/// Whether quoting is used.
bool quoting = true;
const bool quoting = true;

/// Quoting character (if quoting is true). Only relevant for reading.
char quote_char = '"';
const char quote_char = '"';

/// The string to be used for null values. Quotes are not allowed in this
/// string.
std::string null_string = "n/a";
const std::string null_string = "n/a";

/// Whether a quote inside a value is double-quoted. Only relevant for
/// reading.
bool double_quote = true;
const bool double_quote = true;

/// Whether escaping is used. Only relevant for reading.
bool escaping = false;
const bool escaping = false;

/// Escaping character (if escaping is true). Only relevant for reading.
char escape_char = arrow::csv::kDefaultEscapeChar;
const char escape_char = arrow::csv::kDefaultEscapeChar;

/// Whether values are allowed to contain CR (0x0d) and LF (0x0a)
/// characters. Only relevant for reading.
bool newlines_in_values = false;
const bool newlines_in_values = false;

/// Whether empty lines are ignored.
/// If false, an empty line represents a single empty value (assuming a
/// one-column CSV file). Only relevant for reading.
bool ignore_empty_lines = true;
const bool ignore_empty_lines = true;

RFL_SETTINGS_OPS(Settings)

RFL_DEPRECATED_WITH(batch_size)
Settings with_batch_size(const int32_t _batch_size) const noexcept {
return replace(*this, make_field<"batch_size">(_batch_size));
}

RFL_DEPRECATED_WITH(delimiter)
Settings with_delimiter(const char _delimiter) const noexcept {
return replace(*this, make_field<"delimiter">(_delimiter));
}

RFL_DEPRECATED_WITH(quoting)
Settings with_quoting(const bool _quoting) const noexcept {
return replace(*this, make_field<"quoting">(_quoting));
}

RFL_DEPRECATED_WITH(quote_char)
Settings with_quote_char(const char _quote_char) const noexcept {
return replace(*this, make_field<"quote_char">(_quote_char));
}

RFL_DEPRECATED_WITH(null_string)
Settings with_null_string(const std::string& _null_string) const noexcept {
return replace(*this, make_field<"null_string">(_null_string));
}

RFL_DEPRECATED_WITH(double_quote)
Settings with_double_quote(const bool _double_quote) const noexcept {
return replace(*this, make_field<"double_quote">(_double_quote));
}

RFL_DEPRECATED_WITH(escaping)
Settings with_escaping(const bool _escaping) const noexcept {
return replace(*this, make_field<"escaping">(_escaping));
}

RFL_DEPRECATED_WITH(escape_char)
Settings with_escape_char(const char _escape_char) const noexcept {
return replace(*this, make_field<"escape_char">(_escape_char));
}

RFL_DEPRECATED_WITH(newlines_in_values)
Settings with_newlines_in_values(
const bool _newlines_in_values) const noexcept {
return replace(*this,
make_field<"newlines_in_values">(_newlines_in_values));
}

RFL_DEPRECATED_WITH(ignore_empty_lines)
Settings with_ignore_empty_lines(
const bool _ignore_empty_lines) const noexcept {
return replace(*this,
Expand Down
12 changes: 12 additions & 0 deletions include/rfl/internal/deprecated_with.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#ifndef RFL_INTERNAL_DEPRECATED_WITH_HPP_
#define RFL_INTERNAL_DEPRECATED_WITH_HPP_

/// Marks a legacy with_<field>() method as deprecated, pointing at the
/// unified RFL_SETTINGS_OPS-generated with<&T::field>() / with<"field">()
/// API. Used in csv::Settings and parquet::Settings to keep the old
/// per-field setter names working for existing users.
///
/// Expands to a [[deprecated("...")]] attribute and nothing else, so it must
/// be written immediately before the method declaration it annotates.
/// `field_name` is stringized with `#` and spliced into the message through
/// adjacent string-literal concatenation; for `delimiter` the message reads:
///   Use .with<&Settings::delimiter>(value) or .with<"delimiter">(value)
///   instead.
/// The message text hard-codes the class name `Settings`.
#define RFL_DEPRECATED_WITH(field_name) \
[[deprecated("Use .with<&Settings::" #field_name ">(value) or " \
".with<\"" #field_name "\">(value) instead.")]]

#endif
Loading
Loading