From 93ad80ded903b03bf65e116ee7028ce0769bc1b2 Mon Sep 17 00:00:00 2001 From: scMarkus <55751061+scMarkus@users.noreply.github.com> Date: Tue, 29 Aug 2023 17:16:37 +0200 Subject: [PATCH] fix(codecs) csv encoding quoting bug (#18320) * added additional csv configuration options found potential bug on writing lines with quoted fields * implemented suggestions of pront * added configurable QuotingStyle and made problematic quoted tests pass for now * switched to defaults by serde * refactor csv codec to use low level csv-core to fix missing closing quote bug * fixed typo * use field iterator to count written filds instead of manual counter * moved internal_buffer into CsvSerializer to not recreated with every call to encode() * improoved comments * implemented suggestions * cleanup * refactored test * low level csv writer api refactor * Revert "low level csv writer api refactor" This reverts commit c730d58d6c7d69aa897278766accd513bd023e01. * some more cleanup * generate-component-docs --- Cargo.lock | 2 +- lib/codecs/Cargo.toml | 2 +- lib/codecs/src/encoding/format/csv.rs | 435 +++++++++++++++++- .../reference/components/sinks/base/amqp.cue | 84 +++- .../sinks/base/aws_cloudwatch_logs.cue | 84 +++- .../sinks/base/aws_kinesis_firehose.cue | 84 +++- .../sinks/base/aws_kinesis_streams.cue | 84 +++- .../components/sinks/base/aws_s3.cue | 84 +++- .../components/sinks/base/aws_sns.cue | 84 +++- .../components/sinks/base/aws_sqs.cue | 84 +++- .../components/sinks/base/azure_blob.cue | 84 +++- .../components/sinks/base/console.cue | 84 +++- .../components/sinks/base/databend.cue | 84 +++- .../reference/components/sinks/base/file.cue | 84 +++- .../sinks/base/gcp_chronicle_unstructured.cue | 84 +++- .../sinks/base/gcp_cloud_storage.cue | 84 +++- .../components/sinks/base/gcp_pubsub.cue | 84 +++- .../reference/components/sinks/base/http.cue | 84 +++- .../components/sinks/base/humio_logs.cue | 84 +++- .../reference/components/sinks/base/kafka.cue | 84 +++- 
.../reference/components/sinks/base/loki.cue | 84 +++- .../reference/components/sinks/base/nats.cue | 84 +++- .../components/sinks/base/papertrail.cue | 84 +++- .../components/sinks/base/pulsar.cue | 84 +++- .../reference/components/sinks/base/redis.cue | 84 +++- .../components/sinks/base/socket.cue | 84 +++- .../components/sinks/base/splunk_hec_logs.cue | 84 +++- .../components/sinks/base/webhdfs.cue | 84 +++- .../components/sinks/base/websocket.cue | 84 +++- 29 files changed, 2340 insertions(+), 283 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9f3f552ab5424..7275391a0aa03 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2147,7 +2147,7 @@ dependencies = [ "apache-avro", "bytes 1.4.0", "chrono", - "csv", + "csv-core", "derivative", "dyn-clone", "futures 0.3.28", diff --git a/lib/codecs/Cargo.toml b/lib/codecs/Cargo.toml index b7a9e32e6e83b..6787e42860bd4 100644 --- a/lib/codecs/Cargo.toml +++ b/lib/codecs/Cargo.toml @@ -9,7 +9,7 @@ publish = false apache-avro = { version = "0.15.0", default-features = false } bytes = { version = "1", default-features = false } chrono = { version = "0.4", default-features = false } -csv = { version = "1.2", default-features = false } +csv-core = { version = "0.1.10", default-features = false } derivative = { version = "2", default-features = false } dyn-clone = { version = "1", default-features = false } lookup = { package = "vector-lookup", path = "../vector-lookup", default-features = false } diff --git a/lib/codecs/src/encoding/format/csv.rs b/lib/codecs/src/encoding/format/csv.rs index fe7191785821a..a440c994d01ef 100644 --- a/lib/codecs/src/encoding/format/csv.rs +++ b/lib/codecs/src/encoding/format/csv.rs @@ -1,6 +1,7 @@ use crate::encoding::BuildError; -use bytes::{BufMut, BytesMut}; +use bytes::BytesMut; use chrono::SecondsFormat; +use csv_core::{WriteResult, Writer, WriterBuilder}; use lookup::lookup_v2::ConfigTargetPath; use tokio_util::codec::Encoder; use vector_core::{ @@ -9,6 +10,30 @@ use vector_core::{ schema, 
}; +/// The quoting style to use when writing CSV data. +#[crate::configurable_component] +#[derive(Copy, Clone, Debug, PartialEq, Eq, Default)] +#[serde(rename_all = "snake_case")] +pub enum QuoteStyle { + /// This puts quotes around every field. Always. + Always, + + /// This puts quotes around fields only when necessary. + /// They are necessary when fields contain a quote, delimiter or record terminator. + /// Quotes are also necessary when writing an empty record + /// (which is indistinguishable from a record with one empty field). + #[default] + Necessary, + + /// This puts quotes around all fields that are non-numeric. + /// Namely, when writing a field that does not parse as a valid float or integer, + /// then quotes will be used even if they aren’t strictly necessary. + NonNumeric, + + /// This never writes quotes, even if it would produce invalid CSV data. + Never, +} + /// Config used to build a `CsvSerializer`. #[crate::configurable_component] #[derive(Debug, Clone)] @@ -28,7 +53,7 @@ impl CsvSerializerConfig { if self.csv.fields.is_empty() { Err("At least one CSV field must be specified".into()) } else { - Ok(CsvSerializer::new(self.csv.fields.clone())) + Ok(CsvSerializer::new(self.clone())) } } @@ -49,6 +74,57 @@ impl CsvSerializerConfig { #[crate::configurable_component] #[derive(Debug, Clone)] pub struct CsvSerializerOptions { + /// The field delimiter to use when writing CSV. + #[serde( + default = "default_delimiter", + with = "vector_core::serde::ascii_char", + skip_serializing_if = "vector_core::serde::skip_serializing_if_default" + )] + pub delimiter: u8, + + /// Enable double quote escapes. + /// + /// This is enabled by default, but it may be disabled. When disabled, quotes in + /// field data are escaped instead of doubled. + #[serde( + default = "default_double_quote", + skip_serializing_if = "vector_core::serde::skip_serializing_if_default" + )] + pub double_quote: bool, + + /// The escape character to use when writing CSV. 
+ /// + /// In some variants of CSV, quotes are escaped using a special escape character + /// like \ (instead of escaping quotes by doubling them). + /// + /// To use this `double_quotes` needs to be disabled as well otherwise it is ignored + #[serde( + default = "default_escape", + with = "vector_core::serde::ascii_char", + skip_serializing_if = "vector_core::serde::skip_serializing_if_default" + )] + pub escape: u8, + + /// The quote character to use when writing CSV. + #[serde( + default = "default_escape", + with = "vector_core::serde::ascii_char", + skip_serializing_if = "vector_core::serde::skip_serializing_if_default" + )] + quote: u8, + + /// The quoting style to use when writing CSV data. + #[serde( + default, + skip_serializing_if = "vector_core::serde::skip_serializing_if_default" + )] + pub quote_style: QuoteStyle, + + /// Set the capacity (in bytes) of the internal buffer used in the CSV writer. + /// This defaults to a reasonable setting. + #[serde(default = "default_capacity")] + pub capacity: usize, + /// Configures the fields that will be encoded, as well as the order in which they /// appear in the output. 
/// @@ -59,16 +135,82 @@ pub struct CsvSerializerOptions { pub fields: Vec, } +const fn default_delimiter() -> u8 { + b',' +} + +const fn default_escape() -> u8 { + b'"' +} + +const fn default_double_quote() -> bool { + true +} + +const fn default_capacity() -> usize { + 8 * (1 << 10) +} + +impl Default for CsvSerializerOptions { + fn default() -> Self { + Self { + delimiter: default_delimiter(), + double_quote: default_double_quote(), + escape: default_escape(), + quote: default_escape(), + quote_style: QuoteStyle::default(), + capacity: default_capacity(), + fields: Vec::new(), + } + } +} + +impl CsvSerializerOptions { + fn csv_quote_style(&self) -> csv_core::QuoteStyle { + match self.quote_style { + QuoteStyle::Always => csv_core::QuoteStyle::Always, + QuoteStyle::Necessary => csv_core::QuoteStyle::Necessary, + QuoteStyle::NonNumeric => csv_core::QuoteStyle::NonNumeric, + QuoteStyle::Never => csv_core::QuoteStyle::Never, + } + } +} + /// Serializer that converts an `Event` to bytes using the CSV format. #[derive(Debug, Clone)] pub struct CsvSerializer { + // Box because of clippy error: 'large size difference between variants' + // in SerializerConfig enum + writer: Box, fields: Vec, + internal_buffer: Vec, } impl CsvSerializer { /// Creates a new `CsvSerializer`. 
- pub const fn new(fields: Vec) -> Self { - Self { fields } + pub fn new(config: CsvSerializerConfig) -> Self { + // 'flexible' is not needed since every event is a single context free csv line + let writer = Box::new( + WriterBuilder::new() + .delimiter(config.csv.delimiter) + .double_quote(config.csv.double_quote) + .escape(config.csv.escape) + .quote_style(config.csv.csv_quote_style()) + .quote(config.csv.quote) + .build(), + ); + + let internal_buffer = if config.csv.capacity < 1 { + vec![0; 1] + } else { + vec![0; config.csv.capacity] + }; + + Self { + writer, + internal_buffer, + fields: config.csv.fields, + } } } @@ -77,25 +219,86 @@ impl Encoder for CsvSerializer { fn encode(&mut self, event: Event, buffer: &mut BytesMut) -> Result<(), Self::Error> { let log = event.into_log(); - let mut wtr = csv::Writer::from_writer(buffer.writer()); - for field in &self.fields { - match log.get(field) { - Some(Value::Bytes(bytes)) => { - wtr.write_field(String::from_utf8_lossy(bytes).to_string())? 
+ + let mut used_buffer_bytes = 0; + for (fields_written, field) in self.fields.iter().enumerate() { + let field_value = log.get(field); + + // write field delimiter + if fields_written > 0 { + loop { + let (res, bytes_written) = self + .writer + .delimiter(&mut self.internal_buffer[used_buffer_bytes..]); + used_buffer_bytes += bytes_written; + match res { + WriteResult::InputEmpty => { + break; + } + WriteResult::OutputFull => { + buffer.extend_from_slice(&self.internal_buffer[..used_buffer_bytes]); + used_buffer_bytes = 0; + } + } } - Some(Value::Integer(int)) => wtr.write_field(int.to_string())?, - Some(Value::Float(float)) => wtr.write_field(float.to_string())?, - Some(Value::Boolean(bool)) => wtr.write_field(bool.to_string())?, + } + + // get string value of current field + let field_value = match field_value { + Some(Value::Bytes(bytes)) => String::from_utf8_lossy(bytes).into_owned(), + Some(Value::Integer(int)) => int.to_string(), + Some(Value::Float(float)) => float.to_string(), + Some(Value::Boolean(bool)) => bool.to_string(), Some(Value::Timestamp(timestamp)) => { - wtr.write_field(timestamp.to_rfc3339_opts(SecondsFormat::AutoSi, true))? + timestamp.to_rfc3339_opts(SecondsFormat::AutoSi, true) } - Some(Value::Null) => wtr.write_field("")?, + Some(Value::Null) => String::new(), // Other value types: Array, Regex, Object are not supported by the CSV format. 
- Some(_) => wtr.write_field("")?, - None => wtr.write_field("")?, + Some(_) => String::new(), + None => String::new(), + }; + + // mutable byte_slice so it can be written in chunks if internal_buffer fills up + let mut field_value = field_value.as_bytes(); + // write field_value to internal buffer + loop { + let (res, bytes_read, bytes_written) = self + .writer + .field(field_value, &mut self.internal_buffer[used_buffer_bytes..]); + + field_value = &field_value[bytes_read..]; + used_buffer_bytes += bytes_written; + + match res { + WriteResult::InputEmpty => break, + WriteResult::OutputFull => { + buffer.extend_from_slice(&self.internal_buffer[..used_buffer_bytes]); + used_buffer_bytes = 0; + } + } + } + } + + // finish current event (potentially add closing quotes) + loop { + let (res, bytes_written) = self + .writer + .finish(&mut self.internal_buffer[used_buffer_bytes..]); + used_buffer_bytes += bytes_written; + match res { + WriteResult::InputEmpty => break, + WriteResult::OutputFull => { + buffer.extend_from_slice(&self.internal_buffer[..used_buffer_bytes]); + used_buffer_bytes = 0; + } } } - wtr.flush()?; + + // final flush of internal_buffer + if used_buffer_bytes > 0 { + buffer.extend_from_slice(&self.internal_buffer[..used_buffer_bytes]); + } + Ok(()) } } @@ -110,9 +313,25 @@ mod tests { use super::*; + fn make_event_with_fields(field_data: Vec<(&str, &str)>) -> (Vec, Event) { + let mut fields: Vec = std::vec::Vec::new(); + let mut tree = std::collections::BTreeMap::new(); + + for (field_name, field_value) in field_data.iter() { + let field = ConfigTargetPath::try_from(field_name.to_string()).unwrap(); + fields.push(field); + + let field_value = Value::from(field_value.to_string()); + tree.insert(field_name.to_string().clone(), field_value); + } + + let event = Event::Log(LogEvent::from(tree)); + (fields, event) + } + #[test] fn build_error_on_empty_fields() { - let opts = CsvSerializerOptions { fields: vec![] }; + let opts = 
CsvSerializerOptions::default(); let config = CsvSerializerConfig::new(opts); let err = config.build().unwrap_err(); assert_eq!(err.to_string(), "At least one CSV field must be specified"); @@ -131,7 +350,6 @@ mod tests { "bool" => Value::from(true), "other" => Value::from("data"), })); - let fields = vec![ ConfigTargetPath::try_from("foo".to_string()).unwrap(), ConfigTargetPath::try_from("int".to_string()).unwrap(), @@ -143,7 +361,12 @@ mod tests { ConfigTargetPath::try_from("quote".to_string()).unwrap(), ConfigTargetPath::try_from("bool".to_string()).unwrap(), ]; - let config = CsvSerializerConfig::new(CsvSerializerOptions { fields }); + + let opts = CsvSerializerOptions { + fields, + ..Default::default() + }; + let config = CsvSerializerConfig::new(opts); let mut serializer = config.build().unwrap(); let mut bytes = BytesMut::new(); @@ -171,9 +394,14 @@ mod tests { ConfigTargetPath::try_from("field3".to_string()).unwrap(), ConfigTargetPath::try_from("field2".to_string()).unwrap(), ]; - let config = CsvSerializerConfig::new(CsvSerializerOptions { fields }); + let opts = CsvSerializerOptions { + fields, + ..Default::default() + }; + let config = CsvSerializerConfig::new(opts); let mut serializer = config.build().unwrap(); let mut bytes = BytesMut::new(); + serializer.encode(event, &mut bytes).unwrap(); assert_eq!( @@ -181,4 +409,169 @@ mod tests { b"value1,value5,value5,value3,value2".as_slice() ); } + + #[test] + fn correct_quoting() { + let event = Event::Log(LogEvent::from(btreemap! 
{ + "field1" => Value::from("hello world"), + "field2" => Value::from(1), + "field3" => Value::from("foo\"bar"), + "field4" => Value::from("baz,bas"), + })); + let fields = vec![ + ConfigTargetPath::try_from("field1".to_string()).unwrap(), + ConfigTargetPath::try_from("field2".to_string()).unwrap(), + ConfigTargetPath::try_from("field3".to_string()).unwrap(), + ConfigTargetPath::try_from("field4".to_string()).unwrap(), + ]; + + let mut default_bytes = BytesMut::new(); + let mut never_bytes = BytesMut::new(); + let mut always_bytes = BytesMut::new(); + let mut non_numeric_bytes = BytesMut::new(); + + CsvSerializerConfig::new(CsvSerializerOptions { + fields: fields.clone(), + ..Default::default() + }) + .build() + .unwrap() + .encode(event.clone(), &mut default_bytes) + .unwrap(); + + CsvSerializerConfig::new(CsvSerializerOptions { + fields: fields.clone(), + quote_style: QuoteStyle::Never, + ..Default::default() + }) + .build() + .unwrap() + .encode(event.clone(), &mut never_bytes) + .unwrap(); + + CsvSerializerConfig::new(CsvSerializerOptions { + fields: fields.clone(), + quote_style: QuoteStyle::Always, + ..Default::default() + }) + .build() + .unwrap() + .encode(event.clone(), &mut always_bytes) + .unwrap(); + + CsvSerializerConfig::new(CsvSerializerOptions { + fields: fields.clone(), + quote_style: QuoteStyle::NonNumeric, + ..Default::default() + }) + .build() + .unwrap() + .encode(event.clone(), &mut non_numeric_bytes) + .unwrap(); + + assert_eq!( + default_bytes.freeze(), + b"hello world,1,\"foo\"\"bar\",\"baz,bas\"".as_slice() + ); + assert_eq!( + never_bytes.freeze(), + b"hello world,1,foo\"bar,baz,bas".as_slice() + ); + assert_eq!( + always_bytes.freeze(), + b"\"hello world\",\"1\",\"foo\"\"bar\",\"baz,bas\"".as_slice() + ); + assert_eq!( + non_numeric_bytes.freeze(), + b"\"hello world\",1,\"foo\"\"bar\",\"baz,bas\"".as_slice() + ); + } + + #[test] + fn custom_delimiter() { + let (fields, event) = + make_event_with_fields(vec![("field1", "value1"), 
("field2", "value2")]); + let opts = CsvSerializerOptions { + fields, + delimiter: b'\t', + ..Default::default() + }; + let config = CsvSerializerConfig::new(opts); + let mut serializer = config.build().unwrap(); + let mut bytes = BytesMut::new(); + + serializer.encode(event, &mut bytes).unwrap(); + + assert_eq!(bytes.freeze(), b"value1\tvalue2".as_slice()); + } + + #[test] + fn custom_escape_char() { + let (fields, event) = make_event_with_fields(vec![("field1", "foo\"bar")]); + let opts = CsvSerializerOptions { + fields, + double_quote: false, + escape: b'\\', + ..Default::default() + }; + let config = CsvSerializerConfig::new(opts); + let mut serializer = config.build().unwrap(); + let mut bytes = BytesMut::new(); + + serializer.encode(event, &mut bytes).unwrap(); + + assert_eq!(bytes.freeze(), b"\"foo\\\"bar\"".as_slice()); + } + + #[test] + fn custom_quote_char() { + let (fields, event) = make_event_with_fields(vec![("field1", "foo \" $ bar")]); + let opts = CsvSerializerOptions { + fields, + quote: b'$', + ..Default::default() + }; + let config = CsvSerializerConfig::new(opts); + let mut serializer = config.build().unwrap(); + let mut bytes = BytesMut::new(); + + serializer.encode(event, &mut bytes).unwrap(); + + assert_eq!(bytes.freeze(), b"$foo \" $$ bar$".as_slice()); + } + + #[test] + fn more_input_then_capacity() { + let (fields, event) = make_event_with_fields(vec![("field1", "foo bar")]); + let opts = CsvSerializerOptions { + fields, + capacity: 3, + ..Default::default() + }; + let config = CsvSerializerConfig::new(opts); + let mut serializer = config.build().unwrap(); + let mut bytes = BytesMut::new(); + + serializer.encode(event, &mut bytes).unwrap(); + + assert_eq!(bytes.freeze(), b"foo bar".as_slice()); + } + + #[test] + fn multiple_events() { + let (fields, event1) = make_event_with_fields(vec![("field1", "foo,")]); + let (_, event2) = make_event_with_fields(vec![("field1", "\nbar")]); + let opts = CsvSerializerOptions { + fields, + 
..Default::default() + }; + let config = CsvSerializerConfig::new(opts); + let mut serializer = config.build().unwrap(); + let mut bytes = BytesMut::new(); + + serializer.encode(event1, &mut bytes).unwrap(); + serializer.encode(event2, &mut bytes).unwrap(); + + assert_eq!(bytes.freeze(), b"\"foo,\"\"\nbar\"".as_slice()); + } } diff --git a/website/cue/reference/components/sinks/base/amqp.cue b/website/cue/reference/components/sinks/base/amqp.cue index cd201bb517f08..213a2e226c245 100644 --- a/website/cue/reference/components/sinks/base/amqp.cue +++ b/website/cue/reference/components/sinks/base/amqp.cue @@ -127,18 +127,82 @@ base: components: sinks: amqp: configuration: { description: "The CSV Serializer Options." relevant_when: "codec = \"csv\"" required: true - type: object: options: fields: { - description: """ - Configures the fields that will be encoded, as well as the order in which they - appear in the output. + type: object: options: { + capacity: { + description: """ + Set the capacity (in bytes) of the internal buffer used in the CSV writer. + This defaults to a reasonable setting. + """ + required: false + type: uint: default: 8192 + } + delimiter: { + description: "The field delimiter to use when writing CSV." + required: false + type: uint: default: 44 + } + double_quote: { + description: """ + Enable double quote escapes. + + This is enabled by default, but it may be disabled. When disabled, quotes in + field data are escaped instead of doubled. + """ + required: false + type: bool: default: true + } + escape: { + description: """ + The escape character to use when writing CSV. - If a field is not present in the event, the output will be an empty string. + In some variants of CSV, quotes are escaped using a special escape character + like \\ (instead of escaping quotes by doubling them). - Values of type `Array`, `Object`, and `Regex` are not supported and the - output will be an empty string. 
- """ - required: true - type: array: items: type: string: {} + To use this `double_quotes` needs to be disabled as well otherwise it is ignored + """ + required: false + type: uint: default: 34 + } + fields: { + description: """ + Configures the fields that will be encoded, as well as the order in which they + appear in the output. + + If a field is not present in the event, the output will be an empty string. + + Values of type `Array`, `Object`, and `Regex` are not supported and the + output will be an empty string. + """ + required: true + type: array: items: type: string: {} + } + quote: { + description: "The quote character to use when writing CSV." + required: false + type: uint: default: 34 + } + quote_style: { + description: "The quoting style to use when writing CSV data." + required: false + type: string: { + default: "necessary" + enum: { + always: "This puts quotes around every field. Always." + necessary: """ + This puts quotes around fields only when necessary. + They are necessary when fields contain a quote, delimiter or record terminator. + Quotes are also necessary when writing an empty record + (which is indistinguishable from a record with one empty field). + """ + never: "This never writes quotes, even if it would produce invalid CSV data." + non_numeric: """ + This puts quotes around all fields that are non-numeric. + Namely, when writing a field that does not parse as a valid float or integer, + then quotes will be used even if they aren’t strictly necessary. 
+ """ + } + } + } } } except_fields: { diff --git a/website/cue/reference/components/sinks/base/aws_cloudwatch_logs.cue b/website/cue/reference/components/sinks/base/aws_cloudwatch_logs.cue index c0549fec49341..e18453f47703d 100644 --- a/website/cue/reference/components/sinks/base/aws_cloudwatch_logs.cue +++ b/website/cue/reference/components/sinks/base/aws_cloudwatch_logs.cue @@ -298,18 +298,82 @@ base: components: sinks: aws_cloudwatch_logs: configuration: { description: "The CSV Serializer Options." relevant_when: "codec = \"csv\"" required: true - type: object: options: fields: { - description: """ - Configures the fields that will be encoded, as well as the order in which they - appear in the output. + type: object: options: { + capacity: { + description: """ + Set the capacity (in bytes) of the internal buffer used in the CSV writer. + This defaults to a reasonable setting. + """ + required: false + type: uint: default: 8192 + } + delimiter: { + description: "The field delimiter to use when writing CSV." + required: false + type: uint: default: 44 + } + double_quote: { + description: """ + Enable double quote escapes. - If a field is not present in the event, the output will be an empty string. + This is enabled by default, but it may be disabled. When disabled, quotes in + field data are escaped instead of doubled. + """ + required: false + type: bool: default: true + } + escape: { + description: """ + The escape character to use when writing CSV. - Values of type `Array`, `Object`, and `Regex` are not supported and the - output will be an empty string. - """ - required: true - type: array: items: type: string: {} + In some variants of CSV, quotes are escaped using a special escape character + like \\ (instead of escaping quotes by doubling them). 
+ + To use this `double_quotes` needs to be disabled as well otherwise it is ignored + """ + required: false + type: uint: default: 34 + } + fields: { + description: """ + Configures the fields that will be encoded, as well as the order in which they + appear in the output. + + If a field is not present in the event, the output will be an empty string. + + Values of type `Array`, `Object`, and `Regex` are not supported and the + output will be an empty string. + """ + required: true + type: array: items: type: string: {} + } + quote: { + description: "The quote character to use when writing CSV." + required: false + type: uint: default: 34 + } + quote_style: { + description: "The quoting style to use when writing CSV data." + required: false + type: string: { + default: "necessary" + enum: { + always: "This puts quotes around every field. Always." + necessary: """ + This puts quotes around fields only when necessary. + They are necessary when fields contain a quote, delimiter or record terminator. + Quotes are also necessary when writing an empty record + (which is indistinguishable from a record with one empty field). + """ + never: "This never writes quotes, even if it would produce invalid CSV data." + non_numeric: """ + This puts quotes around all fields that are non-numeric. + Namely, when writing a field that does not parse as a valid float or integer, + then quotes will be used even if they aren’t strictly necessary. + """ + } + } + } } } except_fields: { diff --git a/website/cue/reference/components/sinks/base/aws_kinesis_firehose.cue b/website/cue/reference/components/sinks/base/aws_kinesis_firehose.cue index d5774f897043b..71bdc26dc6b30 100644 --- a/website/cue/reference/components/sinks/base/aws_kinesis_firehose.cue +++ b/website/cue/reference/components/sinks/base/aws_kinesis_firehose.cue @@ -277,18 +277,82 @@ base: components: sinks: aws_kinesis_firehose: configuration: { description: "The CSV Serializer Options." 
relevant_when: "codec = \"csv\"" required: true - type: object: options: fields: { - description: """ - Configures the fields that will be encoded, as well as the order in which they - appear in the output. + type: object: options: { + capacity: { + description: """ + Set the capacity (in bytes) of the internal buffer used in the CSV writer. + This defaults to a reasonable setting. + """ + required: false + type: uint: default: 8192 + } + delimiter: { + description: "The field delimiter to use when writing CSV." + required: false + type: uint: default: 44 + } + double_quote: { + description: """ + Enable double quote escapes. - If a field is not present in the event, the output will be an empty string. + This is enabled by default, but it may be disabled. When disabled, quotes in + field data are escaped instead of doubled. + """ + required: false + type: bool: default: true + } + escape: { + description: """ + The escape character to use when writing CSV. - Values of type `Array`, `Object`, and `Regex` are not supported and the - output will be an empty string. - """ - required: true - type: array: items: type: string: {} + In some variants of CSV, quotes are escaped using a special escape character + like \\ (instead of escaping quotes by doubling them). + + To use this `double_quotes` needs to be disabled as well otherwise it is ignored + """ + required: false + type: uint: default: 34 + } + fields: { + description: """ + Configures the fields that will be encoded, as well as the order in which they + appear in the output. + + If a field is not present in the event, the output will be an empty string. + + Values of type `Array`, `Object`, and `Regex` are not supported and the + output will be an empty string. + """ + required: true + type: array: items: type: string: {} + } + quote: { + description: "The quote character to use when writing CSV." 
+ required: false + type: uint: default: 34 + } + quote_style: { + description: "The quoting style to use when writing CSV data." + required: false + type: string: { + default: "necessary" + enum: { + always: "This puts quotes around every field. Always." + necessary: """ + This puts quotes around fields only when necessary. + They are necessary when fields contain a quote, delimiter or record terminator. + Quotes are also necessary when writing an empty record + (which is indistinguishable from a record with one empty field). + """ + never: "This never writes quotes, even if it would produce invalid CSV data." + non_numeric: """ + This puts quotes around all fields that are non-numeric. + Namely, when writing a field that does not parse as a valid float or integer, + then quotes will be used even if they aren’t strictly necessary. + """ + } + } + } } } except_fields: { diff --git a/website/cue/reference/components/sinks/base/aws_kinesis_streams.cue b/website/cue/reference/components/sinks/base/aws_kinesis_streams.cue index 77edb8cb46aa8..77b8fe21a94f8 100644 --- a/website/cue/reference/components/sinks/base/aws_kinesis_streams.cue +++ b/website/cue/reference/components/sinks/base/aws_kinesis_streams.cue @@ -277,18 +277,82 @@ base: components: sinks: aws_kinesis_streams: configuration: { description: "The CSV Serializer Options." relevant_when: "codec = \"csv\"" required: true - type: object: options: fields: { - description: """ - Configures the fields that will be encoded, as well as the order in which they - appear in the output. + type: object: options: { + capacity: { + description: """ + Set the capacity (in bytes) of the internal buffer used in the CSV writer. + This defaults to a reasonable setting. + """ + required: false + type: uint: default: 8192 + } + delimiter: { + description: "The field delimiter to use when writing CSV." + required: false + type: uint: default: 44 + } + double_quote: { + description: """ + Enable double quote escapes. 
- If a field is not present in the event, the output will be an empty string. + This is enabled by default, but it may be disabled. When disabled, quotes in + field data are escaped instead of doubled. + """ + required: false + type: bool: default: true + } + escape: { + description: """ + The escape character to use when writing CSV. - Values of type `Array`, `Object`, and `Regex` are not supported and the - output will be an empty string. - """ - required: true - type: array: items: type: string: {} + In some variants of CSV, quotes are escaped using a special escape character + like \\ (instead of escaping quotes by doubling them). + + To use this `double_quotes` needs to be disabled as well otherwise it is ignored + """ + required: false + type: uint: default: 34 + } + fields: { + description: """ + Configures the fields that will be encoded, as well as the order in which they + appear in the output. + + If a field is not present in the event, the output will be an empty string. + + Values of type `Array`, `Object`, and `Regex` are not supported and the + output will be an empty string. + """ + required: true + type: array: items: type: string: {} + } + quote: { + description: "The quote character to use when writing CSV." + required: false + type: uint: default: 34 + } + quote_style: { + description: "The quoting style to use when writing CSV data." + required: false + type: string: { + default: "necessary" + enum: { + always: "This puts quotes around every field. Always." + necessary: """ + This puts quotes around fields only when necessary. + They are necessary when fields contain a quote, delimiter or record terminator. + Quotes are also necessary when writing an empty record + (which is indistinguishable from a record with one empty field). + """ + never: "This never writes quotes, even if it would produce invalid CSV data." + non_numeric: """ + This puts quotes around all fields that are non-numeric. 
+ Namely, when writing a field that does not parse as a valid float or integer, + then quotes will be used even if they aren’t strictly necessary. + """ + } + } + } } } except_fields: { diff --git a/website/cue/reference/components/sinks/base/aws_s3.cue b/website/cue/reference/components/sinks/base/aws_s3.cue index 1facd8fd4d40c..802bf4111a340 100644 --- a/website/cue/reference/components/sinks/base/aws_s3.cue +++ b/website/cue/reference/components/sinks/base/aws_s3.cue @@ -386,18 +386,82 @@ base: components: sinks: aws_s3: configuration: { description: "The CSV Serializer Options." relevant_when: "codec = \"csv\"" required: true - type: object: options: fields: { - description: """ - Configures the fields that will be encoded, as well as the order in which they - appear in the output. + type: object: options: { + capacity: { + description: """ + Set the capacity (in bytes) of the internal buffer used in the CSV writer. + This defaults to a reasonable setting. + """ + required: false + type: uint: default: 8192 + } + delimiter: { + description: "The field delimiter to use when writing CSV." + required: false + type: uint: default: 44 + } + double_quote: { + description: """ + Enable double quote escapes. - If a field is not present in the event, the output will be an empty string. + This is enabled by default, but it may be disabled. When disabled, quotes in + field data are escaped instead of doubled. + """ + required: false + type: bool: default: true + } + escape: { + description: """ + The escape character to use when writing CSV. - Values of type `Array`, `Object`, and `Regex` are not supported and the - output will be an empty string. - """ - required: true - type: array: items: type: string: {} + In some variants of CSV, quotes are escaped using a special escape character + like \\ (instead of escaping quotes by doubling them). 
+ + To use this, `double_quote` needs to be disabled as well; otherwise, this setting is ignored. + """ + required: false + type: uint: default: 34 + } + fields: { + description: """ + Configures the fields that will be encoded, as well as the order in which they + appear in the output. + + If a field is not present in the event, the output will be an empty string. + + Values of type `Array`, `Object`, and `Regex` are not supported and the + output will be an empty string. + """ + required: true + type: array: items: type: string: {} + } + quote: { + description: "The quote character to use when writing CSV." + required: false + type: uint: default: 34 + } + quote_style: { + description: "The quoting style to use when writing CSV data." + required: false + type: string: { + default: "necessary" + enum: { + always: "This puts quotes around every field. Always." + necessary: """ + This puts quotes around fields only when necessary. + They are necessary when fields contain a quote, delimiter or record terminator. + Quotes are also necessary when writing an empty record + (which is indistinguishable from a record with one empty field). + """ + never: "This never writes quotes, even if it would produce invalid CSV data." + non_numeric: """ + This puts quotes around all fields that are non-numeric. + Namely, when writing a field that does not parse as a valid float or integer, + then quotes will be used even if they aren’t strictly necessary. + """ + } + } + } } } except_fields: { diff --git a/website/cue/reference/components/sinks/base/aws_sns.cue b/website/cue/reference/components/sinks/base/aws_sns.cue index 8d7bc06f2405f..3cde47292280d 100644 --- a/website/cue/reference/components/sinks/base/aws_sns.cue +++ b/website/cue/reference/components/sinks/base/aws_sns.cue @@ -213,18 +213,82 @@ base: components: sinks: aws_sns: configuration: { description: "The CSV Serializer Options."
relevant_when: "codec = \"csv\"" required: true - type: object: options: fields: { - description: """ - Configures the fields that will be encoded, as well as the order in which they - appear in the output. + type: object: options: { + capacity: { + description: """ + Set the capacity (in bytes) of the internal buffer used in the CSV writer. + This defaults to a reasonable setting. + """ + required: false + type: uint: default: 8192 + } + delimiter: { + description: "The field delimiter to use when writing CSV." + required: false + type: uint: default: 44 + } + double_quote: { + description: """ + Enable double quote escapes. - If a field is not present in the event, the output will be an empty string. + This is enabled by default, but it may be disabled. When disabled, quotes in + field data are escaped instead of doubled. + """ + required: false + type: bool: default: true + } + escape: { + description: """ + The escape character to use when writing CSV. - Values of type `Array`, `Object`, and `Regex` are not supported and the - output will be an empty string. - """ - required: true - type: array: items: type: string: {} + In some variants of CSV, quotes are escaped using a special escape character + like \\ (instead of escaping quotes by doubling them). + + To use this, `double_quote` needs to be disabled as well; otherwise, this setting is ignored. + """ + required: false + type: uint: default: 34 + } + fields: { + description: """ + Configures the fields that will be encoded, as well as the order in which they + appear in the output. + + If a field is not present in the event, the output will be an empty string. + + Values of type `Array`, `Object`, and `Regex` are not supported and the + output will be an empty string. + """ + required: true + type: array: items: type: string: {} + } + quote: { + description: "The quote character to use when writing CSV."
+ required: false + type: uint: default: 34 + } + quote_style: { + description: "The quoting style to use when writing CSV data." + required: false + type: string: { + default: "necessary" + enum: { + always: "This puts quotes around every field. Always." + necessary: """ + This puts quotes around fields only when necessary. + They are necessary when fields contain a quote, delimiter or record terminator. + Quotes are also necessary when writing an empty record + (which is indistinguishable from a record with one empty field). + """ + never: "This never writes quotes, even if it would produce invalid CSV data." + non_numeric: """ + This puts quotes around all fields that are non-numeric. + Namely, when writing a field that does not parse as a valid float or integer, + then quotes will be used even if they aren’t strictly necessary. + """ + } + } + } } } except_fields: { diff --git a/website/cue/reference/components/sinks/base/aws_sqs.cue b/website/cue/reference/components/sinks/base/aws_sqs.cue index ac6787c61fc41..4a9cf238eb0e7 100644 --- a/website/cue/reference/components/sinks/base/aws_sqs.cue +++ b/website/cue/reference/components/sinks/base/aws_sqs.cue @@ -213,18 +213,82 @@ base: components: sinks: aws_sqs: configuration: { description: "The CSV Serializer Options." relevant_when: "codec = \"csv\"" required: true - type: object: options: fields: { - description: """ - Configures the fields that will be encoded, as well as the order in which they - appear in the output. + type: object: options: { + capacity: { + description: """ + Set the capacity (in bytes) of the internal buffer used in the CSV writer. + This defaults to a reasonable setting. + """ + required: false + type: uint: default: 8192 + } + delimiter: { + description: "The field delimiter to use when writing CSV." + required: false + type: uint: default: 44 + } + double_quote: { + description: """ + Enable double quote escapes. 
- If a field is not present in the event, the output will be an empty string. + This is enabled by default, but it may be disabled. When disabled, quotes in + field data are escaped instead of doubled. + """ + required: false + type: bool: default: true + } + escape: { + description: """ + The escape character to use when writing CSV. - Values of type `Array`, `Object`, and `Regex` are not supported and the - output will be an empty string. - """ - required: true - type: array: items: type: string: {} + In some variants of CSV, quotes are escaped using a special escape character + like \\ (instead of escaping quotes by doubling them). + + To use this, `double_quote` needs to be disabled as well; otherwise, this setting is ignored. + """ + required: false + type: uint: default: 34 + } + fields: { + description: """ + Configures the fields that will be encoded, as well as the order in which they + appear in the output. + + If a field is not present in the event, the output will be an empty string. + + Values of type `Array`, `Object`, and `Regex` are not supported and the + output will be an empty string. + """ + required: true + type: array: items: type: string: {} + } + quote: { + description: "The quote character to use when writing CSV." + required: false + type: uint: default: 34 + } + quote_style: { + description: "The quoting style to use when writing CSV data." + required: false + type: string: { + default: "necessary" + enum: { + always: "This puts quotes around every field. Always." + necessary: """ + This puts quotes around fields only when necessary. + They are necessary when fields contain a quote, delimiter or record terminator. + Quotes are also necessary when writing an empty record + (which is indistinguishable from a record with one empty field). + """ + never: "This never writes quotes, even if it would produce invalid CSV data." + non_numeric: """ + This puts quotes around all fields that are non-numeric.
+ Namely, when writing a field that does not parse as a valid float or integer, + then quotes will be used even if they aren’t strictly necessary. + """ + } + } + } } } except_fields: { diff --git a/website/cue/reference/components/sinks/base/azure_blob.cue b/website/cue/reference/components/sinks/base/azure_blob.cue index 46a1a6fbe5cf2..df84255dec51e 100644 --- a/website/cue/reference/components/sinks/base/azure_blob.cue +++ b/website/cue/reference/components/sinks/base/azure_blob.cue @@ -240,18 +240,82 @@ base: components: sinks: azure_blob: configuration: { description: "The CSV Serializer Options." relevant_when: "codec = \"csv\"" required: true - type: object: options: fields: { - description: """ - Configures the fields that will be encoded, as well as the order in which they - appear in the output. + type: object: options: { + capacity: { + description: """ + Set the capacity (in bytes) of the internal buffer used in the CSV writer. + This defaults to a reasonable setting. + """ + required: false + type: uint: default: 8192 + } + delimiter: { + description: "The field delimiter to use when writing CSV." + required: false + type: uint: default: 44 + } + double_quote: { + description: """ + Enable double quote escapes. - If a field is not present in the event, the output will be an empty string. + This is enabled by default, but it may be disabled. When disabled, quotes in + field data are escaped instead of doubled. + """ + required: false + type: bool: default: true + } + escape: { + description: """ + The escape character to use when writing CSV. - Values of type `Array`, `Object`, and `Regex` are not supported and the - output will be an empty string. - """ - required: true - type: array: items: type: string: {} + In some variants of CSV, quotes are escaped using a special escape character + like \\ (instead of escaping quotes by doubling them). 
+ + To use this, `double_quote` needs to be disabled as well; otherwise, this setting is ignored. + """ + required: false + type: uint: default: 34 + } + fields: { + description: """ + Configures the fields that will be encoded, as well as the order in which they + appear in the output. + + If a field is not present in the event, the output will be an empty string. + + Values of type `Array`, `Object`, and `Regex` are not supported and the + output will be an empty string. + """ + required: true + type: array: items: type: string: {} + } + quote: { + description: "The quote character to use when writing CSV." + required: false + type: uint: default: 34 + } + quote_style: { + description: "The quoting style to use when writing CSV data." + required: false + type: string: { + default: "necessary" + enum: { + always: "This puts quotes around every field. Always." + necessary: """ + This puts quotes around fields only when necessary. + They are necessary when fields contain a quote, delimiter or record terminator. + Quotes are also necessary when writing an empty record + (which is indistinguishable from a record with one empty field). + """ + never: "This never writes quotes, even if it would produce invalid CSV data." + non_numeric: """ + This puts quotes around all fields that are non-numeric. + Namely, when writing a field that does not parse as a valid float or integer, + then quotes will be used even if they aren’t strictly necessary. + """ + } + } + } } } except_fields: { diff --git a/website/cue/reference/components/sinks/base/console.cue b/website/cue/reference/components/sinks/base/console.cue index c4deb39c4ce1d..5b22906133e9d 100644 --- a/website/cue/reference/components/sinks/base/console.cue +++ b/website/cue/reference/components/sinks/base/console.cue @@ -111,18 +111,82 @@ base: components: sinks: console: configuration: { description: "The CSV Serializer Options."
relevant_when: "codec = \"csv\"" required: true - type: object: options: fields: { - description: """ - Configures the fields that will be encoded, as well as the order in which they - appear in the output. + type: object: options: { + capacity: { + description: """ + Set the capacity (in bytes) of the internal buffer used in the CSV writer. + This defaults to a reasonable setting. + """ + required: false + type: uint: default: 8192 + } + delimiter: { + description: "The field delimiter to use when writing CSV." + required: false + type: uint: default: 44 + } + double_quote: { + description: """ + Enable double quote escapes. + + This is enabled by default, but it may be disabled. When disabled, quotes in + field data are escaped instead of doubled. + """ + required: false + type: bool: default: true + } + escape: { + description: """ + The escape character to use when writing CSV. - If a field is not present in the event, the output will be an empty string. + In some variants of CSV, quotes are escaped using a special escape character + like \\ (instead of escaping quotes by doubling them). - Values of type `Array`, `Object`, and `Regex` are not supported and the - output will be an empty string. - """ - required: true - type: array: items: type: string: {} + To use this, `double_quote` needs to be disabled as well; otherwise, this setting is ignored. + """ + required: false + type: uint: default: 34 + } + fields: { + description: """ + Configures the fields that will be encoded, as well as the order in which they + appear in the output. + + If a field is not present in the event, the output will be an empty string. + + Values of type `Array`, `Object`, and `Regex` are not supported and the + output will be an empty string. + """ + required: true + type: array: items: type: string: {} + } + quote: { + description: "The quote character to use when writing CSV."
+ required: false + type: uint: default: 34 + } + quote_style: { + description: "The quoting style to use when writing CSV data." + required: false + type: string: { + default: "necessary" + enum: { + always: "This puts quotes around every field. Always." + necessary: """ + This puts quotes around fields only when necessary. + They are necessary when fields contain a quote, delimiter or record terminator. + Quotes are also necessary when writing an empty record + (which is indistinguishable from a record with one empty field). + """ + never: "This never writes quotes, even if it would produce invalid CSV data." + non_numeric: """ + This puts quotes around all fields that are non-numeric. + Namely, when writing a field that does not parse as a valid float or integer, + then quotes will be used even if they aren’t strictly necessary. + """ + } + } + } } } except_fields: { diff --git a/website/cue/reference/components/sinks/base/databend.cue b/website/cue/reference/components/sinks/base/databend.cue index 2da07c23af456..d1acbb167c4a1 100644 --- a/website/cue/reference/components/sinks/base/databend.cue +++ b/website/cue/reference/components/sinks/base/databend.cue @@ -156,18 +156,82 @@ base: components: sinks: databend: configuration: { description: "The CSV Serializer Options." relevant_when: "codec = \"csv\"" required: true - type: object: options: fields: { - description: """ - Configures the fields that will be encoded, as well as the order in which they - appear in the output. + type: object: options: { + capacity: { + description: """ + Set the capacity (in bytes) of the internal buffer used in the CSV writer. + This defaults to a reasonable setting. + """ + required: false + type: uint: default: 8192 + } + delimiter: { + description: "The field delimiter to use when writing CSV." + required: false + type: uint: default: 44 + } + double_quote: { + description: """ + Enable double quote escapes. 
- If a field is not present in the event, the output will be an empty string. + This is enabled by default, but it may be disabled. When disabled, quotes in + field data are escaped instead of doubled. + """ + required: false + type: bool: default: true + } + escape: { + description: """ + The escape character to use when writing CSV. - Values of type `Array`, `Object`, and `Regex` are not supported and the - output will be an empty string. - """ - required: true - type: array: items: type: string: {} + In some variants of CSV, quotes are escaped using a special escape character + like \\ (instead of escaping quotes by doubling them). + + To use this, `double_quote` needs to be disabled as well; otherwise, this setting is ignored. + """ + required: false + type: uint: default: 34 + } + fields: { + description: """ + Configures the fields that will be encoded, as well as the order in which they + appear in the output. + + If a field is not present in the event, the output will be an empty string. + + Values of type `Array`, `Object`, and `Regex` are not supported and the + output will be an empty string. + """ + required: true + type: array: items: type: string: {} + } + quote: { + description: "The quote character to use when writing CSV." + required: false + type: uint: default: 34 + } + quote_style: { + description: "The quoting style to use when writing CSV data." + required: false + type: string: { + default: "necessary" + enum: { + always: "This puts quotes around every field. Always." + necessary: """ + This puts quotes around fields only when necessary. + They are necessary when fields contain a quote, delimiter or record terminator. + Quotes are also necessary when writing an empty record + (which is indistinguishable from a record with one empty field). + """ + never: "This never writes quotes, even if it would produce invalid CSV data." + non_numeric: """ + This puts quotes around all fields that are non-numeric.
+ Namely, when writing a field that does not parse as a valid float or integer, + then quotes will be used even if they aren’t strictly necessary. + """ + } + } + } } } except_fields: { diff --git a/website/cue/reference/components/sinks/base/file.cue b/website/cue/reference/components/sinks/base/file.cue index 383b9d2086f37..0c2696db0c8b2 100644 --- a/website/cue/reference/components/sinks/base/file.cue +++ b/website/cue/reference/components/sinks/base/file.cue @@ -131,18 +131,82 @@ base: components: sinks: file: configuration: { description: "The CSV Serializer Options." relevant_when: "codec = \"csv\"" required: true - type: object: options: fields: { - description: """ - Configures the fields that will be encoded, as well as the order in which they - appear in the output. + type: object: options: { + capacity: { + description: """ + Set the capacity (in bytes) of the internal buffer used in the CSV writer. + This defaults to a reasonable setting. + """ + required: false + type: uint: default: 8192 + } + delimiter: { + description: "The field delimiter to use when writing CSV." + required: false + type: uint: default: 44 + } + double_quote: { + description: """ + Enable double quote escapes. + + This is enabled by default, but it may be disabled. When disabled, quotes in + field data are escaped instead of doubled. + """ + required: false + type: bool: default: true + } + escape: { + description: """ + The escape character to use when writing CSV. - If a field is not present in the event, the output will be an empty string. + In some variants of CSV, quotes are escaped using a special escape character + like \\ (instead of escaping quotes by doubling them). - Values of type `Array`, `Object`, and `Regex` are not supported and the - output will be an empty string. 
- """ - required: true - type: array: items: type: string: {} + To use this, `double_quote` needs to be disabled as well; otherwise, this setting is ignored. + """ + required: false + type: uint: default: 34 + } + fields: { + description: """ + Configures the fields that will be encoded, as well as the order in which they + appear in the output. + + If a field is not present in the event, the output will be an empty string. + + Values of type `Array`, `Object`, and `Regex` are not supported and the + output will be an empty string. + """ + required: true + type: array: items: type: string: {} + } + quote: { + description: "The quote character to use when writing CSV." + required: false + type: uint: default: 34 + } + quote_style: { + description: "The quoting style to use when writing CSV data." + required: false + type: string: { + default: "necessary" + enum: { + always: "This puts quotes around every field. Always." + necessary: """ + This puts quotes around fields only when necessary. + They are necessary when fields contain a quote, delimiter or record terminator. + Quotes are also necessary when writing an empty record + (which is indistinguishable from a record with one empty field). + """ + never: "This never writes quotes, even if it would produce invalid CSV data." + non_numeric: """ + This puts quotes around all fields that are non-numeric. + Namely, when writing a field that does not parse as a valid float or integer, + then quotes will be used even if they aren’t strictly necessary.
+ """ + } + } + } } } except_fields: { diff --git a/website/cue/reference/components/sinks/base/gcp_chronicle_unstructured.cue b/website/cue/reference/components/sinks/base/gcp_chronicle_unstructured.cue index 382162e99084c..d34ce7b9d3187 100644 --- a/website/cue/reference/components/sinks/base/gcp_chronicle_unstructured.cue +++ b/website/cue/reference/components/sinks/base/gcp_chronicle_unstructured.cue @@ -180,18 +180,82 @@ base: components: sinks: gcp_chronicle_unstructured: configuration: { description: "The CSV Serializer Options." relevant_when: "codec = \"csv\"" required: true - type: object: options: fields: { - description: """ - Configures the fields that will be encoded, as well as the order in which they - appear in the output. + type: object: options: { + capacity: { + description: """ + Set the capacity (in bytes) of the internal buffer used in the CSV writer. + This defaults to a reasonable setting. + """ + required: false + type: uint: default: 8192 + } + delimiter: { + description: "The field delimiter to use when writing CSV." + required: false + type: uint: default: 44 + } + double_quote: { + description: """ + Enable double quote escapes. - If a field is not present in the event, the output will be an empty string. + This is enabled by default, but it may be disabled. When disabled, quotes in + field data are escaped instead of doubled. + """ + required: false + type: bool: default: true + } + escape: { + description: """ + The escape character to use when writing CSV. - Values of type `Array`, `Object`, and `Regex` are not supported and the - output will be an empty string. - """ - required: true - type: array: items: type: string: {} + In some variants of CSV, quotes are escaped using a special escape character + like \\ (instead of escaping quotes by doubling them). 
+ + To use this, `double_quote` needs to be disabled as well; otherwise, this setting is ignored. + """ + required: false + type: uint: default: 34 + } + fields: { + description: """ + Configures the fields that will be encoded, as well as the order in which they + appear in the output. + + If a field is not present in the event, the output will be an empty string. + + Values of type `Array`, `Object`, and `Regex` are not supported and the + output will be an empty string. + """ + required: true + type: array: items: type: string: {} + } + quote: { + description: "The quote character to use when writing CSV." + required: false + type: uint: default: 34 + } + quote_style: { + description: "The quoting style to use when writing CSV data." + required: false + type: string: { + default: "necessary" + enum: { + always: "This puts quotes around every field. Always." + necessary: """ + This puts quotes around fields only when necessary. + They are necessary when fields contain a quote, delimiter or record terminator. + Quotes are also necessary when writing an empty record + (which is indistinguishable from a record with one empty field). + """ + never: "This never writes quotes, even if it would produce invalid CSV data." + non_numeric: """ + This puts quotes around all fields that are non-numeric. + Namely, when writing a field that does not parse as a valid float or integer, + then quotes will be used even if they aren’t strictly necessary. + """ + } + } + } } } except_fields: { diff --git a/website/cue/reference/components/sinks/base/gcp_cloud_storage.cue b/website/cue/reference/components/sinks/base/gcp_cloud_storage.cue index 636acde2cdbea..dc3e43c5aeb8e 100644 --- a/website/cue/reference/components/sinks/base/gcp_cloud_storage.cue +++ b/website/cue/reference/components/sinks/base/gcp_cloud_storage.cue @@ -264,18 +264,82 @@ base: components: sinks: gcp_cloud_storage: configuration: { description: "The CSV Serializer Options."
relevant_when: "codec = \"csv\"" required: true - type: object: options: fields: { - description: """ - Configures the fields that will be encoded, as well as the order in which they - appear in the output. + type: object: options: { + capacity: { + description: """ + Set the capacity (in bytes) of the internal buffer used in the CSV writer. + This defaults to a reasonable setting. + """ + required: false + type: uint: default: 8192 + } + delimiter: { + description: "The field delimiter to use when writing CSV." + required: false + type: uint: default: 44 + } + double_quote: { + description: """ + Enable double quote escapes. - If a field is not present in the event, the output will be an empty string. + This is enabled by default, but it may be disabled. When disabled, quotes in + field data are escaped instead of doubled. + """ + required: false + type: bool: default: true + } + escape: { + description: """ + The escape character to use when writing CSV. - Values of type `Array`, `Object`, and `Regex` are not supported and the - output will be an empty string. - """ - required: true - type: array: items: type: string: {} + In some variants of CSV, quotes are escaped using a special escape character + like \\ (instead of escaping quotes by doubling them). + + To use this, `double_quote` needs to be disabled as well; otherwise, this setting is ignored. + """ + required: false + type: uint: default: 34 + } + fields: { + description: """ + Configures the fields that will be encoded, as well as the order in which they + appear in the output. + + If a field is not present in the event, the output will be an empty string. + + Values of type `Array`, `Object`, and `Regex` are not supported and the + output will be an empty string. + """ + required: true + type: array: items: type: string: {} + } + quote: { + description: "The quote character to use when writing CSV."
+ required: false + type: uint: default: 34 + } + quote_style: { + description: "The quoting style to use when writing CSV data." + required: false + type: string: { + default: "necessary" + enum: { + always: "This puts quotes around every field. Always." + necessary: """ + This puts quotes around fields only when necessary. + They are necessary when fields contain a quote, delimiter or record terminator. + Quotes are also necessary when writing an empty record + (which is indistinguishable from a record with one empty field). + """ + never: "This never writes quotes, even if it would produce invalid CSV data." + non_numeric: """ + This puts quotes around all fields that are non-numeric. + Namely, when writing a field that does not parse as a valid float or integer, + then quotes will be used even if they aren’t strictly necessary. + """ + } + } + } } } except_fields: { diff --git a/website/cue/reference/components/sinks/base/gcp_pubsub.cue b/website/cue/reference/components/sinks/base/gcp_pubsub.cue index e63e3e1afd92e..eae172d862485 100644 --- a/website/cue/reference/components/sinks/base/gcp_pubsub.cue +++ b/website/cue/reference/components/sinks/base/gcp_pubsub.cue @@ -178,18 +178,82 @@ base: components: sinks: gcp_pubsub: configuration: { description: "The CSV Serializer Options." relevant_when: "codec = \"csv\"" required: true - type: object: options: fields: { - description: """ - Configures the fields that will be encoded, as well as the order in which they - appear in the output. + type: object: options: { + capacity: { + description: """ + Set the capacity (in bytes) of the internal buffer used in the CSV writer. + This defaults to a reasonable setting. + """ + required: false + type: uint: default: 8192 + } + delimiter: { + description: "The field delimiter to use when writing CSV." + required: false + type: uint: default: 44 + } + double_quote: { + description: """ + Enable double quote escapes. 
- If a field is not present in the event, the output will be an empty string. + This is enabled by default, but it may be disabled. When disabled, quotes in + field data are escaped instead of doubled. + """ + required: false + type: bool: default: true + } + escape: { + description: """ + The escape character to use when writing CSV. - Values of type `Array`, `Object`, and `Regex` are not supported and the - output will be an empty string. - """ - required: true - type: array: items: type: string: {} + In some variants of CSV, quotes are escaped using a special escape character + like \\ (instead of escaping quotes by doubling them). + + To use this, `double_quote` needs to be disabled as well; otherwise, this setting is ignored. + """ + required: false + type: uint: default: 34 + } + fields: { + description: """ + Configures the fields that will be encoded, as well as the order in which they + appear in the output. + + If a field is not present in the event, the output will be an empty string. + + Values of type `Array`, `Object`, and `Regex` are not supported and the + output will be an empty string. + """ + required: true + type: array: items: type: string: {} + } + quote: { + description: "The quote character to use when writing CSV." + required: false + type: uint: default: 34 + } + quote_style: { + description: "The quoting style to use when writing CSV data." + required: false + type: string: { + default: "necessary" + enum: { + always: "This puts quotes around every field. Always." + necessary: """ + This puts quotes around fields only when necessary. + They are necessary when fields contain a quote, delimiter or record terminator. + Quotes are also necessary when writing an empty record + (which is indistinguishable from a record with one empty field). + """ + never: "This never writes quotes, even if it would produce invalid CSV data." + non_numeric: """ + This puts quotes around all fields that are non-numeric.
+ Namely, when writing a field that does not parse as a valid float or integer, + then quotes will be used even if they aren’t strictly necessary. + """ + } + } + } } } except_fields: { diff --git a/website/cue/reference/components/sinks/base/http.cue b/website/cue/reference/components/sinks/base/http.cue index 7ee407825918d..1b2d54839ac06 100644 --- a/website/cue/reference/components/sinks/base/http.cue +++ b/website/cue/reference/components/sinks/base/http.cue @@ -219,18 +219,82 @@ base: components: sinks: http: configuration: { description: "The CSV Serializer Options." relevant_when: "codec = \"csv\"" required: true - type: object: options: fields: { - description: """ - Configures the fields that will be encoded, as well as the order in which they - appear in the output. + type: object: options: { + capacity: { + description: """ + Set the capacity (in bytes) of the internal buffer used in the CSV writer. + This defaults to a reasonable setting. + """ + required: false + type: uint: default: 8192 + } + delimiter: { + description: "The field delimiter to use when writing CSV." + required: false + type: uint: default: 44 + } + double_quote: { + description: """ + Enable double quote escapes. - If a field is not present in the event, the output will be an empty string. + This is enabled by default, but it may be disabled. When disabled, quotes in + field data are escaped instead of doubled. + """ + required: false + type: bool: default: true + } + escape: { + description: """ + The escape character to use when writing CSV. - Values of type `Array`, `Object`, and `Regex` are not supported and the - output will be an empty string. - """ - required: true - type: array: items: type: string: {} + In some variants of CSV, quotes are escaped using a special escape character + like \\ (instead of escaping quotes by doubling them). 
+ + To use this, `double_quote` needs to be disabled as well; otherwise, this setting is ignored. + """ + required: false + type: uint: default: 34 + } + fields: { + description: """ + Configures the fields that will be encoded, as well as the order in which they + appear in the output. + + If a field is not present in the event, the output will be an empty string. + + Values of type `Array`, `Object`, and `Regex` are not supported and the + output will be an empty string. + """ + required: true + type: array: items: type: string: {} + } + quote: { + description: "The quote character to use when writing CSV." + required: false + type: uint: default: 34 + } + quote_style: { + description: "The quoting style to use when writing CSV data." + required: false + type: string: { + default: "necessary" + enum: { + always: "This puts quotes around every field. Always." + necessary: """ + This puts quotes around fields only when necessary. + They are necessary when fields contain a quote, delimiter or record terminator. + Quotes are also necessary when writing an empty record + (which is indistinguishable from a record with one empty field). + """ + never: "This never writes quotes, even if it would produce invalid CSV data." + non_numeric: """ + This puts quotes around all fields that are non-numeric. + Namely, when writing a field that does not parse as a valid float or integer, + then quotes will be used even if they aren’t strictly necessary. + """ + } + } + } } } except_fields: { diff --git a/website/cue/reference/components/sinks/base/humio_logs.cue b/website/cue/reference/components/sinks/base/humio_logs.cue index 6ad5779de9eb4..fd60d17745e7b 100644 --- a/website/cue/reference/components/sinks/base/humio_logs.cue +++ b/website/cue/reference/components/sinks/base/humio_logs.cue @@ -172,18 +172,82 @@ base: components: sinks: humio_logs: configuration: { description: "The CSV Serializer Options."
relevant_when: "codec = \"csv\"" required: true - type: object: options: fields: { - description: """ - Configures the fields that will be encoded, as well as the order in which they - appear in the output. + type: object: options: { + capacity: { + description: """ + Set the capacity (in bytes) of the internal buffer used in the CSV writer. + This defaults to a reasonable setting. + """ + required: false + type: uint: default: 8192 + } + delimiter: { + description: "The field delimiter to use when writing CSV." + required: false + type: uint: default: 44 + } + double_quote: { + description: """ + Enable double quote escapes. - If a field is not present in the event, the output will be an empty string. + This is enabled by default, but it may be disabled. When disabled, quotes in + field data are escaped instead of doubled. + """ + required: false + type: bool: default: true + } + escape: { + description: """ + The escape character to use when writing CSV. - Values of type `Array`, `Object`, and `Regex` are not supported and the - output will be an empty string. - """ - required: true - type: array: items: type: string: {} + In some variants of CSV, quotes are escaped using a special escape character + like \\ (instead of escaping quotes by doubling them). + + To use this, `double_quote` needs to be disabled as well; otherwise it is ignored. + """ + required: false + type: uint: default: 34 + } + fields: { + description: """ + Configures the fields that will be encoded, as well as the order in which they + appear in the output. + + If a field is not present in the event, the output will be an empty string. + + Values of type `Array`, `Object`, and `Regex` are not supported and the + output will be an empty string. + """ + required: true + type: array: items: type: string: {} + } + quote: { + description: "The quote character to use when writing CSV."
+ required: false + type: uint: default: 34 + } + quote_style: { + description: "The quoting style to use when writing CSV data." + required: false + type: string: { + default: "necessary" + enum: { + always: "This puts quotes around every field. Always." + necessary: """ + This puts quotes around fields only when necessary. + They are necessary when fields contain a quote, delimiter or record terminator. + Quotes are also necessary when writing an empty record + (which is indistinguishable from a record with one empty field). + """ + never: "This never writes quotes, even if it would produce invalid CSV data." + non_numeric: """ + This puts quotes around all fields that are non-numeric. + Namely, when writing a field that does not parse as a valid float or integer, + then quotes will be used even if they aren’t strictly necessary. + """ + } + } + } } } except_fields: { diff --git a/website/cue/reference/components/sinks/base/kafka.cue b/website/cue/reference/components/sinks/base/kafka.cue index cca1134e00990..40486db2f2824 100644 --- a/website/cue/reference/components/sinks/base/kafka.cue +++ b/website/cue/reference/components/sinks/base/kafka.cue @@ -166,18 +166,82 @@ base: components: sinks: kafka: configuration: { description: "The CSV Serializer Options." relevant_when: "codec = \"csv\"" required: true - type: object: options: fields: { - description: """ - Configures the fields that will be encoded, as well as the order in which they - appear in the output. + type: object: options: { + capacity: { + description: """ + Set the capacity (in bytes) of the internal buffer used in the CSV writer. + This defaults to a reasonable setting. + """ + required: false + type: uint: default: 8192 + } + delimiter: { + description: "The field delimiter to use when writing CSV." + required: false + type: uint: default: 44 + } + double_quote: { + description: """ + Enable double quote escapes. + + This is enabled by default, but it may be disabled. 
When disabled, quotes in + field data are escaped instead of doubled. + """ + required: false + type: bool: default: true + } + escape: { + description: """ + The escape character to use when writing CSV. - If a field is not present in the event, the output will be an empty string. + In some variants of CSV, quotes are escaped using a special escape character + like \\ (instead of escaping quotes by doubling them). - Values of type `Array`, `Object`, and `Regex` are not supported and the - output will be an empty string. - """ - required: true - type: array: items: type: string: {} + To use this, `double_quote` needs to be disabled as well; otherwise it is ignored. + """ + required: false + type: uint: default: 34 + } + fields: { + description: """ + Configures the fields that will be encoded, as well as the order in which they + appear in the output. + + If a field is not present in the event, the output will be an empty string. + + Values of type `Array`, `Object`, and `Regex` are not supported and the + output will be an empty string. + """ + required: true + type: array: items: type: string: {} + } + quote: { + description: "The quote character to use when writing CSV." + required: false + type: uint: default: 34 + } + quote_style: { + description: "The quoting style to use when writing CSV data." + required: false + type: string: { + default: "necessary" + enum: { + always: "This puts quotes around every field. Always." + necessary: """ + This puts quotes around fields only when necessary. + They are necessary when fields contain a quote, delimiter or record terminator. + Quotes are also necessary when writing an empty record + (which is indistinguishable from a record with one empty field). + """ + never: "This never writes quotes, even if it would produce invalid CSV data." + non_numeric: """ + This puts quotes around all fields that are non-numeric.
+ Namely, when writing a field that does not parse as a valid float or integer, + then quotes will be used even if they aren’t strictly necessary. + """ + } + } + } } } except_fields: { diff --git a/website/cue/reference/components/sinks/base/loki.cue b/website/cue/reference/components/sinks/base/loki.cue index dd02a5c16d792..bf3aad7a2c162 100644 --- a/website/cue/reference/components/sinks/base/loki.cue +++ b/website/cue/reference/components/sinks/base/loki.cue @@ -223,18 +223,82 @@ base: components: sinks: loki: configuration: { description: "The CSV Serializer Options." relevant_when: "codec = \"csv\"" required: true - type: object: options: fields: { - description: """ - Configures the fields that will be encoded, as well as the order in which they - appear in the output. + type: object: options: { + capacity: { + description: """ + Set the capacity (in bytes) of the internal buffer used in the CSV writer. + This defaults to a reasonable setting. + """ + required: false + type: uint: default: 8192 + } + delimiter: { + description: "The field delimiter to use when writing CSV." + required: false + type: uint: default: 44 + } + double_quote: { + description: """ + Enable double quote escapes. - If a field is not present in the event, the output will be an empty string. + This is enabled by default, but it may be disabled. When disabled, quotes in + field data are escaped instead of doubled. + """ + required: false + type: bool: default: true + } + escape: { + description: """ + The escape character to use when writing CSV. - Values of type `Array`, `Object`, and `Regex` are not supported and the - output will be an empty string. - """ - required: true - type: array: items: type: string: {} + In some variants of CSV, quotes are escaped using a special escape character + like \\ (instead of escaping quotes by doubling them). 
+ + To use this, `double_quote` needs to be disabled as well; otherwise it is ignored. + """ + required: false + type: uint: default: 34 + } + fields: { + description: """ + Configures the fields that will be encoded, as well as the order in which they + appear in the output. + + If a field is not present in the event, the output will be an empty string. + + Values of type `Array`, `Object`, and `Regex` are not supported and the + output will be an empty string. + """ + required: true + type: array: items: type: string: {} + } + quote: { + description: "The quote character to use when writing CSV." + required: false + type: uint: default: 34 + } + quote_style: { + description: "The quoting style to use when writing CSV data." + required: false + type: string: { + default: "necessary" + enum: { + always: "This puts quotes around every field. Always." + necessary: """ + This puts quotes around fields only when necessary. + They are necessary when fields contain a quote, delimiter or record terminator. + Quotes are also necessary when writing an empty record + (which is indistinguishable from a record with one empty field). + """ + never: "This never writes quotes, even if it would produce invalid CSV data." + non_numeric: """ + This puts quotes around all fields that are non-numeric. + Namely, when writing a field that does not parse as a valid float or integer, + then quotes will be used even if they aren’t strictly necessary. + """ + } + } + } } } except_fields: { diff --git a/website/cue/reference/components/sinks/base/nats.cue b/website/cue/reference/components/sinks/base/nats.cue index 3565f375d2f67..b6895c9990873 100644 --- a/website/cue/reference/components/sinks/base/nats.cue +++ b/website/cue/reference/components/sinks/base/nats.cue @@ -211,18 +211,82 @@ base: components: sinks: nats: configuration: { description: "The CSV Serializer Options."
relevant_when: "codec = \"csv\"" required: true - type: object: options: fields: { - description: """ - Configures the fields that will be encoded, as well as the order in which they - appear in the output. + type: object: options: { + capacity: { + description: """ + Set the capacity (in bytes) of the internal buffer used in the CSV writer. + This defaults to a reasonable setting. + """ + required: false + type: uint: default: 8192 + } + delimiter: { + description: "The field delimiter to use when writing CSV." + required: false + type: uint: default: 44 + } + double_quote: { + description: """ + Enable double quote escapes. - If a field is not present in the event, the output will be an empty string. + This is enabled by default, but it may be disabled. When disabled, quotes in + field data are escaped instead of doubled. + """ + required: false + type: bool: default: true + } + escape: { + description: """ + The escape character to use when writing CSV. - Values of type `Array`, `Object`, and `Regex` are not supported and the - output will be an empty string. - """ - required: true - type: array: items: type: string: {} + In some variants of CSV, quotes are escaped using a special escape character + like \\ (instead of escaping quotes by doubling them). + + To use this, `double_quote` needs to be disabled as well; otherwise it is ignored. + """ + required: false + type: uint: default: 34 + } + fields: { + description: """ + Configures the fields that will be encoded, as well as the order in which they + appear in the output. + + If a field is not present in the event, the output will be an empty string. + + Values of type `Array`, `Object`, and `Regex` are not supported and the + output will be an empty string. + """ + required: true + type: array: items: type: string: {} + } + quote: { + description: "The quote character to use when writing CSV."
+ required: false + type: uint: default: 34 + } + quote_style: { + description: "The quoting style to use when writing CSV data." + required: false + type: string: { + default: "necessary" + enum: { + always: "This puts quotes around every field. Always." + necessary: """ + This puts quotes around fields only when necessary. + They are necessary when fields contain a quote, delimiter or record terminator. + Quotes are also necessary when writing an empty record + (which is indistinguishable from a record with one empty field). + """ + never: "This never writes quotes, even if it would produce invalid CSV data." + non_numeric: """ + This puts quotes around all fields that are non-numeric. + Namely, when writing a field that does not parse as a valid float or integer, + then quotes will be used even if they aren’t strictly necessary. + """ + } + } + } } } except_fields: { diff --git a/website/cue/reference/components/sinks/base/papertrail.cue b/website/cue/reference/components/sinks/base/papertrail.cue index 8b4cce6d5c019..746ce8cf7d5a7 100644 --- a/website/cue/reference/components/sinks/base/papertrail.cue +++ b/website/cue/reference/components/sinks/base/papertrail.cue @@ -111,18 +111,82 @@ base: components: sinks: papertrail: configuration: { description: "The CSV Serializer Options." relevant_when: "codec = \"csv\"" required: true - type: object: options: fields: { - description: """ - Configures the fields that will be encoded, as well as the order in which they - appear in the output. + type: object: options: { + capacity: { + description: """ + Set the capacity (in bytes) of the internal buffer used in the CSV writer. + This defaults to a reasonable setting. + """ + required: false + type: uint: default: 8192 + } + delimiter: { + description: "The field delimiter to use when writing CSV." + required: false + type: uint: default: 44 + } + double_quote: { + description: """ + Enable double quote escapes. + + This is enabled by default, but it may be disabled. 
When disabled, quotes in + field data are escaped instead of doubled. + """ + required: false + type: bool: default: true + } + escape: { + description: """ + The escape character to use when writing CSV. - If a field is not present in the event, the output will be an empty string. + In some variants of CSV, quotes are escaped using a special escape character + like \\ (instead of escaping quotes by doubling them). - Values of type `Array`, `Object`, and `Regex` are not supported and the - output will be an empty string. - """ - required: true - type: array: items: type: string: {} + To use this, `double_quote` needs to be disabled as well; otherwise it is ignored. + """ + required: false + type: uint: default: 34 + } + fields: { + description: """ + Configures the fields that will be encoded, as well as the order in which they + appear in the output. + + If a field is not present in the event, the output will be an empty string. + + Values of type `Array`, `Object`, and `Regex` are not supported and the + output will be an empty string. + """ + required: true + type: array: items: type: string: {} + } + quote: { + description: "The quote character to use when writing CSV." + required: false + type: uint: default: 34 + } + quote_style: { + description: "The quoting style to use when writing CSV data." + required: false + type: string: { + default: "necessary" + enum: { + always: "This puts quotes around every field. Always." + necessary: """ + This puts quotes around fields only when necessary. + They are necessary when fields contain a quote, delimiter or record terminator. + Quotes are also necessary when writing an empty record + (which is indistinguishable from a record with one empty field). + """ + never: "This never writes quotes, even if it would produce invalid CSV data." + non_numeric: """ + This puts quotes around all fields that are non-numeric.
+ Namely, when writing a field that does not parse as a valid float or integer, + then quotes will be used even if they aren’t strictly necessary. + """ + } + } + } } } except_fields: { diff --git a/website/cue/reference/components/sinks/base/pulsar.cue b/website/cue/reference/components/sinks/base/pulsar.cue index cc62959b42f8a..c528b90805739 100644 --- a/website/cue/reference/components/sinks/base/pulsar.cue +++ b/website/cue/reference/components/sinks/base/pulsar.cue @@ -205,18 +205,82 @@ base: components: sinks: pulsar: configuration: { description: "The CSV Serializer Options." relevant_when: "codec = \"csv\"" required: true - type: object: options: fields: { - description: """ - Configures the fields that will be encoded, as well as the order in which they - appear in the output. + type: object: options: { + capacity: { + description: """ + Set the capacity (in bytes) of the internal buffer used in the CSV writer. + This defaults to a reasonable setting. + """ + required: false + type: uint: default: 8192 + } + delimiter: { + description: "The field delimiter to use when writing CSV." + required: false + type: uint: default: 44 + } + double_quote: { + description: """ + Enable double quote escapes. + + This is enabled by default, but it may be disabled. When disabled, quotes in + field data are escaped instead of doubled. + """ + required: false + type: bool: default: true + } + escape: { + description: """ + The escape character to use when writing CSV. - If a field is not present in the event, the output will be an empty string. + In some variants of CSV, quotes are escaped using a special escape character + like \\ (instead of escaping quotes by doubling them). - Values of type `Array`, `Object`, and `Regex` are not supported and the - output will be an empty string. 
- """ - required: true - type: array: items: type: string: {} + To use this, `double_quote` needs to be disabled as well; otherwise it is ignored. + """ + required: false + type: uint: default: 34 + } + fields: { + description: """ + Configures the fields that will be encoded, as well as the order in which they + appear in the output. + + If a field is not present in the event, the output will be an empty string. + + Values of type `Array`, `Object`, and `Regex` are not supported and the + output will be an empty string. + """ + required: true + type: array: items: type: string: {} + } + quote: { + description: "The quote character to use when writing CSV." + required: false + type: uint: default: 34 + } + quote_style: { + description: "The quoting style to use when writing CSV data." + required: false + type: string: { + default: "necessary" + enum: { + always: "This puts quotes around every field. Always." + necessary: """ + This puts quotes around fields only when necessary. + They are necessary when fields contain a quote, delimiter or record terminator. + Quotes are also necessary when writing an empty record + (which is indistinguishable from a record with one empty field). + """ + never: "This never writes quotes, even if it would produce invalid CSV data." + non_numeric: """ + This puts quotes around all fields that are non-numeric. + Namely, when writing a field that does not parse as a valid float or integer, + then quotes will be used even if they aren’t strictly necessary. + """ + } + } + } } } except_fields: { diff --git a/website/cue/reference/components/sinks/base/redis.cue b/website/cue/reference/components/sinks/base/redis.cue index f432ea7ff7fc3..eeb31cacf99b4 100644 --- a/website/cue/reference/components/sinks/base/redis.cue +++ b/website/cue/reference/components/sinks/base/redis.cue @@ -164,18 +164,82 @@ base: components: sinks: redis: configuration: { description: "The CSV Serializer Options."
relevant_when: "codec = \"csv\"" required: true - type: object: options: fields: { - description: """ - Configures the fields that will be encoded, as well as the order in which they - appear in the output. + type: object: options: { + capacity: { + description: """ + Set the capacity (in bytes) of the internal buffer used in the CSV writer. + This defaults to a reasonable setting. + """ + required: false + type: uint: default: 8192 + } + delimiter: { + description: "The field delimiter to use when writing CSV." + required: false + type: uint: default: 44 + } + double_quote: { + description: """ + Enable double quote escapes. - If a field is not present in the event, the output will be an empty string. + This is enabled by default, but it may be disabled. When disabled, quotes in + field data are escaped instead of doubled. + """ + required: false + type: bool: default: true + } + escape: { + description: """ + The escape character to use when writing CSV. - Values of type `Array`, `Object`, and `Regex` are not supported and the - output will be an empty string. - """ - required: true - type: array: items: type: string: {} + In some variants of CSV, quotes are escaped using a special escape character + like \\ (instead of escaping quotes by doubling them). + + To use this, `double_quote` needs to be disabled as well; otherwise it is ignored. + """ + required: false + type: uint: default: 34 + } + fields: { + description: """ + Configures the fields that will be encoded, as well as the order in which they + appear in the output. + + If a field is not present in the event, the output will be an empty string. + + Values of type `Array`, `Object`, and `Regex` are not supported and the + output will be an empty string. + """ + required: true + type: array: items: type: string: {} + } + quote: { + description: "The quote character to use when writing CSV."
+ required: false + type: uint: default: 34 + } + quote_style: { + description: "The quoting style to use when writing CSV data." + required: false + type: string: { + default: "necessary" + enum: { + always: "This puts quotes around every field. Always." + necessary: """ + This puts quotes around fields only when necessary. + They are necessary when fields contain a quote, delimiter or record terminator. + Quotes are also necessary when writing an empty record + (which is indistinguishable from a record with one empty field). + """ + never: "This never writes quotes, even if it would produce invalid CSV data." + non_numeric: """ + This puts quotes around all fields that are non-numeric. + Namely, when writing a field that does not parse as a valid float or integer, + then quotes will be used even if they aren’t strictly necessary. + """ + } + } + } } } except_fields: { diff --git a/website/cue/reference/components/sinks/base/socket.cue b/website/cue/reference/components/sinks/base/socket.cue index c0b0c62ad106e..bb0da606ac175 100644 --- a/website/cue/reference/components/sinks/base/socket.cue +++ b/website/cue/reference/components/sinks/base/socket.cue @@ -123,18 +123,82 @@ base: components: sinks: socket: configuration: { description: "The CSV Serializer Options." relevant_when: "codec = \"csv\"" required: true - type: object: options: fields: { - description: """ - Configures the fields that will be encoded, as well as the order in which they - appear in the output. + type: object: options: { + capacity: { + description: """ + Set the capacity (in bytes) of the internal buffer used in the CSV writer. + This defaults to a reasonable setting. + """ + required: false + type: uint: default: 8192 + } + delimiter: { + description: "The field delimiter to use when writing CSV." + required: false + type: uint: default: 44 + } + double_quote: { + description: """ + Enable double quote escapes. + + This is enabled by default, but it may be disabled. 
When disabled, quotes in + field data are escaped instead of doubled. + """ + required: false + type: bool: default: true + } + escape: { + description: """ + The escape character to use when writing CSV. - If a field is not present in the event, the output will be an empty string. + In some variants of CSV, quotes are escaped using a special escape character + like \\ (instead of escaping quotes by doubling them). - Values of type `Array`, `Object`, and `Regex` are not supported and the - output will be an empty string. - """ - required: true - type: array: items: type: string: {} + To use this, `double_quote` needs to be disabled as well; otherwise it is ignored. + """ + required: false + type: uint: default: 34 + } + fields: { + description: """ + Configures the fields that will be encoded, as well as the order in which they + appear in the output. + + If a field is not present in the event, the output will be an empty string. + + Values of type `Array`, `Object`, and `Regex` are not supported and the + output will be an empty string. + """ + required: true + type: array: items: type: string: {} + } + quote: { + description: "The quote character to use when writing CSV." + required: false + type: uint: default: 34 + } + quote_style: { + description: "The quoting style to use when writing CSV data." + required: false + type: string: { + default: "necessary" + enum: { + always: "This puts quotes around every field. Always." + necessary: """ + This puts quotes around fields only when necessary. + They are necessary when fields contain a quote, delimiter or record terminator. + Quotes are also necessary when writing an empty record + (which is indistinguishable from a record with one empty field). + """ + never: "This never writes quotes, even if it would produce invalid CSV data." + non_numeric: """ + This puts quotes around all fields that are non-numeric.
+ Namely, when writing a field that does not parse as a valid float or integer, + then quotes will be used even if they aren’t strictly necessary. + """ + } + } + } } } except_fields: { diff --git a/website/cue/reference/components/sinks/base/splunk_hec_logs.cue b/website/cue/reference/components/sinks/base/splunk_hec_logs.cue index ab946626c47ab..1310e15065c15 100644 --- a/website/cue/reference/components/sinks/base/splunk_hec_logs.cue +++ b/website/cue/reference/components/sinks/base/splunk_hec_logs.cue @@ -222,18 +222,82 @@ base: components: sinks: splunk_hec_logs: configuration: { description: "The CSV Serializer Options." relevant_when: "codec = \"csv\"" required: true - type: object: options: fields: { - description: """ - Configures the fields that will be encoded, as well as the order in which they - appear in the output. + type: object: options: { + capacity: { + description: """ + Set the capacity (in bytes) of the internal buffer used in the CSV writer. + This defaults to a reasonable setting. + """ + required: false + type: uint: default: 8192 + } + delimiter: { + description: "The field delimiter to use when writing CSV." + required: false + type: uint: default: 44 + } + double_quote: { + description: """ + Enable double quote escapes. - If a field is not present in the event, the output will be an empty string. + This is enabled by default, but it may be disabled. When disabled, quotes in + field data are escaped instead of doubled. + """ + required: false + type: bool: default: true + } + escape: { + description: """ + The escape character to use when writing CSV. - Values of type `Array`, `Object`, and `Regex` are not supported and the - output will be an empty string. - """ - required: true - type: array: items: type: string: {} + In some variants of CSV, quotes are escaped using a special escape character + like \\ (instead of escaping quotes by doubling them). 
+ + To use this, `double_quote` needs to be disabled as well; otherwise it is ignored. + """ + required: false + type: uint: default: 34 + } + fields: { + description: """ + Configures the fields that will be encoded, as well as the order in which they + appear in the output. + + If a field is not present in the event, the output will be an empty string. + + Values of type `Array`, `Object`, and `Regex` are not supported and the + output will be an empty string. + """ + required: true + type: array: items: type: string: {} + } + quote: { + description: "The quote character to use when writing CSV." + required: false + type: uint: default: 34 + } + quote_style: { + description: "The quoting style to use when writing CSV data." + required: false + type: string: { + default: "necessary" + enum: { + always: "This puts quotes around every field. Always." + necessary: """ + This puts quotes around fields only when necessary. + They are necessary when fields contain a quote, delimiter or record terminator. + Quotes are also necessary when writing an empty record + (which is indistinguishable from a record with one empty field). + """ + never: "This never writes quotes, even if it would produce invalid CSV data." + non_numeric: """ + This puts quotes around all fields that are non-numeric. + Namely, when writing a field that does not parse as a valid float or integer, + then quotes will be used even if they aren’t strictly necessary. + """ + } + } + } } } except_fields: { diff --git a/website/cue/reference/components/sinks/base/webhdfs.cue b/website/cue/reference/components/sinks/base/webhdfs.cue index b07f00ae779ea..caec116384336 100644 --- a/website/cue/reference/components/sinks/base/webhdfs.cue +++ b/website/cue/reference/components/sinks/base/webhdfs.cue @@ -172,18 +172,82 @@ base: components: sinks: webhdfs: configuration: { description: "The CSV Serializer Options."
relevant_when: "codec = \"csv\"" required: true - type: object: options: fields: { - description: """ - Configures the fields that will be encoded, as well as the order in which they - appear in the output. + type: object: options: { + capacity: { + description: """ + Set the capacity (in bytes) of the internal buffer used in the CSV writer. + This defaults to a reasonable setting. + """ + required: false + type: uint: default: 8192 + } + delimiter: { + description: "The field delimiter to use when writing CSV." + required: false + type: uint: default: 44 + } + double_quote: { + description: """ + Enable double quote escapes. + + This is enabled by default, but it may be disabled. When disabled, quotes in + field data are escaped instead of doubled. + """ + required: false + type: bool: default: true + } + escape: { + description: """ + The escape character to use when writing CSV. - If a field is not present in the event, the output will be an empty string. + In some variants of CSV, quotes are escaped using a special escape character + like \\ (instead of escaping quotes by doubling them). - Values of type `Array`, `Object`, and `Regex` are not supported and the - output will be an empty string. - """ - required: true - type: array: items: type: string: {} + To use this, `double_quote` needs to be disabled as well; otherwise it is ignored. + """ + required: false + type: uint: default: 34 + } + fields: { + description: """ + Configures the fields that will be encoded, as well as the order in which they + appear in the output. + + If a field is not present in the event, the output will be an empty string. + + Values of type `Array`, `Object`, and `Regex` are not supported and the + output will be an empty string. + """ + required: true + type: array: items: type: string: {} + } + quote: { + description: "The quote character to use when writing CSV."
+ required: false + type: uint: default: 34 + } + quote_style: { + description: "The quoting style to use when writing CSV data." + required: false + type: string: { + default: "necessary" + enum: { + always: "This puts quotes around every field. Always." + necessary: """ + This puts quotes around fields only when necessary. + They are necessary when fields contain a quote, delimiter or record terminator. + Quotes are also necessary when writing an empty record + (which is indistinguishable from a record with one empty field). + """ + never: "This never writes quotes, even if it would produce invalid CSV data." + non_numeric: """ + This puts quotes around all fields that are non-numeric. + Namely, when writing a field that does not parse as a valid float or integer, + then quotes will be used even if they aren’t strictly necessary. + """ + } + } + } } } except_fields: { diff --git a/website/cue/reference/components/sinks/base/websocket.cue b/website/cue/reference/components/sinks/base/websocket.cue index ad51d78f9a46f..8b3770715ceff 100644 --- a/website/cue/reference/components/sinks/base/websocket.cue +++ b/website/cue/reference/components/sinks/base/websocket.cue @@ -158,18 +158,82 @@ base: components: sinks: websocket: configuration: { description: "The CSV Serializer Options." relevant_when: "codec = \"csv\"" required: true - type: object: options: fields: { - description: """ - Configures the fields that will be encoded, as well as the order in which they - appear in the output. + type: object: options: { + capacity: { + description: """ + Set the capacity (in bytes) of the internal buffer used in the CSV writer. + This defaults to a reasonable setting. + """ + required: false + type: uint: default: 8192 + } + delimiter: { + description: "The field delimiter to use when writing CSV." + required: false + type: uint: default: 44 + } + double_quote: { + description: """ + Enable double quote escapes. + + This is enabled by default, but it may be disabled. 
When disabled, quotes in + field data are escaped instead of doubled. + """ + required: false + type: bool: default: true + } + escape: { + description: """ + The escape character to use when writing CSV. - If a field is not present in the event, the output will be an empty string. + In some variants of CSV, quotes are escaped using a special escape character + like \\ (instead of escaping quotes by doubling them). - Values of type `Array`, `Object`, and `Regex` are not supported and the - output will be an empty string. - """ - required: true - type: array: items: type: string: {} + To use this, `double_quote` needs to be disabled as well; otherwise, it is ignored. + """ + required: false + type: uint: default: 34 + } + fields: { + description: """ + Configures the fields that will be encoded, as well as the order in which they + appear in the output. + + If a field is not present in the event, the output will be an empty string. + + Values of type `Array`, `Object`, and `Regex` are not supported and the + output will be an empty string. + """ + required: true + type: array: items: type: string: {} + } + quote: { + description: "The quote character to use when writing CSV." + required: false + type: uint: default: 34 + } + quote_style: { + description: "The quoting style to use when writing CSV data." + required: false + type: string: { + default: "necessary" + enum: { + always: "This puts quotes around every field. Always." + necessary: """ + This puts quotes around fields only when necessary. + They are necessary when fields contain a quote, delimiter or record terminator. + Quotes are also necessary when writing an empty record + (which is indistinguishable from a record with one empty field). + """ + never: "This never writes quotes, even if it would produce invalid CSV data." + non_numeric: """ + This puts quotes around all fields that are non-numeric.
+ Namely, when writing a field that does not parse as a valid float or integer, + then quotes will be used even if they aren’t strictly necessary. + """ + } + } + } } } except_fields: {