Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(format): better checking of format options. #8981

Merged
merged 20 commits into from
Nov 26, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
7e6ecaf
refactor(format): mv str helpers from crate databalues to formats.
youngsofun Nov 26, 2022
c596bf5
feat(format): add FormatOptionChecker.
youngsofun Nov 26, 2022
a99b2df
refactor(format): mv ident_case_sensitive from FormatSetting to Field…
youngsofun Nov 26, 2022
a256c26
refactor(format): remove unused fields in FormatSettings.
youngsofun Nov 26, 2022
2c9e3b9
refactor(format): remove unused trait PrimitiveWithFormat.
youngsofun Nov 26, 2022
d63da3d
refactor(format): remove FormatSettings from input_format.
youngsofun Nov 26, 2022
01c3795
feat: enable option nan_display.
youngsofun Nov 26, 2022
5aeeaf6
feat(format): better checking of format options.
youngsofun Nov 26, 2022
06cca62
fix clippy
youngsofun Nov 26, 2022
f276b44
fix flippy
youngsofun Nov 26, 2022
8a533ca
feat(format): fix TSV option checking.
youngsofun Nov 26, 2022
a79558f
refactor(format): use consts.
youngsofun Nov 26, 2022
0abd43c
feat(format): check_nan_display.
youngsofun Nov 26, 2022
71f8036
feat(format): csv check_record_delimiter.
youngsofun Nov 26, 2022
00fce41
feat(format): escape \r \n in error msg.
youngsofun Nov 26, 2022
e8e8930
test(format): only allow \r or \r\n as format_record_delimiter.
youngsofun Nov 26, 2022
b9f6325
refactor(format): move enum RecordDelimiter from crate source to format.
youngsofun Nov 26, 2022
f716a06
refactor(format): fix old tests, should not use format_skip_header fo…
youngsofun Nov 26, 2022
fd6147d
refactor(format): move get_field_delimiter() from InputFormat to File…
youngsofun Nov 26, 2022
a121928
Merge branch 'main' into fmt
mergify[bot] Nov 26, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
134 changes: 3 additions & 131 deletions src/common/io/src/format_settings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,145 +13,17 @@
// limitations under the License.

use chrono_tz::Tz;
use common_exception::ErrorCode;
use common_exception::Result;

use crate::consts::*;

// The format inside struct/array is fixed
// when it's represented as a string in csv/tsv/json/...
// It should be compatible with the format used in SQL.
/// Formatting tokens applied to values nested inside a struct/array.
///
/// NOTE(review): the per-field descriptions below are inferred from the field
/// names and from the constants used in the `Default` impl — confirm against
/// the code that consumes them.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NestedFormatSettings {
/// Bytes for a boolean `true` (defaults to `TRUE_BYTES_NUM`).
pub true_bytes: Vec<u8>,
/// Bytes for a boolean `false` (defaults to `FALSE_BYTES_NUM`).
pub false_bytes: Vec<u8>,
/// Bytes for a null value (defaults to `NULL_BYTES_UPPER`).
pub null_bytes: Vec<u8>,
/// Bytes for a floating-point NaN (defaults to `NAN_BYTES_LOWER`).
pub nan_bytes: Vec<u8>,
/// Bytes for a floating-point infinity (defaults to `INF_BYTES_LOWER`).
pub inf_bytes: Vec<u8>,
/// Quote byte; the default is a single quote (`'`).
pub quote_char: u8,
}

impl Default for NestedFormatSettings {
    /// Builds the nested settings from the shared constants: numeric booleans,
    /// upper-case null, lower-case nan/inf, and a single quote as the quote byte.
    fn default() -> Self {
        Self {
            quote_char: b'\'',
            true_bytes: Vec::from(TRUE_BYTES_NUM.as_bytes()),
            false_bytes: Vec::from(FALSE_BYTES_NUM.as_bytes()),
            null_bytes: Vec::from(NULL_BYTES_UPPER.as_bytes()),
            nan_bytes: Vec::from(NAN_BYTES_LOWER.as_bytes()),
            inf_bytes: Vec::from(INF_BYTES_LOWER.as_bytes()),
        }
    }
}

/// Settings bundle describing how values are rendered/parsed as text.
///
/// NOTE(review): field descriptions are inferred from the field names and the
/// preset constructors in this file — confirm against the actual consumers.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FormatSettings {
// both
/// Timezone; every preset in this file sets it to UTC.
pub timezone: Tz,
TCeason marked this conversation as resolved.
Show resolved Hide resolved

// inner
/// Tokens used for values nested inside a struct/array.
pub nested: NestedFormatSettings,

// outer
/// Bytes for a top-level boolean `true`.
pub true_bytes: Vec<u8>,
/// Bytes for a top-level boolean `false`.
pub false_bytes: Vec<u8>,
/// Bytes for a top-level null.
pub null_bytes: Vec<u8>,
/// Bytes for a top-level NaN.
pub nan_bytes: Vec<u8>,
/// Bytes for a top-level infinity.
pub inf_bytes: Vec<u8>,
/// Quote byte.
pub quote_char: u8,
/// Optional escape byte; `None` presumably disables escaping — TODO confirm.
pub escape: Option<u8>,

/// Byte sequence separating records (the presets use `\n`).
pub record_delimiter: Vec<u8>,
/// Byte sequence separating fields (the presets use `\t`).
pub field_delimiter: Vec<u8>,
// The presets in this file mark the remaining fields as "not used".
pub empty_as_default: bool,

pub json_quote_denormals: bool,
pub json_escape_forward_slashes: bool,
pub ident_case_sensitive: bool,

/// Row tag bytes; see `parse_row_tag`, which falls back to `row` for an empty option.
pub row_tag: Vec<u8>,
}

impl FormatSettings {
    /// Picks the escape byte from an option string.
    ///
    /// Returns `default` when `option` is empty; otherwise returns the first
    /// byte of `option` (any further bytes are ignored).
    pub fn parse_escape(option: &str, default: Option<u8>) -> Option<u8> {
        match option.as_bytes().first() {
            Some(&first) => Some(first),
            None => default,
        }
    }

    /// Parses the quote character option.
    ///
    /// # Errors
    /// Returns `ErrorCode::InvalidArgument` unless `option` is exactly one byte long.
    pub fn parse_quote(option: &str) -> Result<u8> {
        match option.as_bytes() {
            [quote] => Ok(*quote),
            _ => Err(ErrorCode::InvalidArgument(
                "quote_char can only contain one char",
            )),
        }
    }

    /// Returns the row tag as bytes, falling back to `row` when `option` is empty.
    ///
    /// This function never returns `Err`.
    pub fn parse_row_tag(option: &str) -> Result<Vec<u8>> {
        let tag = if option.is_empty() { "row" } else { option };
        Ok(tag.as_bytes().to_vec())
    }

    /// Preset for values parsing (per the function name): lower-case booleans,
    /// upper-case null, single-quote quoting, backslash escape, `\n` records
    /// and `\t` fields, all in UTC.
    pub fn for_values_parsing() -> Self {
        let utc: Tz = "UTC".parse().unwrap();
        Self {
            timezone: utc,
            nested: Default::default(),

            true_bytes: Vec::from(TRUE_BYTES_LOWER.as_bytes()),
            false_bytes: Vec::from(FALSE_BYTES_LOWER.as_bytes()),
            null_bytes: Vec::from(NULL_BYTES_UPPER.as_bytes()),
            nan_bytes: Vec::from(NAN_BYTES_LOWER.as_bytes()),
            inf_bytes: Vec::from(INF_BYTES_LOWER.as_bytes()),
            quote_char: b'\'',
            escape: Some(b'\\'),

            record_delimiter: b"\n".to_vec(),
            field_delimiter: b"\t".to_vec(),

            // not used
            empty_as_default: true,
            json_quote_denormals: false,
            json_escape_forward_slashes: true,
            ident_case_sensitive: false,
            row_tag: Vec::new(),
        }
    }

    /// TSV preset: uses the `*_NUM` boolean constants and `NULL_BYTES_ESCAPE`
    /// for null; every other field has the same value as in
    /// `for_values_parsing`.
    fn tsv_default() -> Self {
        Self {
            true_bytes: Vec::from(TRUE_BYTES_NUM.as_bytes()),
            false_bytes: Vec::from(FALSE_BYTES_NUM.as_bytes()),
            null_bytes: Vec::from(NULL_BYTES_ESCAPE.as_bytes()),
            ..Self::for_values_parsing()
        }
    }
}

// only used for tests
youngsofun marked this conversation as resolved.
Show resolved Hide resolved
impl Default for FormatSettings {
// NOTE(review): the body below holds two tail expressions in a row (a call to
// `tsv_default()` followed by a `Self { .. }` literal that sets only `timezone`).
// This looks like the removed and added sides of a diff rendered together without
// +/- markers — confirm which one is intended before relying on this impl.
fn default() -> Self {
FormatSettings::tsv_default()
Self {
timezone: "UTC".parse::<Tz>().unwrap(),
}
}
}
34 changes: 0 additions & 34 deletions src/query/datavalues/src/types/serializations/helper/csv.rs

This file was deleted.

143 changes: 0 additions & 143 deletions src/query/datavalues/src/types/serializations/helper/json.rs

This file was deleted.

4 changes: 0 additions & 4 deletions src/query/datavalues/src/types/serializations/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ mod array;
mod boolean;
mod const_;
mod date;
pub mod helper;
mod null;
mod nullable;
mod number;
Expand All @@ -34,9 +33,6 @@ use common_io::prelude::FormatSettings;
pub use const_::ConstSerializer;
pub use date::DateSerializer;
use enum_dispatch::enum_dispatch;
pub use helper::csv::write_csv_string;
pub use helper::escape::write_escaped_string;
pub use helper::json::write_json_string;
pub use null::NullSerializer;
pub use nullable::NullableSerializer;
pub use number::NumberSerializer;
Expand Down
Loading