diff --git a/integration-testing/Cargo.toml b/integration-testing/Cargo.toml index 687e91ac4df..f4f6a336498 100644 --- a/integration-testing/Cargo.toml +++ b/integration-testing/Cargo.toml @@ -34,7 +34,7 @@ logging = ["tracing-subscriber"] arrow = { path = "../arrow", default-features = false, features = ["test_utils", "ipc", "ipc_compression", "json"] } arrow-flight = { path = "../arrow-flight", default-features = false } async-trait = { version = "0.1.41", default-features = false } -clap = { version = "3", default-features = false, features = ["std", "derive"] } +clap = { version = "4", default-features = false, features = ["std", "derive", "help", "error-context", "usage"] } futures = { version = "0.3", default-features = false } hex = { version = "0.4", default-features = false, features = ["std"] } prost = { version = "0.11", default-features = false } diff --git a/integration-testing/src/bin/arrow-json-integration-test.rs b/integration-testing/src/bin/arrow-json-integration-test.rs index a7d7cf6ee7c..b84680f6f4b 100644 --- a/integration-testing/src/bin/arrow-json-integration-test.rs +++ b/integration-testing/src/bin/arrow-json-integration-test.rs @@ -24,7 +24,7 @@ use arrow_integration_testing::{read_json_file, util::*}; use clap::Parser; use std::fs::File; -#[derive(clap::ArgEnum, Debug, Clone)] +#[derive(clap::ValueEnum, Debug, Clone)] #[clap(rename_all = "SCREAMING_SNAKE_CASE")] enum Mode { ArrowToJson, @@ -41,7 +41,7 @@ struct Args { arrow: String, #[clap(short, long, help("Path to JSON file"))] json: String, - #[clap(arg_enum, short, long, default_value_t = Mode::Validate, help="Mode of integration testing tool")] + #[clap(value_enum, short, long, default_value_t = Mode::Validate, help="Mode of integration testing tool")] mode: Mode, #[clap(short, long)] verbose: bool, diff --git a/integration-testing/src/bin/flight-test-integration-client.rs b/integration-testing/src/bin/flight-test-integration-client.rs index fa99b424e37..d46b4fac759 100644 --- a/integration-testing/src/bin/flight-test-integration-client.rs +++ b/integration-testing/src/bin/flight-test-integration-client.rs @@ -20,7 +20,7 @@ use clap::Parser; type Error = Box; type Result = std::result::Result; -#[derive(clap::ArgEnum, Debug, Clone)] +#[derive(clap::ValueEnum, Debug, Clone)] enum Scenario { Middleware, #[clap(name = "auth:basic_proto")] @@ -40,7 +40,7 @@ struct Args { help = "path to the descriptor file, only used when scenario is not provided. See https://arrow.apache.org/docs/format/Integration.html#json-test-data-format" )] path: Option, - #[clap(long, arg_enum)] + #[clap(long, value_enum)] scenario: Option, } diff --git a/integration-testing/src/bin/flight-test-integration-server.rs b/integration-testing/src/bin/flight-test-integration-server.rs index 6ed22ad81d9..5310d07d4f8 100644 --- a/integration-testing/src/bin/flight-test-integration-server.rs +++ b/integration-testing/src/bin/flight-test-integration-server.rs @@ -21,7 +21,7 @@ use clap::Parser; type Error = Box; type Result = std::result::Result; -#[derive(clap::ArgEnum, Debug, Clone)] +#[derive(clap::ValueEnum, Debug, Clone)] enum Scenario { Middleware, #[clap(name = "auth:basic_proto")] @@ -33,7 +33,7 @@ enum Scenario { struct Args { #[clap(long)] port: u16, - #[clap(long, arg_enum)] + #[clap(long, value_enum)] scenario: Option, } diff --git a/parquet/Cargo.toml b/parquet/Cargo.toml index e1593e4b9c9..f47f556b257 100644 --- a/parquet/Cargo.toml +++ b/parquet/Cargo.toml @@ -43,7 +43,7 @@ num = { version = "0.4", default-features = false } num-bigint = { version = "0.4", default-features = false } arrow = { path = "../arrow", version = "24.0.0", optional = true, default-features = false, features = ["ipc"] } base64 = { version = "0.13", default-features = false, features = ["std"], optional = true } -clap = { version = "3", default-features = false, features = ["std", "derive", "env"], optional = true } +clap = { version = "4", default-features = false, features = ["std", "derive", "env", "help", "error-context", "usage"], optional = true } serde_json = { version = "1.0", default-features = false, features = ["std"], optional = true } seq-macro = { version = "0.3", default-features = false } rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] } diff --git a/parquet/src/bin/parquet-fromcsv-help.txt b/parquet/src/bin/parquet-fromcsv-help.txt index f4fe704ab26..f599a13f0f1 100644 --- a/parquet/src/bin/parquet-fromcsv-help.txt +++ b/parquet/src/bin/parquet-fromcsv-help.txt @@ -1,66 +1,65 @@ -Apache Arrow -Binary to convert csv to Parquet -USAGE: - parquet [OPTIONS] --schema --input-file --output-file +Usage: parquet [OPTIONS] --schema --input-file --output-file -OPTIONS: - -b, --batch-size - batch size - - [env: PARQUET_FROM_CSV_BATCHSIZE=] - [default: 1000] +Options: + -s, --schema + message schema for output Parquet - -c, --parquet-compression - compression mode - - [default: SNAPPY] + -i, --input-file + input CSV file - -d, --delimiter - field delimiter - - default value: when input_format==CSV: ',' when input_format==TSV: 'TAB' + -o, --output-file + output Parquet file - -D, --double-quote - double quote + -f, --input-format + input file format + + [default: csv] + [possible values: csv, tsv] - -e, --escape-char - escape charactor + -b, --batch-size + batch size + + [env: PARQUET_FROM_CSV_BATCHSIZE=] + [default: 1000] - -f, --input-format - input file format - - [default: csv] - [possible values: csv, tsv] + -h, --has-header + has header - -h, --has-header - has header + -d, --delimiter + field delimiter + + default value: when input_format==CSV: ',' when input_format==TSV: 'TAB' - --help - Print help information + -r, --record-terminator + record terminator + + [possible values: lf, crlf, cr] - -i, --input-file - input CSV file + -e, --escape-char + escape charactor - -m, --max-row-group-size - max row group size + -q, --quote-char + quate charactor - -o, --output-file - output Parquet file + -D, --double-quote + double quote + + [possible values: true, false] - -q, --quote-char - quate charactor + -c, --parquet-compression + compression mode + + [default: SNAPPY] - -r, --record-terminator - record terminator - - [possible values: lf, crlf, cr] + -w, --writer-version + writer version - -s, --schema - message schema for output Parquet + -m, --max-row-group-size + max row group size - -V, --version - Print version information + --help + display usage help - -w, --writer-version - writer version + -V, --version + Print version information diff --git a/parquet/src/bin/parquet-fromcsv.rs b/parquet/src/bin/parquet-fromcsv.rs index 827aa7311f5..90e0a68625f 100644 --- a/parquet/src/bin/parquet-fromcsv.rs +++ b/parquet/src/bin/parquet-fromcsv.rs @@ -72,7 +72,7 @@ use std::{ }; use arrow::{csv::ReaderBuilder, datatypes::Schema, error::ArrowError}; -use clap::{ArgEnum, Parser}; +use clap::{Parser, ValueEnum}; use parquet::{ arrow::{parquet_to_arrow_schema, ArrowWriter}, basic::Compression, @@ -140,7 +140,7 @@ impl Display for ParquetFromCsvError { } #[derive(Debug, Parser)] -#[clap(author, version, about("Binary to convert csv to Parquet"), long_about=None)] +#[clap(author, version, disable_help_flag=true, about("Binary to convert csv to Parquet"), long_about=None)] struct Args { /// Path to a text file containing a parquet schema definition #[clap(short, long, help("message schema for output Parquet"))] @@ -153,7 +153,7 @@ struct Args { output_file: PathBuf, /// input file format #[clap( - arg_enum, + value_enum, short('f'), long, help("input file format"), @@ -179,7 +179,7 @@ struct Args { /// when input_format==TSV: 'TAB' #[clap(short, long, help("field delimiter"))] delimiter: Option, - #[clap(arg_enum, short, long, help("record terminator"))] + #[clap(value_enum, short, long, help("record terminator"))] record_terminator: Option, #[clap(short, long, help("escape charactor"))] escape_char: Option, @@ -188,14 +188,17 @@ struct Args { #[clap(short('D'), long, help("double quote"))] double_quote: Option, #[clap(short('c'), long, help("compression mode"), default_value_t=Compression::SNAPPY)] - #[clap(parse(try_from_str =compression_from_str))] + #[clap(value_parser=compression_from_str)] parquet_compression: Compression, #[clap(short, long, help("writer version"))] - #[clap(parse(try_from_str =writer_version_from_str))] + #[clap(value_parser=writer_version_from_str)] writer_version: Option, #[clap(short, long, help("max row group size"))] max_row_group_size: Option, + + #[clap(long, action=clap::ArgAction::Help, help("display usage help"))] + help: Option, } fn compression_from_str(cmp: &str) -> Result { @@ -208,7 +211,7 @@ fn compression_from_str(cmp: &str) -> Result { "LZ4" => Ok(Compression::LZ4), "ZSTD" => Ok(Compression::ZSTD), v => Err( - format!("Unknown compression {0} : possible values UNCOMPRESSED, SNAPPY, GZIP, LZO, BROTLI, LZ4, ZSTD ",v) + format!("Unknown compression {0} : possible values UNCOMPRESSED, SNAPPY, GZIP, LZO, BROTLI, LZ4, ZSTD \n\nFor more information try --help",v) ) } } @@ -263,13 +266,13 @@ impl Args { } } -#[derive(Debug, Clone, Copy, ArgEnum, PartialEq)] +#[derive(Debug, Clone, Copy, ValueEnum, PartialEq)] enum CsvDialect { Csv, Tsv, } -#[derive(Debug, Clone, Copy, ArgEnum, PartialEq)] +#[derive(Debug, Clone, Copy, ValueEnum, PartialEq)] enum RecordTerminator { LF, Crlf, @@ -544,6 +547,7 @@ mod tests { parquet_compression: Compression::SNAPPY, writer_version: None, max_row_group_size: None, + help: None, }; let arrow_schema = Arc::new(Schema::new(vec![ Field::new("field1", DataType::Utf8, false), @@ -577,6 +581,7 @@ mod tests { parquet_compression: Compression::SNAPPY, writer_version: None, max_row_group_size: None, + help: None, }; let arrow_schema = Arc::new(Schema::new(vec![ Field::new("field1", DataType::Utf8, false), @@ -630,6 +635,7 @@ mod tests { parquet_compression: Compression::SNAPPY, writer_version: None, max_row_group_size: None, + help: None, }; convert_csv_to_parquet(&args).unwrap(); } diff --git a/parquet/src/bin/parquet-rowcount.rs b/parquet/src/bin/parquet-rowcount.rs index d2f0311cf7a..491f582c510 100644 --- a/parquet/src/bin/parquet-rowcount.rs +++ b/parquet/src/bin/parquet-rowcount.rs @@ -47,7 +47,7 @@ struct Args { #[clap( short, long, - multiple_values(true), + number_of_values(1), help("List of Parquet files to read from separated by space") )] file_paths: Vec,