diff --git a/datafusion-cli/src/format/print_format.rs b/datafusion-cli/src/format/print_format.rs index a9fc56b90524..85caaa3c5276 100644 --- a/datafusion-cli/src/format/print_format.rs +++ b/datafusion-cli/src/format/print_format.rs @@ -17,17 +17,19 @@ //! Print format variants use arrow::csv::writer::WriterBuilder; +use arrow::json::ArrayWriter; use datafusion::arrow::record_batch::RecordBatch; use datafusion::arrow::util::pretty; use datafusion::error::{DataFusionError, Result}; use std::str::FromStr; /// Allow records to be printed in different formats -#[derive(Debug, Clone)] +#[derive(Debug, PartialEq, Eq, Clone)] pub enum PrintFormat { Csv, Tsv, Table, + Json, } impl FromStr for PrintFormat { @@ -37,11 +39,24 @@ impl FromStr for PrintFormat { "csv" => Ok(Self::Csv), "tsv" => Ok(Self::Tsv), "table" => Ok(Self::Table), + "json" => Ok(Self::Json), _ => Err(()), } } } +fn print_batches_to_json(batches: &[RecordBatch]) -> Result { + let mut bytes = vec![]; + { + let mut writer = ArrayWriter::new(&mut bytes); + writer.write_batches(batches)?; + writer.finish()?; + } + let formatted = String::from_utf8(bytes) + .map_err(|e| DataFusionError::Execution(e.to_string()))?; + Ok(formatted) +} + fn print_batches_with_sep(batches: &[RecordBatch], delimiter: u8) -> Result { let mut bytes = vec![]; { @@ -65,7 +80,89 @@ impl PrintFormat { Self::Csv => println!("{}", print_batches_with_sep(batches, b',')?), Self::Tsv => println!("{}", print_batches_with_sep(batches, b'\t')?), Self::Table => pretty::print_batches(batches)?, + Self::Json => println!("{}", print_batches_to_json(batches)?), } Ok(()) } } + +#[cfg(test)] +mod tests { + use super::*; + use arrow::array::Int32Array; + use arrow::datatypes::{DataType, Field, Schema}; + use std::sync::Arc; + + #[test] + fn test_from_str() { + let format = "csv".parse::().unwrap(); + assert_eq!(PrintFormat::Csv, format); + + let format = "tsv".parse::().unwrap(); + assert_eq!(PrintFormat::Tsv, format); + + let format = "json".parse::().unwrap(); + assert_eq!(PrintFormat::Json, format); + + let format = "table".parse::().unwrap(); + assert_eq!(PrintFormat::Table, format); + } + + #[test] + fn test_from_str_failure() { + assert_eq!(true, "pretty".parse::().is_err()); + } + + #[test] + fn test_print_batches_with_sep() { + let batches = vec![]; + assert_eq!("", print_batches_with_sep(&batches, b',').unwrap()); + + let schema = Arc::new(Schema::new(vec![ + Field::new("a", DataType::Int32, false), + Field::new("b", DataType::Int32, false), + Field::new("c", DataType::Int32, false), + ])); + + let batch = RecordBatch::try_new( + schema, + vec![ + Arc::new(Int32Array::from(vec![1, 2, 3])), + Arc::new(Int32Array::from(vec![4, 5, 6])), + Arc::new(Int32Array::from(vec![7, 8, 9])), + ], + ) + .unwrap(); + + let batches = vec![batch]; + let r = print_batches_with_sep(&batches, b',').unwrap(); + assert_eq!("a,b,c\n1,4,7\n2,5,8\n3,6,9\n", r); + } + + #[test] + fn test_print_batches_to_json_empty() { + let batches = vec![]; + let r = print_batches_to_json(&batches).unwrap(); + assert_eq!("", r); + + let schema = Arc::new(Schema::new(vec![ + Field::new("a", DataType::Int32, false), + Field::new("b", DataType::Int32, false), + Field::new("c", DataType::Int32, false), + ])); + + let batch = RecordBatch::try_new( + schema, + vec![ + Arc::new(Int32Array::from(vec![1, 2, 3])), + Arc::new(Int32Array::from(vec![4, 5, 6])), + Arc::new(Int32Array::from(vec![7, 8, 9])), + ], + ) + .unwrap(); + + let batches = vec![batch]; + let r = print_batches_to_json(&batches).unwrap(); + assert_eq!("[{\"a\":1,\"b\":4,\"c\":7},{\"a\":2,\"b\":5,\"c\":8},{\"a\":3,\"b\":6,\"c\":9}]", r); + } +} diff --git a/datafusion-cli/src/main.rs b/datafusion-cli/src/main.rs index 52d3ccc6123b..2360d4642484 100644 --- a/datafusion-cli/src/main.rs +++ b/datafusion-cli/src/main.rs @@ -66,7 +66,7 @@ pub async fn main() { ) .arg( Arg::with_name("format") - .help("Output format (possible values: table, csv, tsv)") + .help("Output format (possible values: table, csv, tsv, json)") .long("format") .default_value("table") .validator(is_valid_format)