Skip to content
99 changes: 98 additions & 1 deletion datafusion-cli/src/format/print_format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,19 @@

//! Print format variants
use arrow::csv::writer::WriterBuilder;
use arrow::json::ArrayWriter;
use datafusion::arrow::record_batch::RecordBatch;
use datafusion::arrow::util::pretty;
use datafusion::error::{DataFusionError, Result};
use std::str::FromStr;

/// Allow records to be printed in different formats
#[derive(Debug, Clone)]
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum PrintFormat {
Csv,
Tsv,
Table,
Json,
}

impl FromStr for PrintFormat {
Expand All @@ -37,11 +39,24 @@ impl FromStr for PrintFormat {
"csv" => Ok(Self::Csv),
"tsv" => Ok(Self::Tsv),
"table" => Ok(Self::Table),
"json" => Ok(Self::Json),
_ => Err(()),
}
}
}

fn print_batches_to_json(batches: &[RecordBatch]) -> Result<String> {
let mut bytes = vec![];
{
let mut writer = ArrayWriter::new(&mut bytes);
writer.write_batches(batches)?;
writer.finish()?;
}
let formatted = String::from_utf8(bytes)
.map_err(|e| DataFusionError::Execution(e.to_string()))?;
Ok(formatted)
}

fn print_batches_with_sep(batches: &[RecordBatch], delimiter: u8) -> Result<String> {
let mut bytes = vec![];
{
Expand All @@ -65,7 +80,89 @@ impl PrintFormat {
Self::Csv => println!("{}", print_batches_with_sep(batches, b',')?),
Self::Tsv => println!("{}", print_batches_with_sep(batches, b'\t')?),
Self::Table => pretty::print_batches(batches)?,
Self::Json => println!("{}", print_batches_to_json(batches)?),
}
Ok(())
}
}

#[cfg(test)]
mod tests {
use super::*;
use arrow::array::Int32Array;
use arrow::datatypes::{DataType, Field, Schema};
use std::sync::Arc;

#[test]
fn test_from_str() {
let format = "csv".parse::<PrintFormat>().unwrap();
assert_eq!(PrintFormat::Csv, format);

let format = "tsv".parse::<PrintFormat>().unwrap();
assert_eq!(PrintFormat::Tsv, format);

let format = "json".parse::<PrintFormat>().unwrap();
assert_eq!(PrintFormat::Json, format);

let format = "table".parse::<PrintFormat>().unwrap();
assert_eq!(PrintFormat::Table, format);
}

#[test]
fn test_from_str_failure() {
assert_eq!(true, "pretty".parse::<PrintFormat>().is_err());
}

#[test]
fn test_print_batches_with_sep() {
let batches = vec![];
assert_eq!("", print_batches_with_sep(&batches, b',').unwrap());

let schema = Arc::new(Schema::new(vec![
Field::new("a", DataType::Int32, false),
Field::new("b", DataType::Int32, false),
Field::new("c", DataType::Int32, false),
]));

let batch = RecordBatch::try_new(
schema,
vec![
Arc::new(Int32Array::from(vec![1, 2, 3])),
Arc::new(Int32Array::from(vec![4, 5, 6])),
Arc::new(Int32Array::from(vec![7, 8, 9])),
],
)
.unwrap();

let batches = vec![batch];
let r = print_batches_with_sep(&batches, b',').unwrap();
assert_eq!("a,b,c\n1,4,7\n2,5,8\n3,6,9\n", r);
}

#[test]
fn test_print_batches_to_json_empty() {
let batches = vec![];
let r = print_batches_to_json(&batches).unwrap();
assert_eq!("", r);

let schema = Arc::new(Schema::new(vec![
Field::new("a", DataType::Int32, false),
Field::new("b", DataType::Int32, false),
Field::new("c", DataType::Int32, false),
]));

let batch = RecordBatch::try_new(
schema,
vec![
Arc::new(Int32Array::from(vec![1, 2, 3])),
Arc::new(Int32Array::from(vec![4, 5, 6])),
Arc::new(Int32Array::from(vec![7, 8, 9])),
],
)
.unwrap();

let batches = vec![batch];
let r = print_batches_to_json(&batches).unwrap();
assert_eq!("[{\"a\":1,\"b\":4,\"c\":7},{\"a\":2,\"b\":5,\"c\":8},{\"a\":3,\"b\":6,\"c\":9}]", r);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💯 for the tests

}
}
2 changes: 1 addition & 1 deletion datafusion-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ pub async fn main() {
)
.arg(
Arg::with_name("format")
.help("Output format (possible values: table, csv, tsv)")
.help("Output format (possible values: table, csv, tsv, json)")
.long("format")
.default_value("table")
.validator(is_valid_format)
Expand Down