From 34e498682b1b6faa1465cb25f871fe2f21811890 Mon Sep 17 00:00:00 2001 From: Rafael Guerreiro Date: Thu, 16 Feb 2023 11:58:13 -0800 Subject: [PATCH] Using Borrow to avoid consuming the value if you need to keep the JSON and get the schema. This avoids unnecessarily cloning the entire JSON. --- arrow-json/src/reader.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arrow-json/src/reader.rs b/arrow-json/src/reader.rs index 1d4cfc740fdf..54e687a8b47b 100644 --- a/arrow-json/src/reader.rs +++ b/arrow-json/src/reader.rs @@ -46,6 +46,7 @@ //! let batch = json.next().unwrap().unwrap(); //! ``` +use std::borrow::Borrow; use std::io::{BufRead, BufReader, Read, Seek}; use std::sync::Arc; @@ -526,16 +527,17 @@ fn collect_field_types_from_object( /// The reason we diverge here is because we don't have utilities to deal with JSON data once it's /// interpreted as Strings. We should match Spark's behavior once we added more JSON parsing /// kernels in the future. -pub fn infer_json_schema_from_iterator<I>(value_iter: I) -> Result<Schema, ArrowError> +pub fn infer_json_schema_from_iterator<I, V>(value_iter: I) -> Result<Schema, ArrowError> where - I: Iterator<Item = Result<Value, ArrowError>>, + I: Iterator<Item = Result<V, ArrowError>>, + V: Borrow<Value>, { let mut field_types: HashMap<String, InferredType> = HashMap::new(); for record in value_iter { - match record? { + match record?.borrow() { Value::Object(map) => { - collect_field_types_from_object(&mut field_types, &map)?; + collect_field_types_from_object(&mut field_types, map)?; } value => { return Err(ArrowError::JsonError(format!(