From a5027e8cd727dd0c4e24a39846b19362afbcd68e Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 9 Nov 2023 06:10:25 -0500 Subject: [PATCH] Backport: Fix serialization of large integers (#5038) (#5042) to `48.0.0_maintenance` (#5059) * Fix serialization of large integers (#5038) (#5042) * fmt --------- Co-authored-by: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> --- arrow-json/src/reader/mod.rs | 30 ++++++++++++++++++++++++ arrow-json/src/reader/primitive_array.rs | 2 +- arrow-json/src/reader/tape.rs | 2 +- arrow-json/src/reader/timestamp_array.rs | 2 +- 4 files changed, 33 insertions(+), 3 deletions(-) diff --git a/arrow-json/src/reader/mod.rs b/arrow-json/src/reader/mod.rs index c1cef0ec81b..1bd63dec81d 100644 --- a/arrow-json/src/reader/mod.rs +++ b/arrow-json/src/reader/mod.rs @@ -2239,4 +2239,34 @@ mod tests { let values = b.column(0).as_primitive::().values(); assert_eq!(values, &[1, 2, 3, 4]); } + + #[test] + fn test_serde_large_numbers() { + let field = Field::new("int", DataType::Int64, true); + let mut decoder = ReaderBuilder::new_with_field(field) + .build_decoder() + .unwrap(); + + decoder.serialize(&[1699148028689_u64, 2, 3, 4]).unwrap(); + let b = decoder.flush().unwrap().unwrap(); + let values = b.column(0).as_primitive::<Int64Type>().values(); + assert_eq!(values, &[1699148028689, 2, 3, 4]); + + let field = Field::new( + "int", + DataType::Timestamp(TimeUnit::Microsecond, None), + true, + ); + let mut decoder = ReaderBuilder::new_with_field(field) + .build_decoder() + .unwrap(); + + decoder.serialize(&[1699148028689_u64, 2, 3, 4]).unwrap(); + let b = decoder.flush().unwrap().unwrap(); + let values = b + .column(0) + .as_primitive::<TimestampMicrosecondType>() + .values(); + assert_eq!(values, &[1699148028689, 2, 3, 4]); + } } diff --git a/arrow-json/src/reader/primitive_array.rs b/arrow-json/src/reader/primitive_array.rs index 6cf0bac8673..daefab4bf72 100644 --- a/arrow-json/src/reader/primitive_array.rs +++ b/arrow-json/src/reader/primitive_array.rs @@ -143,7 
+143,7 @@ where }, TapeElement::I64(high) => match tape.get(p + 1) { TapeElement::I32(low) => { - let v = (high as i64) << 32 | low as i64; + let v = (high as i64) << 32 | (low as u32) as i64; let value = NumCast::from(v).ok_or_else(|| { ArrowError::JsonError(format!("failed to parse {v} as {d}",)) })?; diff --git a/arrow-json/src/reader/tape.rs b/arrow-json/src/reader/tape.rs index b39caede704..d7b6f26dd14 100644 --- a/arrow-json/src/reader/tape.rs +++ b/arrow-json/src/reader/tape.rs @@ -180,7 +180,7 @@ impl<'a> Tape<'a> { TapeElement::Null => out.push_str("null"), TapeElement::I64(high) => match self.get(idx + 1) { TapeElement::I32(low) => { - let val = (high as i64) << 32 | low as i64; + let val = (high as i64) << 32 | (low as u32) as i64; let _ = write!(out, "{val}"); return idx + 2; } diff --git a/arrow-json/src/reader/timestamp_array.rs b/arrow-json/src/reader/timestamp_array.rs index 09672614107..5da4868dd45 100644 --- a/arrow-json/src/reader/timestamp_array.rs +++ b/arrow-json/src/reader/timestamp_array.rs @@ -99,7 +99,7 @@ where TapeElement::I32(v) => builder.append_value(v as i64), TapeElement::I64(high) => match tape.get(p + 1) { TapeElement::I32(low) => { - builder.append_value((high as i64) << 32 | low as i64) + builder.append_value((high as i64) << 32 | (low as u32) as i64) } _ => unreachable!(), },