Skip to content

Commit

Permalink
Adds LazyRawTextReader support for reading strings (#614)
Browse files Browse the repository at this point in the history
  • Loading branch information
zslayton committed Aug 23, 2023
1 parent b00fb2f commit 6d22b6f
Show file tree
Hide file tree
Showing 14 changed files with 586 additions and 140 deletions.
3 changes: 2 additions & 1 deletion src/lazy/binary/raw/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use crate::lazy::decoder::private::LazyRawValuePrivate;
use crate::lazy::decoder::LazyRawValue;
use crate::lazy::encoding::BinaryEncoding;
use crate::lazy::raw_value_ref::RawValueRef;
use crate::lazy::str_ref::StrRef;
use crate::result::IonFailure;
use crate::types::SymbolId;
use crate::{Decimal, Int, IonError, IonResult, IonType, RawSymbolTokenRef, Timestamp};
Expand Down Expand Up @@ -390,7 +391,7 @@ impl<'data> LazyRawBinaryValue<'data> {
let raw_bytes = self.value_body()?;
let text = std::str::from_utf8(raw_bytes)
.map_err(|_| IonError::decoding_error("found a string with invalid utf-8 data"))?;
Ok(RawValueRef::String(text))
Ok(RawValueRef::String(StrRef::from(text)))
}

/// Helper method called by [`Self::read`]. Reads the current value as a blob.
Expand Down
1 change: 1 addition & 0 deletions src/lazy/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ pub mod raw_stream_item;
pub mod raw_value_ref;
pub mod reader;
pub mod sequence;
pub mod str_ref;
pub mod r#struct;
pub mod system_reader;
pub mod system_stream_item;
Expand Down
27 changes: 25 additions & 2 deletions src/lazy/raw_value_ref.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use crate::lazy::decoder::LazyDecoder;
use crate::lazy::str_ref::StrRef;
use crate::result::IonFailure;
use crate::{Decimal, Int, IonResult, IonType, RawSymbolTokenRef, Timestamp};
use std::fmt::{Debug, Formatter};
Expand All @@ -15,7 +16,7 @@ pub enum RawValueRef<'data, D: LazyDecoder<'data>> {
Float(f64),
Decimal(Decimal),
Timestamp(Timestamp),
String(&'data str),
String(StrRef<'data>),
Symbol(RawSymbolTokenRef<'data>),
Blob(&'data [u8]),
Clob(&'data [u8]),
Expand All @@ -24,6 +25,28 @@ pub enum RawValueRef<'data, D: LazyDecoder<'data>> {
Struct(D::Struct),
}

// Provides equality for scalar types, but not containers.
impl<'data, D: LazyDecoder<'data>> PartialEq for RawValueRef<'data, D> {
fn eq(&self, other: &Self) -> bool {
use RawValueRef::*;
match (self, other) {
(Null(i1), Null(i2)) => i1 == i2,
(Bool(b1), Bool(b2)) => b1 == b2,
(Int(i1), Int(i2)) => i1 == i2,
(Float(f1), Float(f2)) => f1 == f2,
(Decimal(d1), Decimal(d2)) => d1 == d2,
(Timestamp(t1), Timestamp(t2)) => t1 == t2,
(String(s1), String(s2)) => s1 == s2,
(Symbol(s1), Symbol(s2)) => s1 == s2,
(Blob(b1), Blob(b2)) => b1 == b2,
(Clob(c1), Clob(c2)) => c1 == c2,
// We cannot compare lazy containers as we cannot guarantee that their complete contents
// are available in the buffer. Is `{foo: bar}` equal to `{foo: b`?
_ => false,
}
}
}

impl<'data, D: LazyDecoder<'data>> Debug for RawValueRef<'data, D> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
Expand Down Expand Up @@ -101,7 +124,7 @@ impl<'data, D: LazyDecoder<'data>> RawValueRef<'data, D> {
}
}

pub fn expect_string(self) -> IonResult<&'data str> {
pub fn expect_string(self) -> IonResult<StrRef<'data>> {
if let RawValueRef::String(s) = self {
Ok(s)
} else {
Expand Down
82 changes: 82 additions & 0 deletions src/lazy/str_ref.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
use crate::text::text_formatter::IonValueFormatter;
use crate::Str;
use std::borrow::Cow;
use std::fmt::{Display, Formatter};
use std::ops::Deref;

/// A string whose text is either borrowed for the `'data` lifetime or owned.
///
/// Both representations are stored in a [`Cow`], so two `StrRef`s compare equal whenever their
/// text is equal, regardless of which one borrows and which one owns.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct StrRef<'data> {
    // Borrowed when built from a `&str`, owned when built from a `String`.
    text: Cow<'data, str>,
}

impl<'data> StrRef<'data> {
    /// Copies the text into a new owned [`Str`], leaving this `StrRef` usable afterwards.
    ///
    /// As an inherent method this takes precedence over [`ToOwned::to_owned`] when called with
    /// method syntax.
    pub fn to_owned(&self) -> Str {
        Str::from(self.text())
    }

    /// Consumes this `StrRef` and converts it into an owned [`Str`].
    pub fn into_owned(self) -> Str {
        self.into()
    }

    /// Returns the text as a string slice that lives as long as the borrow of `self`.
    pub fn text(&self) -> &str {
        &self.text
    }
}

/// Dereferences to the underlying text so that `str` methods can be called on a `StrRef`.
impl<'data> Deref for StrRef<'data> {
    type Target = str;

    fn deref(&self) -> &str {
        &self.text
    }
}

/// Allows `str_ref == *some_str`, comparing text only.
impl<'data> PartialEq<str> for StrRef<'data> {
    fn eq(&self, other: &str) -> bool {
        let own_text: &str = self;
        own_text == other
    }
}

/// Allows `str_ref == "literal"`, comparing text only.
impl<'data> PartialEq<&str> for StrRef<'data> {
    fn eq(&self, other: &&str) -> bool {
        let own_text: &str = self;
        let other_text: &str = other;
        own_text == other_text
    }
}

/// Allows a `str` on the left-hand side to be compared against a `StrRef`'s text.
impl<'data> PartialEq<StrRef<'data>> for str {
    fn eq(&self, other: &StrRef<'data>) -> bool {
        let other_text: &str = other;
        self == other_text
    }
}

/// Writes the text through [`IonValueFormatter::format_string`].
impl<'data> Display for StrRef<'data> {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let mut ion_formatter = IonValueFormatter { output: f };
        match ion_formatter.format_string(self.text()) {
            Ok(()) => Ok(()),
            // `std::fmt::Error` carries no payload, so the formatter's error detail is dropped.
            Err(_) => Err(std::fmt::Error),
        }
    }
}

impl<'a> From<&'a str> for StrRef<'a> {
fn from(value: &'a str) -> Self {
StrRef {
text: Cow::from(value),
}
}
}

impl<'a> From<String> for StrRef<'a> {
fn from(value: String) -> Self {
StrRef {
text: Cow::from(value),
}
}
}

impl<'data> From<StrRef<'data>> for Str {
fn from(str_ref: StrRef<'data>) -> Self {
let text: String = str_ref.text.into_owned();
Str::from(text)
}
}
8 changes: 4 additions & 4 deletions src/lazy/struct.rs
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ impl<'top, 'data, D: LazyDecoder<'data>> LazyStruct<'top, 'data, D> {
///# use ion_rs::IonResult;
///# fn main() -> IonResult<()> {
/// use ion_rs::{Element, IonType};
/// use ion_rs::lazy::reader::LazyBinaryReader;;
/// use ion_rs::lazy::reader::LazyBinaryReader;
/// use ion_rs::lazy::value_ref::ValueRef;
///
/// let ion_data = r#"{foo: "hello", bar: null.list, baz: 3, bar: 4}"#;
Expand All @@ -156,7 +156,7 @@ impl<'top, 'data, D: LazyDecoder<'data>> LazyStruct<'top, 'data, D> {
///
/// let lazy_struct = reader.expect_next()?.read()?.expect_struct()?;
///
/// assert_eq!(lazy_struct.get("foo")?, Some(ValueRef::String("hello")));
/// assert_eq!(lazy_struct.get("foo")?, Some(ValueRef::String("hello".into())));
/// assert_eq!(lazy_struct.get("baz")?, Some(ValueRef::Int(3.into())));
/// assert_eq!(lazy_struct.get("bar")?, Some(ValueRef::Null(IonType::List)));
///# Ok(())
Expand All @@ -175,7 +175,7 @@ impl<'top, 'data, D: LazyDecoder<'data>> LazyStruct<'top, 'data, D> {
///# use ion_rs::IonResult;
///# fn main() -> IonResult<()> {
/// use ion_rs::Element;
/// use ion_rs::lazy::reader::LazyBinaryReader;;
/// use ion_rs::lazy::reader::LazyBinaryReader;
/// use ion_rs::lazy::value_ref::ValueRef;
///
/// let ion_data = r#"{foo: "hello", bar: null.list, baz: 3, bar: 4}"#;
Expand All @@ -184,7 +184,7 @@ impl<'top, 'data, D: LazyDecoder<'data>> LazyStruct<'top, 'data, D> {
///
/// let lazy_struct = reader.expect_next()?.read()?.expect_struct()?;
///
/// assert_eq!(lazy_struct.get_expected("foo")?, ValueRef::String("hello"));
/// assert_eq!(lazy_struct.get_expected("foo")?, ValueRef::String("hello".into()));
/// assert!(dbg!(lazy_struct.get_expected("Ontario")).is_err());
///# Ok(())
///# }
Expand Down
4 changes: 2 additions & 2 deletions src/lazy/system_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -235,8 +235,8 @@ impl<'data, D: LazyDecoder<'data>> LazySystemReader<'data, D> {
fn process_symbols(pending_lst: &mut PendingLst, symbols: &D::Value) -> IonResult<()> {
if let RawValueRef::List(list) = symbols.read()? {
for symbol_text in list.iter() {
if let RawValueRef::String(text) = symbol_text?.read()? {
pending_lst.symbols.push(Some(text.to_owned()))
if let RawValueRef::String(str_ref) = symbol_text?.read()? {
pending_lst.symbols.push(Some(str_ref.text().to_owned()))
} else {
pending_lst.symbols.push(None)
}
Expand Down
6 changes: 6 additions & 0 deletions src/lazy/text/as_utf8.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@ pub(crate) trait AsUtf8 {
fn as_utf8(&self, position: impl Into<Position>) -> IonResult<&str>;
}

/// Lets a plain byte slice be viewed as UTF-8 text by delegating to `bytes_as_utf8`.
impl AsUtf8 for [u8] {
    fn as_utf8(&self, position: impl Into<Position>) -> IonResult<&str> {
        let bytes: &[u8] = self;
        bytes_as_utf8(bytes, position)
    }
}

impl<const N: usize> AsUtf8 for SmallVec<[u8; N]> {
fn as_utf8(&self, position: impl Into<Position>) -> IonResult<&str> {
bytes_as_utf8(self.as_ref(), position)
Expand Down
Loading

0 comments on commit 6d22b6f

Please sign in to comment.