diff --git a/examples/lazy_read_all_values.rs b/examples/lazy_read_all_values.rs index ca09368f..53a2d20f 100644 --- a/examples/lazy_read_all_values.rs +++ b/examples/lazy_read_all_values.rs @@ -1,11 +1,11 @@ +#[cfg(feature = "experimental-lazy-reader")] +use ion_rs::IonResult; + #[cfg(not(feature = "experimental-lazy-reader"))] fn main() { println!("This example requires the 'experimental-lazy-reader' feature to work."); } -#[cfg(feature = "experimental-lazy-reader")] -use ion_rs::IonResult; - #[cfg(feature = "experimental-lazy-reader")] fn main() -> IonResult<()> { lazy_reader_example::read_all_values() @@ -13,16 +13,16 @@ fn main() -> IonResult<()> { #[cfg(feature = "experimental-lazy-reader")] mod lazy_reader_example { + use std::fs::File; + use std::process::exit; + + use memmap::MmapOptions; use ion_rs::lazy::r#struct::LazyBinaryStruct; use ion_rs::lazy::reader::LazyBinaryReader; - use ion_rs::lazy::sequence::LazyBinarySequence; use ion_rs::lazy::value::LazyBinaryValue; use ion_rs::lazy::value_ref::ValueRef; use ion_rs::IonResult; - use memmap::MmapOptions; - use std::fs::File; - use std::process::exit; pub fn read_all_values() -> IonResult<()> { let args: Vec = std::env::args().collect(); @@ -53,14 +53,17 @@ mod lazy_reader_example { fn count_value_and_children(lazy_value: &LazyBinaryValue) -> IonResult { use ValueRef::*; let child_count = match lazy_value.read()? 
{ - List(s) | SExp(s) => count_sequence_children(&s)?, + List(s) => count_sequence_children(s.iter())?, + SExp(s) => count_sequence_children(s.iter())?, Struct(s) => count_struct_children(&s)?, _ => 0, }; Ok(1 + child_count) } - fn count_sequence_children(lazy_sequence: &LazyBinarySequence) -> IonResult { + fn count_sequence_children<'a, 'b>( + lazy_sequence: impl Iterator>>, + ) -> IonResult { let mut count = 0; for value in lazy_sequence { count += count_value_and_children(&value?)?; diff --git a/src/lazy/any_encoding.rs b/src/lazy/any_encoding.rs index 7d6c1d37..e5137589 100644 --- a/src/lazy/any_encoding.rs +++ b/src/lazy/any_encoding.rs @@ -7,7 +7,9 @@ use crate::lazy::binary::raw::r#struct::{ LazyRawBinaryField, LazyRawBinaryStruct, RawBinaryStructIterator, }; use crate::lazy::binary::raw::reader::LazyRawBinaryReader; -use crate::lazy::binary::raw::sequence::{LazyRawBinarySequence, RawBinarySequenceIterator}; +use crate::lazy::binary::raw::sequence::{ + LazyRawBinaryList, LazyRawBinarySExp, RawBinarySequenceIterator, +}; use crate::lazy::binary::raw::value::LazyRawBinaryValue; use crate::lazy::decoder::private::{ LazyContainerPrivate, LazyRawFieldPrivate, LazyRawValuePrivate, @@ -22,7 +24,9 @@ use crate::lazy::text::raw::r#struct::{ LazyRawTextField, LazyRawTextStruct, RawTextStructIterator, }; use crate::lazy::text::raw::reader::LazyRawTextReader; -use crate::lazy::text::raw::sequence::{LazyRawTextSequence, RawTextSequenceIterator}; +use crate::lazy::text::raw::sequence::{ + LazyRawTextList, LazyRawTextSExp, RawTextListIterator, RawTextSExpIterator, +}; use crate::lazy::text::value::{LazyRawTextValue, RawTextAnnotationsIterator}; use crate::{IonResult, IonType, RawSymbolTokenRef}; @@ -36,7 +40,8 @@ pub struct AnyEncoding; impl<'data> LazyDecoder<'data> for AnyEncoding { type Reader = LazyRawAnyReader<'data>; type Value = LazyRawAnyValue<'data>; - type Sequence = LazyRawAnySequence<'data>; + type List = LazyRawAnyList<'data>; + type SExp = 
LazyRawAnySExp<'data>; type Struct = LazyRawAnyStruct<'data>; type AnnotationsIterator = RawAnyAnnotationsIterator<'data>; } @@ -246,101 +251,200 @@ impl<'data> Iterator for RawAnyAnnotationsIterator<'data> { } } -// ===== Sequences ====== +// ===== Lists ====== + +#[derive(Debug, Clone)] +pub struct LazyRawAnyList<'data> { + encoding: LazyRawListKind<'data>, +} + +#[derive(Debug, Clone)] +pub enum LazyRawListKind<'data> { + Text_1_0(LazyRawTextList<'data>), + Binary_1_0(LazyRawBinaryList<'data>), +} + +impl<'data> LazyContainerPrivate<'data, AnyEncoding> for LazyRawAnyList<'data> { + fn from_value(value: LazyRawAnyValue<'data>) -> Self { + match value.encoding { + LazyRawValueKind::Text_1_0(v) => LazyRawAnyList { + encoding: LazyRawListKind::Text_1_0(LazyRawTextList::from_value(v)), + }, + LazyRawValueKind::Binary_1_0(v) => LazyRawAnyList { + encoding: LazyRawListKind::Binary_1_0(LazyRawBinaryList::from_value(v)), + }, + } + } +} + +pub struct RawAnyListIterator<'data> { + encoding: RawAnyListIteratorKind<'data>, +} + +pub enum RawAnyListIteratorKind<'data> { + Text_1_0(RawTextListIterator<'data>), + Binary_1_0(RawBinarySequenceIterator<'data>), +} + +impl<'data> Iterator for RawAnyListIterator<'data> { + type Item = IonResult>; + + fn next(&mut self) -> Option { + match &mut self.encoding { + RawAnyListIteratorKind::Text_1_0(i) => i + .next() + .map(|value_result| value_result.map(|value| value.into())), + RawAnyListIteratorKind::Binary_1_0(i) => i + .next() + .map(|value_result| value_result.map(|value| value.into())), + } + } +} + +impl<'data> LazyRawSequence<'data, AnyEncoding> for LazyRawAnyList<'data> { + type Iterator = RawAnyListIterator<'data>; + + fn annotations(&self) -> >::AnnotationsIterator { + self.as_value().annotations() + } + + fn ion_type(&self) -> IonType { + match &self.encoding { + LazyRawListKind::Text_1_0(s) => s.ion_type(), + LazyRawListKind::Binary_1_0(s) => s.ion_type(), + } + } + + fn iter(&self) -> Self::Iterator { + match 
&self.encoding { + LazyRawListKind::Text_1_0(s) => RawAnyListIterator { + encoding: RawAnyListIteratorKind::Text_1_0(s.iter()), + }, + LazyRawListKind::Binary_1_0(s) => RawAnyListIterator { + encoding: RawAnyListIteratorKind::Binary_1_0(s.iter()), + }, + } + } + + fn as_value(&self) -> LazyRawAnyValue<'data> { + match &self.encoding { + LazyRawListKind::Text_1_0(s) => (s.as_value()).into(), + LazyRawListKind::Binary_1_0(s) => (s.as_value()).into(), + } + } +} + +impl<'data> From> for LazyRawAnyList<'data> { + fn from(value: LazyRawTextList<'data>) -> Self { + LazyRawAnyList { + encoding: LazyRawListKind::Text_1_0(value), + } + } +} + +impl<'data> From> for LazyRawAnyList<'data> { + fn from(value: LazyRawBinaryList<'data>) -> Self { + LazyRawAnyList { + encoding: LazyRawListKind::Binary_1_0(value), + } + } +} + +// ===== SExps ===== #[derive(Debug, Clone)] -pub struct LazyRawAnySequence<'data> { - encoding: LazyRawSequenceKind<'data>, +pub struct LazyRawAnySExp<'data> { + encoding: LazyRawSExpKind<'data>, } #[derive(Debug, Clone)] -pub enum LazyRawSequenceKind<'data> { - Text_1_0(LazyRawTextSequence<'data>), - Binary_1_0(LazyRawBinarySequence<'data>), +pub enum LazyRawSExpKind<'data> { + Text_1_0(LazyRawTextSExp<'data>), + Binary_1_0(LazyRawBinarySExp<'data>), } -impl<'data> LazyContainerPrivate<'data, AnyEncoding> for LazyRawAnySequence<'data> { +impl<'data> LazyContainerPrivate<'data, AnyEncoding> for LazyRawAnySExp<'data> { fn from_value(value: LazyRawAnyValue<'data>) -> Self { match value.encoding { - LazyRawValueKind::Text_1_0(v) => LazyRawAnySequence { - encoding: LazyRawSequenceKind::Text_1_0(LazyRawTextSequence::from_value(v)), + LazyRawValueKind::Text_1_0(v) => LazyRawAnySExp { + encoding: LazyRawSExpKind::Text_1_0(LazyRawTextSExp::from_value(v)), }, - LazyRawValueKind::Binary_1_0(v) => LazyRawAnySequence { - encoding: LazyRawSequenceKind::Binary_1_0(LazyRawBinarySequence::from_value(v)), + LazyRawValueKind::Binary_1_0(v) => LazyRawAnySExp { + encoding: 
LazyRawSExpKind::Binary_1_0(LazyRawBinarySExp::from_value(v)), }, } } } -pub struct RawAnySequenceIterator<'data> { - encoding: RawAnySequenceIteratorKind<'data>, +pub struct RawAnySExpIterator<'data> { + encoding: RawAnySExpIteratorKind<'data>, } -pub enum RawAnySequenceIteratorKind<'data> { - Text_1_0(RawTextSequenceIterator<'data>), +pub enum RawAnySExpIteratorKind<'data> { + Text_1_0(RawTextSExpIterator<'data>), Binary_1_0(RawBinarySequenceIterator<'data>), } -impl<'data> Iterator for RawAnySequenceIterator<'data> { +impl<'data> Iterator for RawAnySExpIterator<'data> { type Item = IonResult>; fn next(&mut self) -> Option { match &mut self.encoding { - RawAnySequenceIteratorKind::Text_1_0(i) => i + RawAnySExpIteratorKind::Text_1_0(i) => i .next() .map(|value_result| value_result.map(|value| value.into())), - RawAnySequenceIteratorKind::Binary_1_0(i) => i + RawAnySExpIteratorKind::Binary_1_0(i) => i .next() .map(|value_result| value_result.map(|value| value.into())), } } } -impl<'data> LazyRawSequence<'data, AnyEncoding> for LazyRawAnySequence<'data> { - type Iterator = RawAnySequenceIterator<'data>; +impl<'data> LazyRawSequence<'data, AnyEncoding> for LazyRawAnySExp<'data> { + type Iterator = RawAnySExpIterator<'data>; fn annotations(&self) -> >::AnnotationsIterator { - todo!() + self.as_value().annotations() } fn ion_type(&self) -> IonType { match &self.encoding { - LazyRawSequenceKind::Text_1_0(s) => s.ion_type(), - LazyRawSequenceKind::Binary_1_0(s) => s.ion_type(), + LazyRawSExpKind::Text_1_0(s) => s.ion_type(), + LazyRawSExpKind::Binary_1_0(s) => s.ion_type(), } } fn iter(&self) -> Self::Iterator { match &self.encoding { - LazyRawSequenceKind::Text_1_0(s) => RawAnySequenceIterator { - encoding: RawAnySequenceIteratorKind::Text_1_0(s.iter()), + LazyRawSExpKind::Text_1_0(s) => RawAnySExpIterator { + encoding: RawAnySExpIteratorKind::Text_1_0(s.iter()), }, - LazyRawSequenceKind::Binary_1_0(s) => RawAnySequenceIterator { - encoding: 
RawAnySequenceIteratorKind::Binary_1_0(s.iter()), + LazyRawSExpKind::Binary_1_0(s) => RawAnySExpIterator { + encoding: RawAnySExpIteratorKind::Binary_1_0(s.iter()), }, } } fn as_value(&self) -> LazyRawAnyValue<'data> { match &self.encoding { - LazyRawSequenceKind::Text_1_0(s) => (s.as_value()).into(), - LazyRawSequenceKind::Binary_1_0(s) => (s.as_value()).into(), + LazyRawSExpKind::Text_1_0(s) => (s.as_value()).into(), + LazyRawSExpKind::Binary_1_0(s) => (s.as_value()).into(), } } } -impl<'data> From> for LazyRawAnySequence<'data> { - fn from(value: LazyRawTextSequence<'data>) -> Self { - LazyRawAnySequence { - encoding: LazyRawSequenceKind::Text_1_0(value), +impl<'data> From> for LazyRawAnySExp<'data> { + fn from(value: LazyRawTextSExp<'data>) -> Self { + LazyRawAnySExp { + encoding: LazyRawSExpKind::Text_1_0(value), } } } -impl<'data> From> for LazyRawAnySequence<'data> { - fn from(value: LazyRawBinarySequence<'data>) -> Self { - LazyRawAnySequence { - encoding: LazyRawSequenceKind::Binary_1_0(value), +impl<'data> From> for LazyRawAnySExp<'data> { + fn from(value: LazyRawBinarySExp<'data>) -> Self { + LazyRawAnySExp { + encoding: LazyRawSExpKind::Binary_1_0(value), } } } diff --git a/src/lazy/binary/raw/reader.rs b/src/lazy/binary/raw/reader.rs index 77297e54..99c28d2f 100644 --- a/src/lazy/binary/raw/reader.rs +++ b/src/lazy/binary/raw/reader.rs @@ -188,7 +188,7 @@ mod tests { let lazy_list = reader.next()?.expect_value()?.read()?.expect_list()?; // Exercise the `Debug` impl println!("Lazy Raw Sequence: {:?}", lazy_list); - let mut list_values = lazy_list.iter(); + let mut list_values = lazy_list.sequence.iter(); assert_eq!(list_values.next().expect("first")?.ion_type(), IonType::Int); assert_eq!( list_values.next().expect("second")?.ion_type(), diff --git a/src/lazy/binary/raw/sequence.rs b/src/lazy/binary/raw/sequence.rs index be0649e5..7bb86d45 100644 --- a/src/lazy/binary/raw/sequence.rs +++ b/src/lazy/binary/raw/sequence.rs @@ -9,47 +9,87 @@ use 
crate::{IonResult, IonType}; use std::fmt; use std::fmt::{Debug, Formatter}; -#[derive(Clone)] -pub struct LazyRawBinarySequence<'data> { - pub(crate) value: LazyRawBinaryValue<'data>, +#[derive(Debug, Copy, Clone)] +pub struct LazyRawBinaryList<'data> { + pub(crate) sequence: LazyRawBinarySequence<'data>, } -impl<'data> LazyRawBinarySequence<'data> { - pub fn ion_type(&self) -> IonType { - self.value.ion_type() +#[derive(Debug, Copy, Clone)] +pub struct LazyRawBinarySExp<'data> { + pub(crate) sequence: LazyRawBinarySequence<'data>, +} + +impl<'data> LazyContainerPrivate<'data, BinaryEncoding> for LazyRawBinaryList<'data> { + fn from_value(value: LazyRawBinaryValue<'data>) -> Self { + LazyRawBinaryList { + sequence: LazyRawBinarySequence { value }, + } } +} - pub fn iter(&self) -> RawBinarySequenceIterator<'data> { - // Get as much of the sequence's body as is available in the input buffer. - // Reading a child value may fail as `Incomplete` - let buffer_slice = self.value.available_body(); - RawBinarySequenceIterator::new(buffer_slice) +impl<'data> LazyRawSequence<'data, BinaryEncoding> for LazyRawBinaryList<'data> { + type Iterator = RawBinarySequenceIterator<'data>; + + fn annotations(&self) -> RawBinaryAnnotationsIterator<'data> { + self.sequence.value.annotations() + } + + fn ion_type(&self) -> IonType { + IonType::List + } + + fn iter(&self) -> Self::Iterator { + self.sequence.iter() + } + + fn as_value(&self) -> LazyRawBinaryValue<'data> { + self.sequence.value } } -impl<'data> LazyContainerPrivate<'data, BinaryEncoding> for LazyRawBinarySequence<'data> { +impl<'data> LazyContainerPrivate<'data, BinaryEncoding> for LazyRawBinarySExp<'data> { fn from_value(value: LazyRawBinaryValue<'data>) -> Self { - LazyRawBinarySequence { value } + LazyRawBinarySExp { + sequence: LazyRawBinarySequence { value }, + } } } -impl<'data> LazyRawSequence<'data, BinaryEncoding> for LazyRawBinarySequence<'data> { +impl<'data> LazyRawSequence<'data, BinaryEncoding> for 
LazyRawBinarySExp<'data> { type Iterator = RawBinarySequenceIterator<'data>; fn annotations(&self) -> RawBinaryAnnotationsIterator<'data> { - self.value.annotations() + self.sequence.value.annotations() } fn ion_type(&self) -> IonType { - self.value.ion_type() + IonType::SExp } fn iter(&self) -> Self::Iterator { - LazyRawBinarySequence::iter(self) + self.sequence.iter() } fn as_value(&self) -> LazyRawBinaryValue<'data> { - self.value + self.sequence.value + } +} + +#[derive(Copy, Clone)] +pub struct LazyRawBinarySequence<'data> { + pub(crate) value: LazyRawBinaryValue<'data>, +} + +impl<'data> LazyRawBinarySequence<'data> { + pub fn ion_type(&self) -> IonType { + self.value.ion_type() + } + + pub fn iter(&self) -> RawBinarySequenceIterator<'data> { + // Get as much of the sequence's body as is available in the input buffer. + // Reading a child value may fail as `Incomplete` + let buffer_slice = self.value.available_body(); + RawBinarySequenceIterator::new(buffer_slice) } } diff --git a/src/lazy/binary/raw/value.rs b/src/lazy/binary/raw/value.rs index 1d8a7b49..231f9167 100644 --- a/src/lazy/binary/raw/value.rs +++ b/src/lazy/binary/raw/value.rs @@ -4,7 +4,9 @@ use crate::lazy::binary::encoded_value::EncodedValue; use crate::lazy::binary::immutable_buffer::ImmutableBuffer; use crate::lazy::binary::raw::annotations_iterator::RawBinaryAnnotationsIterator; use crate::lazy::binary::raw::r#struct::LazyRawBinaryStruct; -use crate::lazy::binary::raw::sequence::LazyRawBinarySequence; +use crate::lazy::binary::raw::sequence::{ + LazyRawBinaryList, LazyRawBinarySExp, LazyRawBinarySequence, +}; use crate::lazy::decoder::private::LazyRawValuePrivate; use crate::lazy::decoder::LazyRawValue; use crate::lazy::encoding::BinaryEncoding; @@ -422,7 +424,10 @@ impl<'data> LazyRawBinaryValue<'data> { input: self.input, }; let lazy_sequence = LazyRawBinarySequence { value: lazy_value }; - Ok(RawValueRef::SExp(lazy_sequence)) + let lazy_sexp = LazyRawBinarySExp { + sequence: 
lazy_sequence, + }; + Ok(RawValueRef::SExp(lazy_sexp)) } /// Helper method called by [`Self::read`]. Reads the current value as a list. @@ -433,7 +438,10 @@ impl<'data> LazyRawBinaryValue<'data> { input: self.input, }; let lazy_sequence = LazyRawBinarySequence { value: lazy_value }; - Ok(RawValueRef::List(lazy_sequence)) + let lazy_list = LazyRawBinaryList { + sequence: lazy_sequence, + }; + Ok(RawValueRef::List(lazy_list)) } /// Helper method called by [`Self::read`]. Reads the current value as a struct. diff --git a/src/lazy/decoder.rs b/src/lazy/decoder.rs index 507f8f5a..da956a52 100644 --- a/src/lazy/decoder.rs +++ b/src/lazy/decoder.rs @@ -11,10 +11,13 @@ pub trait LazyDecoder<'data>: Sized + Debug + Clone { /// A lazy reader that yields [`Self::Value`]s representing the top level values in its input. type Reader: LazyRawReader<'data, Self>; /// A value (at any depth) in the input. This can be further inspected to access either its - /// scalar data or, if it is a container, to view it as [`Self::Sequence`] or [`Self::Struct`]. + /// scalar data or, if it is a container, to view it as [`Self::List`], [`Self::SExp`] or + /// [`Self::Struct`]. type Value: LazyRawValue<'data, Self>; - /// A list or expression whose child values may be accessed iteratively or by index. - type Sequence: LazyRawSequence<'data, Self>; + /// An s-expression whose child values may be accessed iteratively. + type SExp: LazyRawSequence<'data, Self>; + /// A list whose child values may be accessed iteratively. + type List: LazyRawSequence<'data, Self>; /// A struct whose fields may be accessed iteratively or by field name. type Struct: LazyRawStruct<'data, Self>; /// An iterator over the annotations on the input stream's values. 
diff --git a/src/lazy/encoding.rs b/src/lazy/encoding.rs index ae556b74..9616a08d 100644 --- a/src/lazy/encoding.rs +++ b/src/lazy/encoding.rs @@ -1,12 +1,12 @@ use crate::lazy::binary::raw::annotations_iterator::RawBinaryAnnotationsIterator; use crate::lazy::binary::raw::r#struct::LazyRawBinaryStruct; use crate::lazy::binary::raw::reader::LazyRawBinaryReader; -use crate::lazy::binary::raw::sequence::LazyRawBinarySequence; +use crate::lazy::binary::raw::sequence::{LazyRawBinaryList, LazyRawBinarySExp}; use crate::lazy::binary::raw::value::LazyRawBinaryValue; use crate::lazy::decoder::LazyDecoder; use crate::lazy::text::raw::r#struct::LazyRawTextStruct; use crate::lazy::text::raw::reader::LazyRawTextReader; -use crate::lazy::text::raw::sequence::LazyRawTextSequence; +use crate::lazy::text::raw::sequence::{LazyRawTextList, LazyRawTextSExp}; use crate::lazy::text::value::{LazyRawTextValue, RawTextAnnotationsIterator}; // These types derive trait implementations in order to allow types that containing them @@ -23,7 +23,8 @@ pub struct TextEncoding; impl<'data> LazyDecoder<'data> for BinaryEncoding { type Reader = LazyRawBinaryReader<'data>; type Value = LazyRawBinaryValue<'data>; - type Sequence = LazyRawBinarySequence<'data>; + type SExp = LazyRawBinarySExp<'data>; + type List = LazyRawBinaryList<'data>; type Struct = LazyRawBinaryStruct<'data>; type AnnotationsIterator = RawBinaryAnnotationsIterator<'data>; } @@ -31,7 +32,8 @@ impl<'data> LazyDecoder<'data> for BinaryEncoding { impl<'data> LazyDecoder<'data> for TextEncoding { type Reader = LazyRawTextReader<'data>; type Value = LazyRawTextValue<'data>; - type Sequence = LazyRawTextSequence<'data>; + type SExp = LazyRawTextSExp<'data>; + type List = LazyRawTextList<'data>; type Struct = LazyRawTextStruct<'data>; type AnnotationsIterator = RawTextAnnotationsIterator<'data>; } diff --git a/src/lazy/raw_value_ref.rs b/src/lazy/raw_value_ref.rs index cd97beeb..57934e0e 100644 --- a/src/lazy/raw_value_ref.rs +++ 
b/src/lazy/raw_value_ref.rs @@ -20,8 +20,8 @@ pub enum RawValueRef<'data, D: LazyDecoder<'data>> { Symbol(RawSymbolTokenRef<'data>), Blob(&'data [u8]), Clob(&'data [u8]), - SExp(D::Sequence), - List(D::Sequence), + SExp(D::SExp), + List(D::List), Struct(D::Struct), } @@ -156,7 +156,7 @@ impl<'data, D: LazyDecoder<'data>> RawValueRef<'data, D> { } } - pub fn expect_list(self) -> IonResult { + pub fn expect_list(self) -> IonResult { if let RawValueRef::List(s) = self { Ok(s) } else { @@ -164,7 +164,7 @@ impl<'data, D: LazyDecoder<'data>> RawValueRef<'data, D> { } } - pub fn expect_sexp(self) -> IonResult { + pub fn expect_sexp(self) -> IonResult { if let RawValueRef::SExp(s) = self { Ok(s) } else { diff --git a/src/lazy/sequence.rs b/src/lazy/sequence.rs index b3c830f8..8c97b9cc 100644 --- a/src/lazy/sequence.rs +++ b/src/lazy/sequence.rs @@ -6,8 +6,8 @@ use crate::{IonError, IonResult, IonType, SymbolTable}; use std::fmt; use std::fmt::{Debug, Formatter}; -/// A list or S-expression in a binary Ion stream whose header has been parsed but whose body -/// (i.e. its child values) have not. A `LazySequence` is immutable; its data can be read any +/// A list in a binary Ion stream whose header has been parsed but whose body +/// (i.e. its child values) have not. A `LazyList` is immutable; its data can be read any /// number of times. 
/// /// ``` @@ -44,26 +44,26 @@ use std::fmt::{Debug, Formatter}; ///# Ok(()) ///# } /// ``` -pub struct LazySequence<'top, 'data, D: LazyDecoder<'data>> { - pub(crate) raw_sequence: D::Sequence, +pub struct LazyList<'top, 'data, D: LazyDecoder<'data>> { + pub(crate) raw_list: D::List, pub(crate) symbol_table: &'top SymbolTable, } -pub type LazyBinarySequence<'top, 'data> = LazySequence<'top, 'data, BinaryEncoding>; +pub type LazyBinarySequence<'top, 'data> = LazyList<'top, 'data, BinaryEncoding>; -impl<'top, 'data, D: LazyDecoder<'data>> LazySequence<'top, 'data, D> { +impl<'top, 'data, D: LazyDecoder<'data>> LazyList<'top, 'data, D> { /// Returns the [`IonType`] of this sequence. /// /// This will always be either [`IonType::List`] or [`IonType::SExp`]. // TODO: We should have a `SequenceType` enum with only those options. pub fn ion_type(&self) -> IonType { - self.raw_sequence.ion_type() + self.raw_list.ion_type() } /// Returns an iterator over the values in this sequence. See: [`LazyValue`]. 
- pub fn iter(&self) -> SequenceIterator<'top, 'data, D> { - SequenceIterator { - raw_sequence_iter: self.raw_sequence.iter(), + pub fn iter(&self) -> ListIterator<'top, 'data, D> { + ListIterator { + raw_list_iter: self.raw_list.iter(), symbol_table: self.symbol_table, } } @@ -98,16 +98,16 @@ impl<'top, 'data, D: LazyDecoder<'data>> LazySequence<'top, 'data, D> { /// ``` pub fn annotations(&self) -> AnnotationsIterator<'top, 'data, D> { AnnotationsIterator { - raw_annotations: self.raw_sequence.as_value().annotations(), + raw_annotations: self.raw_list.as_value().annotations(), symbol_table: self.symbol_table, } } } -impl<'top, 'data, D: LazyDecoder<'data>> TryFrom> for Sequence { +impl<'top, 'data, D: LazyDecoder<'data>> TryFrom> for Sequence { type Error = IonError; - fn try_from(lazy_sequence: LazySequence<'top, 'data, D>) -> Result { + fn try_from(lazy_sequence: LazyList<'top, 'data, D>) -> Result { let sequence: Sequence = lazy_sequence .iter() .map(|v| Element::try_from(v?)) @@ -117,41 +117,36 @@ impl<'top, 'data, D: LazyDecoder<'data>> TryFrom> f } } -impl<'top, 'data, D: LazyDecoder<'data>> TryFrom> for Element { +impl<'top, 'data, D: LazyDecoder<'data>> TryFrom> for Element { type Error = IonError; - fn try_from(lazy_sequence: LazySequence<'top, 'data, D>) -> Result { - let ion_type = lazy_sequence.ion_type(); - let annotations: Annotations = lazy_sequence.annotations().try_into()?; - let sequence: Sequence = lazy_sequence.try_into()?; - let value = match ion_type { - IonType::SExp => Value::SExp(sequence), - IonType::List => Value::List(sequence), - _ => unreachable!("no other IonTypes are sequences"), - }; + fn try_from(lazy_list: LazyList<'top, 'data, D>) -> Result { + let annotations: Annotations = lazy_list.annotations().try_into()?; + let sequence: Sequence = lazy_list.try_into()?; + let value = Value::List(sequence); Ok(value.with_annotations(annotations)) } } -impl<'a, 'top, 'data, D: LazyDecoder<'data>> IntoIterator for &'a LazySequence<'top, 
'data, D> { +impl<'a, 'top, 'data, D: LazyDecoder<'data>> IntoIterator for &'a LazyList<'top, 'data, D> { type Item = IonResult>; - type IntoIter = SequenceIterator<'top, 'data, D>; + type IntoIter = ListIterator<'top, 'data, D>; fn into_iter(self) -> Self::IntoIter { self.iter() } } -pub struct SequenceIterator<'top, 'data, D: LazyDecoder<'data>> { - raw_sequence_iter: >::Iterator, +pub struct ListIterator<'top, 'data, D: LazyDecoder<'data>> { + raw_list_iter: >::Iterator, symbol_table: &'top SymbolTable, } -impl<'top, 'data, D: LazyDecoder<'data>> Iterator for SequenceIterator<'top, 'data, D> { +impl<'top, 'data, D: LazyDecoder<'data>> Iterator for ListIterator<'top, 'data, D> { type Item = IonResult>; fn next(&mut self) -> Option { - let raw_value = match self.raw_sequence_iter.next() { + let raw_value = match self.raw_list_iter.next() { Some(Ok(raw_value)) => raw_value, Some(Err(e)) => return Some(Err(e)), None => return None, @@ -165,44 +160,152 @@ impl<'top, 'data, D: LazyDecoder<'data>> Iterator for SequenceIterator<'top, 'da } } -impl<'top, 'data, D: LazyDecoder<'data>> Debug for LazySequence<'top, 'data, D> { +impl<'top, 'data, D: LazyDecoder<'data>> Debug for LazyList<'top, 'data, D> { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - match self.ion_type() { - IonType::SExp => { - write!(f, "(")?; - for value in self { - write!( - f, - "{:?} ", - value - .map_err(|_| fmt::Error)? - .read() - .map_err(|_| fmt::Error)? - )?; - } - write!(f, ")")?; - } - IonType::List => { - write!(f, "[")?; - for value in self { - write!( - f, - "{:?},", - value - .map_err(|_| fmt::Error)? - .read() - .map_err(|_| fmt::Error)? - )?; - } - write!(f, "]")?; - } - _ => unreachable!("LazySequence is only created for list and sexp"), + write!(f, "[")?; + for value in self { + write!( + f, + "{:?},", + value + .map_err(|_| fmt::Error)? + .read() + .map_err(|_| fmt::Error)? 
+ )?; } + write!(f, "]")?; Ok(()) } } +// ===== SExps ===== + +pub struct LazySExp<'top, 'data, D: LazyDecoder<'data>> { + pub(crate) raw_sexp: D::SExp, + pub(crate) symbol_table: &'top SymbolTable, +} + +impl<'top, 'data, D: LazyDecoder<'data>> Debug for LazySExp<'top, 'data, D> { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "(")?; + for value in self { + write!( + f, + "{:?} ", + value + .map_err(|_| fmt::Error)? + .read() + .map_err(|_| fmt::Error)? + )?; + } + write!(f, ")")?; + + Ok(()) + } +} + +impl<'top, 'data, D: LazyDecoder<'data>> LazySExp<'top, 'data, D> { + /// Returns an iterator over the values in this sequence. See: [`LazyValue`]. + pub fn iter(&self) -> SExpIterator<'top, 'data, D> { + SExpIterator { + raw_sexp_iter: self.raw_sexp.iter(), + symbol_table: self.symbol_table, + } + } + + /// Returns an iterator over the annotations on this value. If this value has no annotations, + /// the resulting iterator will be empty. + /// + /// ``` + ///# use ion_rs::IonResult; + ///# fn main() -> IonResult<()> { + /// + /// // Construct an Element and serialize it as binary Ion. + /// use ion_rs::{ion_sexp, Element, IntoAnnotatedElement}; + /// use ion_rs::lazy::reader::LazyBinaryReader; + /// + /// let element: Element = ion_sexp!(true false).with_annotations(["foo", "bar", "baz"]); + /// let binary_ion = element.to_binary()?; + /// + /// let mut lazy_reader = LazyBinaryReader::new(&binary_ion)?; + /// + /// // Get the first lazy value from the stream. + /// let lazy_sexp = lazy_reader.expect_next()?.read()?.expect_sexp()?; + /// + /// // Inspect its annotations. 
+ /// let mut annotations = lazy_sexp.annotations(); + /// assert_eq!(annotations.next().unwrap()?, "foo"); + /// assert_eq!(annotations.next().unwrap()?, "bar"); + /// assert_eq!(annotations.next().unwrap()?, "baz"); + /// + ///# Ok(()) + ///# } + /// ``` + pub fn annotations(&self) -> AnnotationsIterator<'top, 'data, D> { + AnnotationsIterator { + raw_annotations: self.raw_sexp.as_value().annotations(), + symbol_table: self.symbol_table, + } + } +} + +impl<'top, 'data, D: LazyDecoder<'data>> TryFrom> for Sequence { + type Error = IonError; + + fn try_from(lazy_sequence: LazySExp<'top, 'data, D>) -> Result { + let sequence: Sequence = lazy_sequence + .iter() + .map(|v| Element::try_from(v?)) + .collect::>>()? + .into(); + Ok(sequence) + } +} + +impl<'top, 'data, D: LazyDecoder<'data>> TryFrom> for Element { + type Error = IonError; + + fn try_from(lazy_sequence: LazySExp<'top, 'data, D>) -> Result { + let annotations: Annotations = lazy_sequence.annotations().try_into()?; + let sequence: Sequence = lazy_sequence.try_into()?; + let value = Value::SExp(sequence); + Ok(value.with_annotations(annotations)) + } +} + +impl<'a, 'top, 'data, D: LazyDecoder<'data>> IntoIterator for &'a LazySExp<'top, 'data, D> { + type Item = IonResult>; + type IntoIter = SExpIterator<'top, 'data, D>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +pub struct SExpIterator<'top, 'data, D: LazyDecoder<'data>> { + raw_sexp_iter: >::Iterator, + symbol_table: &'top SymbolTable, +} + +impl<'top, 'data, D: LazyDecoder<'data>> Iterator for SExpIterator<'top, 'data, D> { + type Item = IonResult>; + + fn next(&mut self) -> Option { + let raw_value = match self.raw_sexp_iter.next() { + Some(Ok(raw_value)) => raw_value, + Some(Err(e)) => return Some(Err(e)), + None => return None, + }; + + let lazy_value = LazyValue { + raw_value, + symbol_table: self.symbol_table, + }; + Some(Ok(lazy_value)) + } +} + #[cfg(test)] mod tests { use crate::element::Element; diff --git 
a/src/lazy/text/buffer.rs b/src/lazy/text/buffer.rs index 635d6001..ae9e12d8 100644 --- a/src/lazy/text/buffer.rs +++ b/src/lazy/text/buffer.rs @@ -23,7 +23,7 @@ use crate::lazy::text::matched::{ use crate::lazy::text::parse_result::{InvalidInputError, IonParseError}; use crate::lazy::text::parse_result::{IonMatchResult, IonParseResult}; use crate::lazy::text::raw::r#struct::{LazyRawTextField, RawTextStructIterator}; -use crate::lazy::text::raw::sequence::RawTextSequenceIterator; +use crate::lazy::text::raw::sequence::{RawTextListIterator, RawTextSExpIterator}; use crate::lazy::text::value::LazyRawTextValue; use crate::result::DecodingError; use crate::{IonError, IonResult, IonType, TimestampPrecision}; @@ -265,6 +265,32 @@ impl<'data> TextBufferView<'data> { )(self) } + /// Matches an optional annotations sequence and a value, including operators. + pub fn match_sexp_value(self) -> IonParseResult<'data, Option>> { + whitespace_and_then(alt(( + value(None, tag(")")), + pair( + opt(Self::match_annotations), + // We need the s-expression parser to recognize the input `--3` as the operator `--` and the + // int `3` while recognizing the input `-3` as the int `-3`. If `match_operator` runs before + // `match_value`, it will consume the sign (`-`) of negative number values, treating + // `-3` as an operator (`-`) and an int (`3`). Thus, we run `match_value` first. + alt((Self::match_value, Self::match_operator)), + ) + .map(|(maybe_annotations, mut value)| { + if let Some(annotations) = maybe_annotations { + value.encoded_value = value + .encoded_value + .with_annotations_sequence(annotations.offset(), annotations.len()); + // Rewind the value's input to include the annotations sequence. + value.input = self.slice_to_end(annotations.offset() - self.offset()); + } + Some(value) + }), + ))) + .parse(self) + } + /// Matches a single value in a list OR the end of the list, allowing for leading whitespace /// and comments in either case. 
/// @@ -337,6 +363,25 @@ impl<'data> TextBufferView<'data> { )(self) } + /// Matches an optional annotation sequence and a trailing value. + pub fn match_annotated_value(self) -> IonParseResult<'data, LazyRawTextValue<'data>> { + pair( + opt(Self::match_annotations), + whitespace_and_then(Self::match_value), + ) + .map(|(maybe_annotations, mut value)| { + if let Some(annotations) = maybe_annotations { + value.encoded_value = value + .encoded_value + .with_annotations_sequence(annotations.offset(), annotations.len()); + // Rewind the value's input to include the annotations sequence. + value.input = self.slice_to_end(annotations.offset() - self.offset()); + } + value + }) + .parse(self) + } + /// Matches a struct field name. That is: /// * A quoted symbol /// * An identifier @@ -387,25 +432,6 @@ impl<'data> TextBufferView<'data> { .map(|(remaining, value)| (remaining, RawStreamItem::Value(value))) } - /// Matches an optional annotation sequence and a trailing value. - pub fn match_annotated_value(self) -> IonParseResult<'data, LazyRawTextValue<'data>> { - pair( - opt(Self::match_annotations), - whitespace_and_then(Self::match_value), - ) - .map(|(maybe_annotations, mut value)| { - if let Some(annotations) = maybe_annotations { - value.encoded_value = value - .encoded_value - .with_annotations_sequence(annotations.offset(), annotations.len()); - // Rewind the value's input to include the annotations sequence. - value.input = self.slice_to_end(annotations.offset() - self.offset()); - } - value - }) - .parse(self) - } - /// Matches a single scalar value or the beginning of a container. 
pub fn match_value(self) -> IonParseResult<'data, LazyRawTextValue<'data>> { alt(( @@ -467,6 +493,12 @@ impl<'data> TextBufferView<'data> { EncodedTextValue::new(MatchedValue::List, matched_list.offset(), length) }, ), + map( + match_and_length(Self::match_sexp), + |(matched_list, length)| { + EncodedTextValue::new(MatchedValue::SExp, matched_list.offset(), length) + }, + ), map( match_and_length(Self::match_struct), |(matched_struct, length)| { @@ -493,7 +525,7 @@ impl<'data> TextBufferView<'data> { } // Scan ahead to find the end of this list. let list_body = self.slice_to_end(1); - let sequence_iter = RawTextSequenceIterator::new(b']', list_body); + let sequence_iter = RawTextListIterator::new(list_body); let span = match sequence_iter.find_span() { Ok(span) => span, // If the complete container isn't available, return an incomplete. @@ -516,6 +548,38 @@ impl<'data> TextBufferView<'data> { Ok((remaining, matched)) } + /// Matches an s-expression (sexp). + /// + /// If the input does not contain the entire s-expression, returns `IonError::Incomplete(_)`. + pub fn match_sexp(self) -> IonMatchResult<'data> { + if self.bytes().first() != Some(&b'(') { + let error = InvalidInputError::new(self); + return Err(nom::Err::Error(IonParseError::Invalid(error))); + } + // Scan ahead to find the end of this sexp + let sexp_body = self.slice_to_end(1); + let sexp_iter = RawTextSExpIterator::new(sexp_body); + let span = match sexp_iter.find_span() { + Ok(span) => span, + // If the complete container isn't available, return an incomplete. + Err(IonError::Incomplete(_)) => return Err(nom::Err::Incomplete(Needed::Unknown)), + // If invalid syntax was encountered, return a failure to prevent nom from trying + // other parser kinds. 
+ Err(e) => { + return { + let error = InvalidInputError::new(self) + .with_label("matching a sexp") + .with_description(format!("{}", e)); + Err(nom::Err::Failure(IonParseError::Invalid(error))) + } + } + }; + // For the matched span, we use `self` again to include the opening `(` + let matched = self.slice(0, span.len()); + let remaining = self.slice_to_end(span.len()); + Ok((remaining, matched)) + } + /// Matches a struct. /// /// If the input does not contain the entire struct, returns `IonError::Incomplete(_)`. @@ -857,9 +921,22 @@ impl<'data> TextBufferView<'data> { fail(self) } + /// Matches an operator symbol, which can only legally appear within an s-expression + fn match_operator(self) -> IonParseResult<'data, LazyRawTextValue<'data>> { + match_and_length(is_a("!#%&*+-./;<=>?@^`|~")) + .map(|(text, length): (TextBufferView, usize)| LazyRawTextValue { + input: self, + encoded_value: EncodedTextValue::new( + MatchedValue::Symbol(MatchedSymbol::Operator), + text.offset(), + length, + ), + }) + .parse(self) + } + /// Matches a symbol ID (`$28`), an identifier (`foo`), or a quoted symbol (`'foo'`). 
fn match_symbol(self) -> IonParseResult<'data, MatchedSymbol> { - // TODO: operators alt(( Self::match_symbol_id, Self::match_identifier, @@ -1798,4 +1875,34 @@ mod tests { mismatch_annotated_value(input); } } + + #[test] + fn test_match_sexp() { + fn match_sexp(input: &str) { + MatchTest::new(input).expect_match(match_length(TextBufferView::match_sexp)); + } + fn mismatch_sexp(input: &str) { + MatchTest::new(input).expect_mismatch(match_length(TextBufferView::match_sexp)); + } + let good_inputs = &[ + "()", + "(1)", + "(1 2)", + "(a)", + "(a b)", + "(a++)", + "(++a)", + "(())", + "((()))", + "(1 (2 (3 4) 5) 6)", + ]; + for input in good_inputs { + match_sexp(input); + } + + let bad_inputs = &["foo", "1", "(", "(1 2 (3 4 5)"]; + for input in bad_inputs { + mismatch_sexp(input); + } + } } diff --git a/src/lazy/text/encoded_value.rs b/src/lazy/text/encoded_value.rs index 78d7102d..208c9f3b 100644 --- a/src/lazy/text/encoded_value.rs +++ b/src/lazy/text/encoded_value.rs @@ -130,6 +130,7 @@ impl EncodedTextValue { MatchedValue::String(_) => IonType::String, MatchedValue::Symbol(_) => IonType::Symbol, MatchedValue::List => IonType::List, + MatchedValue::SExp => IonType::SExp, MatchedValue::Struct => IonType::Struct, } } diff --git a/src/lazy/text/matched.rs b/src/lazy/text/matched.rs index 49b575ba..c3bd5399 100644 --- a/src/lazy/text/matched.rs +++ b/src/lazy/text/matched.rs @@ -50,6 +50,7 @@ pub(crate) enum MatchedValue { String(MatchedString), Symbol(MatchedSymbol), List, + SExp, Struct, // TODO: ...the other types } @@ -383,7 +384,8 @@ pub(crate) enum MatchedSymbol { QuotedWithoutEscapes, /// The symbol is delimited by single quotes and has at least one escape sequence. 
QuotedWithEscapes, - // TODO: Operators in S-Expressions + /// An operator within an S-expression + Operator, } impl MatchedSymbol { @@ -391,11 +393,12 @@ impl MatchedSymbol { &self, matched_input: TextBufferView<'data>, ) -> IonResult> { + use MatchedSymbol::*; match self { - MatchedSymbol::SymbolId => self.read_symbol_id(matched_input), - MatchedSymbol::Identifier => self.read_identifier(matched_input), - MatchedSymbol::QuotedWithEscapes => self.read_quoted_with_escapes(matched_input), - MatchedSymbol::QuotedWithoutEscapes => self.read_quoted_without_escapes(matched_input), + SymbolId => self.read_symbol_id(matched_input), + Identifier | Operator => self.read_unquoted(matched_input), + QuotedWithEscapes => self.read_quoted_with_escapes(matched_input), + QuotedWithoutEscapes => self.read_quoted_without_escapes(matched_input), } } @@ -427,7 +430,9 @@ impl MatchedSymbol { Ok(RawSymbolTokenRef::Text(text.into())) } - pub(crate) fn read_identifier<'data>( + /// Reads a symbol with no surrounding quotes (and therefore no escapes). + /// This is used for both identifiers and (within s-expressions) operators. 
+ pub(crate) fn read_unquoted<'data>( &self, matched_input: TextBufferView<'data>, ) -> IonResult> { diff --git a/src/lazy/text/raw/reader.rs b/src/lazy/text/raw/reader.rs index 1a409429..db7687a4 100644 --- a/src/lazy/text/raw/reader.rs +++ b/src/lazy/text/raw/reader.rs @@ -167,6 +167,12 @@ mod tests { 3 ] + ( + foo++ + 2 + 3 + ) + { // Identifier foo: 100, @@ -323,6 +329,26 @@ mod tests { } assert_eq!(sum, 6); + // (foo++ 1 2) + let sexp = reader.next()?.expect_value()?.read()?.expect_sexp()?; + let mut sexp_elements = sexp.iter(); + assert_eq!( + sexp_elements.next().unwrap()?.read()?, + RawValueRef::Symbol("foo".into()) + ); + assert_eq!( + sexp_elements.next().unwrap()?.read()?, + RawValueRef::Symbol("++".into()) + ); + assert_eq!( + sexp_elements.next().unwrap()?.read()?, + RawValueRef::Int(2.into()) + ); + assert_eq!( + sexp_elements.next().unwrap()?.read()?, + RawValueRef::Int(3.into()) + ); + // {foo: 100, bar: 200, baz: 300} let item = reader.next()?; let value = item.expect_value()?.read()?; diff --git a/src/lazy/text/raw/sequence.rs b/src/lazy/text/raw/sequence.rs index c581f29e..aee5d58e 100644 --- a/src/lazy/text/raw/sequence.rs +++ b/src/lazy/text/raw/sequence.rs @@ -5,41 +5,43 @@ use std::ops::Range; use nom::character::streaming::satisfy; use crate::lazy::decoder::private::LazyContainerPrivate; -use crate::lazy::decoder::{LazyDecoder, LazyRawSequence, LazyRawValue}; +use crate::lazy::decoder::{LazyRawSequence, LazyRawValue}; use crate::lazy::encoding::TextEncoding; use crate::lazy::text::buffer::TextBufferView; use crate::lazy::text::parse_result::AddContext; use crate::lazy::text::parse_result::ToIteratorOutput; -use crate::lazy::text::value::LazyRawTextValue; +use crate::lazy::text::value::{LazyRawTextValue, RawTextAnnotationsIterator}; use crate::{IonResult, IonType}; +// ===== Lists ===== + #[derive(Copy, Clone)] -pub struct LazyRawTextSequence<'data> { +pub struct LazyRawTextList<'data> { pub(crate) value: LazyRawTextValue<'data>, } 
-impl<'data> LazyRawTextSequence<'data> { +impl<'data> LazyRawTextList<'data> { pub fn ion_type(&self) -> IonType { self.value.ion_type() } - pub fn iter(&self) -> RawTextSequenceIterator<'data> { + pub fn iter(&self) -> RawTextListIterator<'data> { // Make an iterator over the input bytes that follow the initial `[` - RawTextSequenceIterator::new(b']', self.value.input.slice_to_end(1)) + RawTextListIterator::new(self.value.input.slice_to_end(1)) } } -impl<'data> LazyContainerPrivate<'data, TextEncoding> for LazyRawTextSequence<'data> { +impl<'data> LazyContainerPrivate<'data, TextEncoding> for LazyRawTextList<'data> { fn from_value(value: LazyRawTextValue<'data>) -> Self { - LazyRawTextSequence { value } + LazyRawTextList { value } } } -impl<'data> LazyRawSequence<'data, TextEncoding> for LazyRawTextSequence<'data> { - type Iterator = RawTextSequenceIterator<'data>; +impl<'data> LazyRawSequence<'data, TextEncoding> for LazyRawTextList<'data> { + type Iterator = RawTextListIterator<'data>; - fn annotations(&self) -> >::AnnotationsIterator { - todo!("lazy sequence annotations") + fn annotations(&self) -> RawTextAnnotationsIterator<'data> { + self.value.annotations() } fn ion_type(&self) -> IonType { @@ -47,7 +49,7 @@ impl<'data> LazyRawSequence<'data, TextEncoding> for LazyRawTextSequence<'data> } fn iter(&self) -> Self::Iterator { - LazyRawTextSequence::iter(self) + LazyRawTextList::iter(self) } fn as_value(&self) -> LazyRawTextValue<'data> { @@ -55,75 +57,51 @@ impl<'data> LazyRawSequence<'data, TextEncoding> for LazyRawTextSequence<'data> } } -impl<'a, 'data> IntoIterator for &'a LazyRawTextSequence<'data> { +impl<'a, 'data> IntoIterator for &'a LazyRawTextList<'data> { type Item = IonResult>; - type IntoIter = RawTextSequenceIterator<'data>; + type IntoIter = RawTextListIterator<'data>; fn into_iter(self) -> Self::IntoIter { self.iter() } } -impl<'a> Debug for LazyRawTextSequence<'a> { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match 
self.value.encoded_value.ion_type() { - IonType::SExp => { - write!(f, "(")?; - for value in self { - write!( - f, - "{:?} ", - value - .map_err(|_| fmt::Error)? - .read() - .map_err(|_| fmt::Error)? - )?; - } - write!(f, ")").unwrap(); - } - IonType::List => { - write!(f, "[")?; - for value in self { - write!( - f, - "{:?},", - value - .map_err(|_| fmt::Error)? - .read() - .map_err(|_| fmt::Error)? - )?; - } - write!(f, "]").unwrap(); - } - _ => unreachable!("LazyRawSequence is only created for list and sexp"), +impl<'a> Debug for LazyRawTextList<'a> { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "[")?; + for value in self { + write!( + f, + "{:?}, ", + value + .map_err(|_| fmt::Error)? + .read() + .map_err(|_| fmt::Error)? + )?; } + write!(f, "]").unwrap(); Ok(()) } } #[derive(Copy, Clone, Debug)] -pub struct RawTextSequenceIterator<'data> { - end_delimiter: u8, +pub struct RawTextListIterator<'data> { input: TextBufferView<'data>, // If this iterator has returned an error, it should return `None` forever afterwards has_returned_error: bool, } -impl<'data> RawTextSequenceIterator<'data> { - pub(crate) fn new( - end_delimiter: u8, - input: TextBufferView<'data>, - ) -> RawTextSequenceIterator<'data> { - RawTextSequenceIterator { - end_delimiter, +impl<'data> RawTextListIterator<'data> { + pub(crate) fn new(input: TextBufferView<'data>) -> RawTextListIterator<'data> { + RawTextListIterator { input, has_returned_error: false, } } } -impl<'data> RawTextSequenceIterator<'data> { +impl<'data> RawTextListIterator<'data> { pub(crate) fn find_span(&self) -> IonResult> { // The input has already skipped past the opening delimiter. 
let start = self.input.offset() - 1; @@ -138,18 +116,15 @@ impl<'data> RawTextSequenceIterator<'data> { }; let (input_after_ws, _ws) = input_after_last .match_optional_comments_and_whitespace() - .with_context("seeking the end of a sequence", input_after_last)?; - let (input_after_end, _end_delimiter) = - satisfy(|c| c == self.end_delimiter as char)(input_after_ws).with_context( - "seeking the closing delimiter of a sequence", - input_after_ws, - )?; + .with_context("seeking the end of a list", input_after_last)?; + let (input_after_end, _end_delimiter) = satisfy(|c| c == ']')(input_after_ws) + .with_context("seeking the closing delimiter of a list", input_after_ws)?; let end = input_after_end.offset(); Ok(start..end) } } -impl<'data> Iterator for RawTextSequenceIterator<'data> { +impl<'data> Iterator for RawTextListIterator<'data> { type Item = IonResult>; fn next(&mut self) -> Option { @@ -171,6 +146,139 @@ impl<'data> Iterator for RawTextSequenceIterator<'data> { } } +// ===== S-Expressions ===== + +#[derive(Copy, Clone)] +pub struct LazyRawTextSExp<'data> { + pub(crate) value: LazyRawTextValue<'data>, +} + +impl<'data> LazyRawTextSExp<'data> { + pub fn ion_type(&self) -> IonType { + self.value.ion_type() + } + + pub fn iter(&self) -> RawTextSExpIterator<'data> { + // Make an iterator over the input bytes that follow the initial `(` + RawTextSExpIterator::new(self.value.input.slice_to_end(1)) + } +} + +#[derive(Copy, Clone, Debug)] +pub struct RawTextSExpIterator<'data> { + input: TextBufferView<'data>, + // If this iterator has returned an error, it should return `None` forever afterwards + has_returned_error: bool, +} + +impl<'data> RawTextSExpIterator<'data> { + pub(crate) fn new(input: TextBufferView<'data>) -> RawTextSExpIterator<'data> { + RawTextSExpIterator { + input, + has_returned_error: false, + } + } +} + +impl<'data> RawTextSExpIterator<'data> { + pub(crate) fn find_span(&self) -> IonResult> { + // The input has already skipped past the opening 
delimiter. + let start = self.input.offset() - 1; + // We need to find the input slice containing the closing delimiter. It's either... + let input_after_last = if let Some(value_result) = self.last() { + let value = value_result?; + // ...the input slice that follows the last sequence value... + value.input.slice_to_end(value.encoded_value.total_length()) + } else { + // ...or there aren't values, so it's just the input after the opening delimiter. + self.input + }; + let (input_after_ws, _ws) = input_after_last + .match_optional_comments_and_whitespace() + .with_context("seeking the end of a list", input_after_last)?; + let (input_after_end, _end_delimiter) = satisfy(|c| c == ')')(input_after_ws) + .with_context("seeking the closing delimiter of a list", input_after_ws)?; + let end = input_after_end.offset(); + Ok(start..end) + } +} + +impl<'data> Iterator for RawTextSExpIterator<'data> { + type Item = IonResult>; + + fn next(&mut self) -> Option { + if self.has_returned_error { + return None; + } + match self.input.match_sexp_value() { + Ok((remaining, Some(value))) => { + self.input = remaining; + Some(Ok(value)) + } + Ok((_remaining, None)) => None, + Err(e) => { + self.has_returned_error = true; + e.with_context("reading the next list value", self.input) + .transpose() + } + } + } +} + +impl<'data> LazyContainerPrivate<'data, TextEncoding> for LazyRawTextSExp<'data> { + fn from_value(value: LazyRawTextValue<'data>) -> Self { + LazyRawTextSExp { value } + } +} + +impl<'data> LazyRawSequence<'data, TextEncoding> for LazyRawTextSExp<'data> { + type Iterator = RawTextSExpIterator<'data>; + + fn annotations(&self) -> RawTextAnnotationsIterator<'data> { + self.value.annotations() + } + + fn ion_type(&self) -> IonType { + self.value.ion_type() + } + + fn iter(&self) -> Self::Iterator { + LazyRawTextSExp::iter(self) + } + + fn as_value(&self) -> LazyRawTextValue<'data> { + self.value + } +} + +impl<'a, 'data> IntoIterator for &'a LazyRawTextSExp<'data> { + type Item 
= IonResult>; + type IntoIter = RawTextSExpIterator<'data>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +impl<'a> Debug for LazyRawTextSExp<'a> { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "(")?; + for value in self { + write!( + f, + "{:?} ", + value + .map_err(|_| fmt::Error)? + .read() + .map_err(|_| fmt::Error)? + )?; + } + write!(f, ")").unwrap(); + + Ok(()) + } +} + #[cfg(test)] mod tests { use std::ops::Range; diff --git a/src/lazy/text/value.rs b/src/lazy/text/value.rs index 1171c10a..830ddd58 100644 --- a/src/lazy/text/value.rs +++ b/src/lazy/text/value.rs @@ -9,7 +9,7 @@ use crate::lazy::text::buffer::TextBufferView; use crate::lazy::text::encoded_value::EncodedTextValue; use crate::lazy::text::matched::MatchedValue; use crate::lazy::text::raw::r#struct::LazyRawTextStruct; -use crate::lazy::text::raw::sequence::LazyRawTextSequence; +use crate::lazy::text::raw::sequence::{LazyRawTextList, LazyRawTextSExp}; use crate::{IonResult, IonType, RawSymbolTokenRef}; /// A value that has been identified in the text input stream but whose data has not yet been read. 
@@ -69,8 +69,12 @@ impl<'data> LazyRawValue<'data, TextEncoding> for LazyRawTextValue<'data> { MatchedValue::String(s) => RawValueRef::String(s.read(matched_input)?), MatchedValue::Symbol(s) => RawValueRef::Symbol(s.read(matched_input)?), MatchedValue::List => { - let lazy_sequence = LazyRawTextSequence { value: *self }; - RawValueRef::List(lazy_sequence) + let lazy_list = LazyRawTextList { value: *self }; + RawValueRef::List(lazy_list) + } + MatchedValue::SExp => { + let lazy_sexp = LazyRawTextSExp { value: *self }; + RawValueRef::SExp(lazy_sexp) } MatchedValue::Struct => { let lazy_struct = LazyRawTextStruct { value: *self }; diff --git a/src/lazy/value.rs b/src/lazy/value.rs index f25caf7e..26ed7ba5 100644 --- a/src/lazy/value.rs +++ b/src/lazy/value.rs @@ -1,7 +1,7 @@ use crate::lazy::decoder::{LazyDecoder, LazyRawValue}; use crate::lazy::encoding::BinaryEncoding; use crate::lazy::r#struct::LazyStruct; -use crate::lazy::sequence::LazySequence; +use crate::lazy::sequence::{LazyList, LazySExp}; use crate::lazy::value_ref::ValueRef; use crate::result::IonFailure; use crate::symbol_ref::AsSymbolRef; @@ -193,15 +193,15 @@ impl<'top, 'data, D: LazyDecoder<'data>> LazyValue<'top, 'data, D> { Blob(b) => ValueRef::Blob(b), Clob(c) => ValueRef::Clob(c), SExp(s) => { - let lazy_sequence = LazySequence { - raw_sequence: s, + let lazy_sexp = LazySExp { + raw_sexp: s, symbol_table: self.symbol_table, }; - ValueRef::SExp(lazy_sequence) + ValueRef::SExp(lazy_sexp) } List(l) => { - let lazy_sequence = LazySequence { - raw_sequence: l, + let lazy_sequence = LazyList { + raw_list: l, symbol_table: self.symbol_table, }; ValueRef::List(lazy_sequence) diff --git a/src/lazy/value_ref.rs b/src/lazy/value_ref.rs index 0b4fb739..58f5cc50 100644 --- a/src/lazy/value_ref.rs +++ b/src/lazy/value_ref.rs @@ -1,7 +1,7 @@ use crate::element::Value; use crate::lazy::decoder::LazyDecoder; use crate::lazy::r#struct::LazyStruct; -use crate::lazy::sequence::LazySequence; +use 
crate::lazy::sequence::{LazyList, LazySExp}; use crate::lazy::str_ref::StrRef; use crate::result::IonFailure; use crate::{Decimal, Int, IonError, IonResult, IonType, SymbolRef, Timestamp}; @@ -9,7 +9,7 @@ use std::fmt::{Debug, Formatter}; /// A [ValueRef] represents a value that has been read from the input stream. Scalar variants contain /// their associated data, while container variants contain a handle to traverse the container. (See -/// [LazySequence] and [LazyStruct].) +/// [LazyList] and [LazyStruct].) /// /// Unlike a [Value], a `ValueRef` avoids heap allocation whenever possible, choosing to point instead /// to existing resources. Numeric values and timestamps are stored within the `ValueRef` itself. @@ -25,8 +25,8 @@ pub enum ValueRef<'top, 'data, D: LazyDecoder<'data>> { Symbol(SymbolRef<'top>), Blob(&'data [u8]), Clob(&'data [u8]), - SExp(LazySequence<'top, 'data, D>), - List(LazySequence<'top, 'data, D>), + SExp(LazySExp<'top, 'data, D>), + List(LazyList<'top, 'data, D>), Struct(LazyStruct<'top, 'data, D>), } @@ -185,7 +185,7 @@ impl<'top, 'data, D: LazyDecoder<'data>> ValueRef<'top, 'data, D> { } } - pub fn expect_list(self) -> IonResult> { + pub fn expect_list(self) -> IonResult> { if let ValueRef::List(s) = self { Ok(s) } else { @@ -193,7 +193,7 @@ impl<'top, 'data, D: LazyDecoder<'data>> ValueRef<'top, 'data, D> { } } - pub fn expect_sexp(self) -> IonResult> { + pub fn expect_sexp(self) -> IonResult> { if let ValueRef::SExp(s) = self { Ok(s) } else { diff --git a/src/raw_symbol_token_ref.rs b/src/raw_symbol_token_ref.rs index dddedc7c..961bab97 100644 --- a/src/raw_symbol_token_ref.rs +++ b/src/raw_symbol_token_ref.rs @@ -64,3 +64,9 @@ impl AsRawSymbolTokenRef for RawSymbolToken { } } } + +impl<'a> From<&'a str> for RawSymbolTokenRef<'a> { + fn from(value: &'a str) -> Self { + RawSymbolTokenRef::Text(Cow::Borrowed(value)) + } +}