Skip to content

Commit

Permalink
Adds LazyRawTextReader support for structs (#619)
Browse files Browse the repository at this point in the history
  • Loading branch information
zslayton committed Aug 23, 2023
1 parent cb1042a commit 998a5bf
Show file tree
Hide file tree
Showing 11 changed files with 676 additions and 137 deletions.
8 changes: 8 additions & 0 deletions src/lazy/decoder.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use crate::lazy::raw_stream_item::RawStreamItem;
use crate::lazy::raw_value_ref::RawValueRef;
use crate::result::IonFailure;
use crate::{IonResult, IonType, RawSymbolTokenRef};
use std::fmt::Debug;

Expand Down Expand Up @@ -86,6 +87,13 @@ pub trait LazyRawStruct<'data, D: LazyDecoder<'data>>:
fn annotations(&self) -> D::AnnotationsIterator;
fn find(&self, name: &str) -> IonResult<Option<D::Value>>;
fn get(&self, name: &str) -> IonResult<Option<RawValueRef<'data, D>>>;
/// Like [`get`](Self::get), but treats a missing field as an error.
///
/// Any error produced by `get` is propagated unchanged. If `get` succeeds but reports that
/// there is no field called `name`, a decoding error that names the missing field is returned.
fn get_expected(&self, name: &str) -> IonResult<RawValueRef<'data, D>> {
    match self.get(name)? {
        Some(value) => Ok(value),
        None => IonResult::decoding_error(format!(
            "did not find expected struct field '{}'",
            name
        )),
    }
}
fn iter(&self) -> Self::Iterator;
}

Expand Down
59 changes: 5 additions & 54 deletions src/lazy/encoding.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
use std::marker::PhantomData;

use crate::lazy::binary::raw::annotations_iterator::RawBinaryAnnotationsIterator;
use crate::lazy::binary::raw::r#struct::LazyRawBinaryStruct;
use crate::lazy::binary::raw::reader::LazyRawBinaryReader;
use crate::lazy::binary::raw::sequence::LazyRawBinarySequence;
use crate::lazy::binary::raw::value::LazyRawBinaryValue;
use crate::lazy::decoder::private::{LazyContainerPrivate, LazyRawFieldPrivate};
use crate::lazy::decoder::{LazyDecoder, LazyRawField, LazyRawStruct};
use crate::lazy::raw_value_ref::RawValueRef;
use crate::lazy::decoder::LazyDecoder;
use crate::lazy::text::raw::r#struct::LazyRawTextStruct;
use crate::lazy::text::raw::reader::LazyRawTextReader;
use crate::lazy::text::raw::sequence::LazyRawTextSequence;
use crate::lazy::text::value::LazyRawTextValue;
use crate::{IonResult, RawSymbolTokenRef};
use std::marker::PhantomData;

// These types derive trait implementations in order to allow types that contain them
// to also derive trait implementations.
Expand All @@ -35,55 +35,6 @@ impl<'data> LazyDecoder<'data> for BinaryEncoding {
// The types below will need to be properly defined in order for the lazy text reader to be complete.
// They exist to satisfy various trait definitions.

/// Placeholder struct type for the text encoding. It carries no data; every trait method
/// implemented for it in this file is a `todo!()` stub.
#[derive(Debug, Clone)]
pub struct ToDoTextStruct;

/// Placeholder struct-field type for the text encoding. It carries no data; every trait method
/// implemented for it in this file is a `todo!()` stub.
#[derive(Debug, Clone)]
pub struct ToDoTextField;

// Stub implementation that exists only so `ToDoTextField` satisfies the trait bound.
impl<'data> LazyRawFieldPrivate<'data, TextEncoding> for ToDoTextField {
// Not yet implemented: calling this panics via `todo!()`.
fn into_value(self) -> LazyRawTextValue<'data> {
todo!()
}
}

// Stub implementation that exists only so `ToDoTextField` satisfies the trait bound.
// Both methods are unimplemented and panic via `todo!()` if called.
impl<'data> LazyRawField<'data, TextEncoding> for ToDoTextField {
// Not yet implemented.
fn name(&self) -> RawSymbolTokenRef<'data> {
todo!()
}

// Not yet implemented.
fn value(&self) -> &LazyRawTextValue<'data> {
todo!()
}
}

// Stub implementation that exists only so `ToDoTextStruct` satisfies the trait bound.
impl<'data> LazyContainerPrivate<'data, TextEncoding> for ToDoTextStruct {
// Not yet implemented: the provided value is ignored and the call panics via `todo!()`.
fn from_value(_value: <TextEncoding as LazyDecoder>::Value) -> Self {
todo!()
}
}

// Stub implementation that exists only so `ToDoTextStruct` satisfies the trait bound.
// Every method below is unimplemented and panics via `todo!()` if called.
impl<'data> LazyRawStruct<'data, TextEncoding> for ToDoTextStruct {
type Field = ToDoTextField;
// A boxed trait object is used as the iterator type; no concrete iterator is defined here.
type Iterator = Box<dyn Iterator<Item = IonResult<ToDoTextField>>>;

// Not yet implemented.
fn annotations(&self) -> ToDoTextAnnotationsIterator<'data> {
todo!()
}

// Not yet implemented; `_name` is ignored.
fn find(&self, _name: &str) -> IonResult<Option<LazyRawTextValue<'data>>> {
todo!()
}

// Not yet implemented; `_name` is ignored.
fn get(&self, _name: &str) -> IonResult<Option<RawValueRef<'data, TextEncoding>>> {
todo!()
}

// Not yet implemented.
fn iter(&self) -> Self::Iterator {
todo!()
}
}

#[derive(Debug, Clone)]
pub struct ToDoTextAnnotationsIterator<'data> {
spooky: &'data PhantomData<()>,
Expand All @@ -101,6 +52,6 @@ impl<'data> LazyDecoder<'data> for TextEncoding {
type Reader = LazyRawTextReader<'data>;
type Value = LazyRawTextValue<'data>;
type Sequence = LazyRawTextSequence<'data>;
type Struct = ToDoTextStruct;
type Struct = LazyRawTextStruct<'data>;
type AnnotationsIterator = ToDoTextAnnotationsIterator<'data>;
}
2 changes: 1 addition & 1 deletion src/lazy/raw_value_ref.rs
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ impl<'data, D: LazyDecoder<'data>> RawValueRef<'data, D> {
if let RawValueRef::Struct(s) = self {
Ok(s)
} else {
IonResult::decoding_error("expected a struct")
IonResult::decoding_error(format!("expected a struct, found: {:?}", self))
}
}
}
Expand Down
206 changes: 199 additions & 7 deletions src/lazy/text/buffer.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::fmt::{Debug, Formatter};
use std::iter::{Copied, Enumerate};
use std::ops::{RangeFrom, RangeTo};
use std::ops::{Range, RangeFrom, RangeTo};
use std::slice::Iter;

use nom::branch::alt;
Expand All @@ -16,10 +16,12 @@ use crate::lazy::encoding::TextEncoding;
use crate::lazy::raw_stream_item::RawStreamItem;
use crate::lazy::text::encoded_value::EncodedTextValue;
use crate::lazy::text::matched::{
MatchedFloat, MatchedInt, MatchedShortString, MatchedString, MatchedSymbol, MatchedValue,
MatchedFloat, MatchedInt, MatchedString, MatchedSymbol, MatchedValue,
};
use crate::lazy::text::parse_result::{InvalidInputError, IonParseError};
use crate::lazy::text::parse_result::{IonMatchResult, IonParseResult};
use crate::lazy::text::raw::r#struct::{LazyRawTextField, RawTextStructIterator};
use crate::lazy::text::raw::sequence::RawTextSequenceIterator;
use crate::lazy::text::value::LazyRawTextValue;
use crate::result::DecodingError;
use crate::{IonError, IonResult, IonType};
Expand Down Expand Up @@ -246,6 +248,78 @@ impl<'data> TextBufferView<'data> {
)(self)
}

/// Attempts to match the next field (a name/value pair) of a struct.
///
/// Returns `Some(field)` when a pair is matched; a comma following the pair is consumed if
/// one is present. Returns `None` when the next thing in the input is the struct's closing
/// `}` rather than another field.
pub fn match_struct_field(self) -> IonParseResult<'data, Option<LazyRawTextField<'data>>> {
    // Leading whitespace and comments are not part of the field. Skip them up front so that
    // `field_start` is a slice of the buffer that begins with the field name itself.
    let (field_start, _ws) = self.match_optional_comments_and_whitespace()?;

    // Case 1: the struct ends here, so there is no field to return.
    let end_of_struct = value(None, Self::match_struct_end);

    // Case 2: a name/value pair, which gets converted into a `LazyRawTextField`.
    let name_value_pair = Self::match_struct_field_name_and_value.map(
        move |((name_syntax, name_span), mut field_value)| {
            // Record where the field name lives in the value's `EncodedTextValue`.
            field_value.encoded_value = field_value.encoded_value.with_field_name(
                name_syntax,
                name_span.start,
                name_span.len(),
            );
            // The value's own buffer slice starts at the value; swap in the slice that
            // starts at the field name instead.
            field_value.input = field_start;
            Some(LazyRawTextField { value: field_value })
        },
    );

    alt((end_of_struct, name_value_pair))(field_start)
}

/// Skips any leading whitespace/comments and then succeeds only if a closing `}` comes next.
///
/// The `}` is only peeked at; it is not consumed from the input.
fn match_struct_end(self) -> IonMatchResult<'data> {
    let closing_brace = peek(tag("}"));
    whitespace_and_then(closing_brace).parse(self)
}

/// Matches `<name> : <value>` followed by a field delimiter.
///
/// On success, yields the syntax that was used for the field name, the range of input bytes
/// occupied by the field name, and the field's value. Each component may be preceded by
/// whitespace and/or comments. The delimiter is either a `,` (which is consumed) or the
/// struct's closing `}` (which is only peeked at).
pub fn match_struct_field_name_and_value(
    self,
) -> IonParseResult<'data, ((MatchedSymbol, Range<usize>), LazyRawTextValue<'data>)> {
    let field_name = whitespace_and_then(match_and_span(Self::match_struct_field_name));
    let colon = whitespace_and_then(tag(":"));
    let field_value = whitespace_and_then(Self::match_value);
    let delimiter = whitespace_and_then(alt((tag(","), peek(tag("}")))));

    terminated(separated_pair(field_name, colon, field_value), delimiter)(self)
}

/// Matches a struct field name. That is:
/// * A quoted symbol
/// * An identifier
/// * A symbol ID
/// * A short-form string
pub fn match_struct_field_name(self) -> IonParseResult<'data, MatchedSymbol> {
alt((
Self::match_symbol,
Self::match_short_string.map(|s| {
// NOTE: We're "casting" the matched short string to a matched symbol here.
// This relies on the fact that the MatchedSymbol logic ignores
// the first and last matched byte, which are usually single
// quotes but in this case are double quotes.
match s {
MatchedString::ShortWithoutEscapes => MatchedSymbol::QuotedWithoutEscapes,
MatchedString::ShortWithEscapes => MatchedSymbol::QuotedWithEscapes,
_ => unreachable!("field name parser matched long string"),
}
}),
))(self)
}

/// Matches syntax that is expected to follow a value in a list: any amount of whitespace and/or
/// comments followed by either a comma (consumed) or an end-of-list `]` (not consumed).
fn match_delimiter_after_list_value(self) -> IonMatchResult<'data> {
Expand Down Expand Up @@ -317,9 +391,15 @@ impl<'data> TextBufferView<'data> {
},
),
map(
match_and_length(tag("[")),
|(_matched_list_start, length)| {
EncodedTextValue::new(MatchedValue::List, self.offset(), length)
match_and_length(Self::match_list),
|(matched_list, length)| {
EncodedTextValue::new(MatchedValue::List, matched_list.offset(), length)
},
),
map(
match_and_length(Self::match_struct),
|(matched_struct, length)| {
EncodedTextValue::new(MatchedValue::Struct, matched_struct.offset(), length)
},
),
// TODO: The other Ion types
Expand All @@ -331,6 +411,74 @@ impl<'data> TextBufferView<'data> {
.parse(self)
}

/// Matches a list.
///
/// If the input does not contain the entire list, returns `IonError::Incomplete(_)`.
pub fn match_list(self) -> IonMatchResult<'data> {
// If it doesn't start with [, it isn't a list.
if self.bytes().first() != Some(&b'[') {
let error = InvalidInputError::new(self);
return Err(nom::Err::Error(IonParseError::Invalid(error)));
}
// Scan ahead to find the end of this list.
let list_body = self.slice_to_end(1);
let sequence_iter = RawTextSequenceIterator::new(b']', list_body);
let span = match sequence_iter.find_span() {
Ok(span) => span,
// If the complete container isn't available, return an incomplete.
Err(IonError::Incomplete(_)) => return Err(nom::Err::Incomplete(Needed::Unknown)),
// If invalid syntax was encountered, return a failure to prevent nom from trying
// other parser kinds.
Err(e) => {
return {
let error = InvalidInputError::new(self)
.with_label("matching a list")
.with_description(format!("{}", e));
Err(nom::Err::Failure(IonParseError::Invalid(error)))
}
}
};

// For the matched span, we use `self` again to include the opening `[`
let matched = self.slice(0, span.len());
let remaining = self.slice_to_end(span.len());
Ok((remaining, matched))
}

/// Matches a struct.
///
/// If the input does not contain the entire struct, returns `IonError::Incomplete(_)`.
pub fn match_struct(self) -> IonMatchResult<'data> {
// If it doesn't start with {, it isn't a struct.
if self.bytes().first() != Some(&b'{') {
let error = InvalidInputError::new(self);
return Err(nom::Err::Error(IonParseError::Invalid(error)));
}
// Scan ahead to find the end of this struct.
let struct_body = self.slice_to_end(1);
let struct_iter = RawTextStructIterator::new(struct_body);
let span = match struct_iter.find_span() {
Ok(span) => span,
// If the complete container isn't available, return an incomplete.
Err(IonError::Incomplete(_)) => return Err(nom::Err::Incomplete(Needed::Unknown)),
// If invalid syntax was encountered, return a failure to prevent nom from trying
// other parser kinds.
Err(e) => {
return {
let error = InvalidInputError::new(self)
.with_label("matching a struct")
.with_description(format!("{}", e));
Err(nom::Err::Failure(IonParseError::Invalid(error)))
}
}
};

// For the matched span, we use `self` again to include the opening `{`
let matched = self.slice(0, span.len());
let remaining = self.slice_to_end(span.len());
Ok((remaining, matched))
}

/// Matches a boolean value.
pub fn match_bool(self) -> IonMatchResult<'data> {
recognize(Self::read_bool)(self)
Expand Down Expand Up @@ -617,7 +765,11 @@ impl<'data> TextBufferView<'data> {
fn match_short_string(self) -> IonParseResult<'data, MatchedString> {
    // A short-form string is a string body wrapped in double quotes. The body parser reports
    // whether it saw any escape sequences; that flag selects which `MatchedString` variant is
    // returned.
    delimited(char('"'), Self::match_short_string_body, char('"'))
        .map(|(_matched, contains_escaped_chars)| {
            if contains_escaped_chars {
                MatchedString::ShortWithEscapes
            } else {
                MatchedString::ShortWithoutEscapes
            }
        })
        .parse(self)
}
Expand Down Expand Up @@ -715,7 +867,13 @@ impl<'data> TextBufferView<'data> {
/// Matches a quoted symbol (`'foo'`).
///
/// The returned `MatchedSymbol` variant records whether the symbol's body contained any
/// escape sequences.
fn match_quoted_symbol(self) -> IonParseResult<'data, MatchedSymbol> {
    delimited(char('\''), Self::match_quoted_symbol_body, char('\''))
        .map(|(_matched, contains_escaped_chars)| {
            if contains_escaped_chars {
                MatchedSymbol::QuotedWithEscapes
            } else {
                MatchedSymbol::QuotedWithoutEscapes
            }
        })
        .parse(self)
}

Expand Down Expand Up @@ -906,6 +1064,20 @@ impl<'data> nom::InputTakeAtPosition for TextBufferView<'data> {

// === end of `nom` trait implementations

/// Wraps `parser` so that any leading whitespace and/or comments (as matched by
/// `TextBufferView::match_optional_comments_and_whitespace`) are skipped before `parser` runs.
///
/// The output of the returned parser is the output of `parser`; whatever was skipped is
/// discarded.
fn whitespace_and_then<'data, P, O>(
    parser: P,
) -> impl Parser<TextBufferView<'data>, O, IonParseError<'data>>
where
    P: Parser<TextBufferView<'data>, O, IonParseError<'data>>,
{
    let skip_leading_trivia = TextBufferView::match_optional_comments_and_whitespace;
    preceded(skip_leading_trivia, parser)
}

/// Augments a given parser such that it returns the matched value and the number of input bytes
/// that it matched.
fn match_and_length<'data, P, O>(
Expand All @@ -926,6 +1098,26 @@ where
}
}

/// Wraps `parser` so that, on success, it yields both the value it matched and the range of
/// input offsets it consumed.
///
/// The range runs from the input's offset before `parser` ran to the offset of the remaining
/// input afterwards. Errors from `parser` are passed through unchanged.
fn match_and_span<'data, P, O>(
    mut parser: P,
) -> impl Parser<TextBufferView<'data>, (O, Range<usize>), IonParseError<'data>>
where
    P: Parser<TextBufferView<'data>, O, IonParseError<'data>>,
{
    move |input: TextBufferView<'data>| {
        let start = input.offset();
        parser.parse(input).map(|(remaining, matched)| {
            let end = remaining.offset();
            (remaining, (matched, start..end))
        })
    }
}

/// Returns the number of bytes that the provided parser matched.
fn match_length<'data, P, O>(
parser: P,
Expand Down

0 comments on commit 998a5bf

Please sign in to comment.