From e0a83d8838c88bed1da0f647829841db587bde35 Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Sat, 15 Jul 2023 23:33:03 -0400 Subject: [PATCH 01/15] Top-level nulls, bools, ints --- Cargo.toml | 2 +- src/lazy/binary/encoding.rs | 19 - src/lazy/binary/mod.rs | 1 - src/lazy/binary/raw/mod.rs | 2 +- src/lazy/binary/raw/reader.rs | 2 +- .../raw/{lazy_raw_sequence.rs => sequence.rs} | 2 +- src/lazy/binary/raw/struct.rs | 2 +- src/lazy/binary/raw/value.rs | 16 +- src/lazy/decoder.rs | 1 + src/lazy/encoding.rs | 133 ++++ src/lazy/mod.rs | 2 + src/lazy/raw_value_ref.rs | 8 + src/lazy/reader.rs | 2 +- src/lazy/sequence.rs | 2 +- src/lazy/struct.rs | 2 +- src/lazy/system_reader.rs | 2 +- src/lazy/text/as_utf8.rs | 33 + src/lazy/text/buffer.rs | 730 ++++++++++++++++++ src/lazy/text/encoded_value.rs | 207 +++++ src/lazy/text/matched.rs | 108 +++ src/lazy/text/mod.rs | 7 + src/lazy/text/parse_result.rs | 274 +++++++ src/lazy/text/raw/mod.rs | 1 + src/lazy/text/raw/reader.rs | 189 +++++ src/lazy/text/value.rs | 66 ++ src/lazy/value.rs | 2 +- src/position.rs | 26 +- src/result/decoding_error.rs | 12 + src/result/incomplete.rs | 7 +- src/result/mod.rs | 6 +- 30 files changed, 1818 insertions(+), 48 deletions(-) delete mode 100644 src/lazy/binary/encoding.rs rename src/lazy/binary/raw/{lazy_raw_sequence.rs => sequence.rs} (98%) create mode 100644 src/lazy/encoding.rs create mode 100644 src/lazy/text/as_utf8.rs create mode 100644 src/lazy/text/buffer.rs create mode 100644 src/lazy/text/encoded_value.rs create mode 100644 src/lazy/text/matched.rs create mode 100644 src/lazy/text/mod.rs create mode 100644 src/lazy/text/parse_result.rs create mode 100644 src/lazy/text/raw/mod.rs create mode 100644 src/lazy/text/raw/reader.rs create mode 100644 src/lazy/text/value.rs diff --git a/Cargo.toml b/Cargo.toml index 5f148b85..54771ff8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -66,7 +66,7 @@ num-bigint = "0.4.3" num-integer = "0.1.44" num-traits = "0.2" arrayvec = "0.7" -smallvec = "1.9.0" 
+smallvec = {version ="1.9.0", features = ["const_generics"]} digest = { version = "0.9", optional = true } sha2 = { version = "0.9", optional = true } diff --git a/src/lazy/binary/encoding.rs b/src/lazy/binary/encoding.rs deleted file mode 100644 index e26d0b51..00000000 --- a/src/lazy/binary/encoding.rs +++ /dev/null @@ -1,19 +0,0 @@ -use crate::lazy::binary::raw::annotations_iterator::RawBinaryAnnotationsIterator; -use crate::lazy::binary::raw::lazy_raw_sequence::LazyRawBinarySequence; -use crate::lazy::binary::raw::r#struct::LazyRawBinaryStruct; -use crate::lazy::binary::raw::reader::LazyRawBinaryReader; -use crate::lazy::binary::raw::value::LazyRawBinaryValue; -use crate::lazy::decoder::LazyDecoder; - -// This type derives trait implementations in order to allow types that contain it to also derive -// trait implementations. -#[derive(Clone, Debug)] -pub struct BinaryEncoding; - -impl<'data> LazyDecoder<'data> for BinaryEncoding { - type Reader = LazyRawBinaryReader<'data>; - type Value = LazyRawBinaryValue<'data>; - type Sequence = LazyRawBinarySequence<'data>; - type Struct = LazyRawBinaryStruct<'data>; - type AnnotationsIterator = RawBinaryAnnotationsIterator<'data>; -} diff --git a/src/lazy/binary/mod.rs b/src/lazy/binary/mod.rs index cfc54e78..93017274 100644 --- a/src/lazy/binary/mod.rs +++ b/src/lazy/binary/mod.rs @@ -2,6 +2,5 @@ mod encoded_value; pub mod immutable_buffer; pub mod raw; -pub(crate) mod encoding; #[cfg(test)] pub(crate) mod test_utilities; diff --git a/src/lazy/binary/raw/mod.rs b/src/lazy/binary/raw/mod.rs index 0861993f..3df82f4d 100644 --- a/src/lazy/binary/raw/mod.rs +++ b/src/lazy/binary/raw/mod.rs @@ -1,5 +1,5 @@ pub mod annotations_iterator; -pub mod lazy_raw_sequence; pub mod reader; +pub mod sequence; pub mod r#struct; pub mod value; diff --git a/src/lazy/binary/raw/reader.rs b/src/lazy/binary/raw/reader.rs index da3a983c..77297e54 100644 --- a/src/lazy/binary/raw/reader.rs +++ b/src/lazy/binary/raw/reader.rs @@ -1,7 +1,7 @@ 
-use crate::lazy::binary::encoding::BinaryEncoding; use crate::lazy::binary::immutable_buffer::ImmutableBuffer; use crate::lazy::binary::raw::value::LazyRawBinaryValue; use crate::lazy::decoder::LazyRawReader; +use crate::lazy::encoding::BinaryEncoding; use crate::lazy::raw_stream_item::RawStreamItem; use crate::result::IonFailure; use crate::IonResult; diff --git a/src/lazy/binary/raw/lazy_raw_sequence.rs b/src/lazy/binary/raw/sequence.rs similarity index 98% rename from src/lazy/binary/raw/lazy_raw_sequence.rs rename to src/lazy/binary/raw/sequence.rs index 16dbb021..66d26fef 100644 --- a/src/lazy/binary/raw/lazy_raw_sequence.rs +++ b/src/lazy/binary/raw/sequence.rs @@ -1,10 +1,10 @@ -use crate::lazy::binary::encoding::BinaryEncoding; use crate::lazy::binary::immutable_buffer::ImmutableBuffer; use crate::lazy::binary::raw::annotations_iterator::RawBinaryAnnotationsIterator; use crate::lazy::binary::raw::reader::DataSource; use crate::lazy::binary::raw::value::LazyRawBinaryValue; use crate::lazy::decoder::private::LazyContainerPrivate; use crate::lazy::decoder::LazyRawSequence; +use crate::lazy::encoding::BinaryEncoding; use crate::{IonResult, IonType}; use std::fmt; use std::fmt::{Debug, Formatter}; diff --git a/src/lazy/binary/raw/struct.rs b/src/lazy/binary/raw/struct.rs index 34ca489a..3f82ed16 100644 --- a/src/lazy/binary/raw/struct.rs +++ b/src/lazy/binary/raw/struct.rs @@ -1,10 +1,10 @@ -use crate::lazy::binary::encoding::BinaryEncoding; use crate::lazy::binary::immutable_buffer::ImmutableBuffer; use crate::lazy::binary::raw::annotations_iterator::RawBinaryAnnotationsIterator; use crate::lazy::binary::raw::reader::DataSource; use crate::lazy::binary::raw::value::LazyRawBinaryValue; use crate::lazy::decoder::private::{LazyContainerPrivate, LazyRawFieldPrivate}; use crate::lazy::decoder::{LazyRawField, LazyRawStruct}; +use crate::lazy::encoding::BinaryEncoding; use crate::lazy::raw_value_ref::RawValueRef; use crate::raw_symbol_token_ref::AsRawSymbolTokenRef; 
use crate::{IonResult, RawSymbolTokenRef}; diff --git a/src/lazy/binary/raw/value.rs b/src/lazy/binary/raw/value.rs index 0ffcb028..9ed2340e 100644 --- a/src/lazy/binary/raw/value.rs +++ b/src/lazy/binary/raw/value.rs @@ -1,13 +1,13 @@ use crate::binary::int::DecodedInt; use crate::binary::uint::DecodedUInt; use crate::lazy::binary::encoded_value::EncodedValue; -use crate::lazy::binary::encoding::BinaryEncoding; use crate::lazy::binary::immutable_buffer::ImmutableBuffer; use crate::lazy::binary::raw::annotations_iterator::RawBinaryAnnotationsIterator; -use crate::lazy::binary::raw::lazy_raw_sequence::LazyRawBinarySequence; use crate::lazy::binary::raw::r#struct::LazyRawBinaryStruct; +use crate::lazy::binary::raw::sequence::LazyRawBinarySequence; use crate::lazy::decoder::private::LazyRawValuePrivate; use crate::lazy::decoder::LazyRawValue; +use crate::lazy::encoding::BinaryEncoding; use crate::lazy::raw_value_ref::RawValueRef; use crate::result::IonFailure; use crate::types::SymbolId; @@ -35,7 +35,7 @@ impl<'a> Debug for LazyRawBinaryValue<'a> { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { write!( f, - "LazyRawValue {{\n val={:?},\n buf={:?}\n}}\n", + "LazyRawBinaryValue {{\n val={:?},\n buf={:?}\n}}\n", self.encoded_value, self.input ) } @@ -54,6 +54,10 @@ impl<'data> LazyRawValue<'data, BinaryEncoding> for LazyRawBinaryValue<'data> { self.ion_type() } + fn is_null(&self) -> bool { + self.is_null() + } + fn annotations(&self) -> RawBinaryAnnotationsIterator<'data> { self.annotations() } @@ -70,6 +74,10 @@ impl<'data> LazyRawBinaryValue<'data> { self.encoded_value.ion_type() } + pub fn is_null(&self) -> bool { + self.encoded_value.header().is_null() + } + /// Returns `true` if this value has a non-empty annotations sequence; otherwise, returns `false`. 
fn has_annotations(&self) -> bool { self.encoded_value.has_annotations() @@ -118,7 +126,7 @@ impl<'data> LazyRawBinaryValue<'data> { /// [`LazyRawBinarySequence`] or [`LazyStruct`](crate::lazy::struct::LazyStruct) /// that can be traversed to access the container's contents. pub fn read(&self) -> ValueParseResult<'data, BinaryEncoding> { - if self.encoded_value.header().is_null() { + if self.is_null() { let raw_value_ref = RawValueRef::Null(self.ion_type()); return Ok(raw_value_ref); } diff --git a/src/lazy/decoder.rs b/src/lazy/decoder.rs index 5f784c42..e53ad2d2 100644 --- a/src/lazy/decoder.rs +++ b/src/lazy/decoder.rs @@ -62,6 +62,7 @@ pub trait LazyRawValue<'data, D: LazyDecoder<'data>>: private::LazyRawValuePrivate<'data> + Clone + Debug { fn ion_type(&self) -> IonType; + fn is_null(&self) -> bool; fn annotations(&self) -> D::AnnotationsIterator; fn read(&self) -> IonResult>; } diff --git a/src/lazy/encoding.rs b/src/lazy/encoding.rs new file mode 100644 index 00000000..784879ad --- /dev/null +++ b/src/lazy/encoding.rs @@ -0,0 +1,133 @@ +use crate::lazy::binary::raw::annotations_iterator::RawBinaryAnnotationsIterator; +use crate::lazy::binary::raw::r#struct::LazyRawBinaryStruct; +use crate::lazy::binary::raw::reader::LazyRawBinaryReader; +use crate::lazy::binary::raw::sequence::LazyRawBinarySequence; +use crate::lazy::binary::raw::value::LazyRawBinaryValue; +use crate::lazy::decoder::private::{LazyContainerPrivate, LazyRawFieldPrivate}; +use crate::lazy::decoder::{LazyDecoder, LazyRawField, LazyRawSequence, LazyRawStruct}; +use crate::lazy::raw_value_ref::RawValueRef; +use crate::lazy::text::raw::reader::LazyRawTextReader; +use crate::lazy::text::value::LazyRawTextValue; +use crate::{IonResult, IonType, RawSymbolTokenRef}; +use std::marker::PhantomData; + +// These types derive trait implementations in order to allow types that containing them +// to also derive trait implementations. + +/// The Ion 1.0 binary encoding. 
+#[derive(Clone, Debug)] +pub struct BinaryEncoding; + +/// The Ion 1.0 text encoding. +#[derive(Clone, Debug)] +pub struct TextEncoding; + +impl<'data> LazyDecoder<'data> for BinaryEncoding { + type Reader = LazyRawBinaryReader<'data>; + type Value = LazyRawBinaryValue<'data>; + type Sequence = LazyRawBinarySequence<'data>; + type Struct = LazyRawBinaryStruct<'data>; + type AnnotationsIterator = RawBinaryAnnotationsIterator<'data>; +} + +// === Placeholders === +// The types below will need to be properly defined in order for the lazy text reader to be complete. +// The exist to satisfy various trait definitions. +#[derive(Debug, Clone)] +pub struct ToDoTextSequence; + +impl<'data> LazyContainerPrivate<'data, TextEncoding> for ToDoTextSequence { + fn from_value(_value: LazyRawTextValue<'data>) -> Self { + todo!() + } +} + +impl<'data> LazyRawSequence<'data, TextEncoding> for ToDoTextSequence { + type Iterator = Box>>>; + + fn annotations(&self) -> ToDoTextAnnotationsIterator<'data> { + todo!() + } + + fn ion_type(&self) -> IonType { + todo!() + } + + fn iter(&self) -> Self::Iterator { + todo!() + } + + fn as_value(&self) -> &>::Value { + todo!() + } +} + +#[derive(Debug, Clone)] +pub struct ToDoTextStruct; + +#[derive(Debug, Clone)] +pub struct ToDoTextField; + +impl<'data> LazyRawFieldPrivate<'data, TextEncoding> for ToDoTextField { + fn into_value(self) -> LazyRawTextValue<'data> { + todo!() + } +} + +impl<'data> LazyRawField<'data, TextEncoding> for ToDoTextField { + fn name(&self) -> RawSymbolTokenRef<'data> { + todo!() + } + + fn value(&self) -> &LazyRawTextValue<'data> { + todo!() + } +} + +impl<'data> LazyContainerPrivate<'data, TextEncoding> for ToDoTextStruct { + fn from_value(_value: ::Value) -> Self { + todo!() + } +} + +impl<'data> LazyRawStruct<'data, TextEncoding> for ToDoTextStruct { + type Field = ToDoTextField; + type Iterator = Box>>; + + fn annotations(&self) -> ToDoTextAnnotationsIterator<'data> { + todo!() + } + + fn find(&self, _name: &str) 
-> IonResult>> { + todo!() + } + + fn get(&self, _name: &str) -> IonResult>> { + todo!() + } + + fn iter(&self) -> Self::Iterator { + todo!() + } +} + +#[derive(Debug, Clone)] +pub struct ToDoTextAnnotationsIterator<'data> { + spooky: &'data PhantomData<()>, +} + +impl<'data> Iterator for ToDoTextAnnotationsIterator<'data> { + type Item = IonResult>; + + fn next(&mut self) -> Option { + todo!() + } +} + +impl<'data> LazyDecoder<'data> for TextEncoding { + type Reader = LazyRawTextReader<'data>; + type Value = LazyRawTextValue<'data>; + type Sequence = ToDoTextSequence; + type Struct = ToDoTextStruct; + type AnnotationsIterator = ToDoTextAnnotationsIterator<'data>; +} diff --git a/src/lazy/mod.rs b/src/lazy/mod.rs index c0c3c413..3f42baa8 100644 --- a/src/lazy/mod.rs +++ b/src/lazy/mod.rs @@ -3,6 +3,7 @@ pub mod binary; pub mod decoder; +pub(crate) mod encoding; pub mod raw_stream_item; pub mod raw_value_ref; pub mod reader; @@ -10,5 +11,6 @@ pub mod sequence; pub mod r#struct; pub mod system_reader; pub mod system_stream_item; +pub mod text; pub mod value; pub mod value_ref; diff --git a/src/lazy/raw_value_ref.rs b/src/lazy/raw_value_ref.rs index 11b09bd6..a0da98eb 100644 --- a/src/lazy/raw_value_ref.rs +++ b/src/lazy/raw_value_ref.rs @@ -69,6 +69,14 @@ impl<'data, D: LazyDecoder<'data>> RawValueRef<'data, D> { } } + pub fn expect_i64(self) -> IonResult { + if let RawValueRef::Int(i) = self { + i.expect_i64() + } else { + IonResult::decoding_error("expected an i64 (int)") + } + } + pub fn expect_float(self) -> IonResult { if let RawValueRef::Float(f) = self { Ok(f) diff --git a/src/lazy/reader.rs b/src/lazy/reader.rs index bd20656c..2f3cfbb4 100644 --- a/src/lazy/reader.rs +++ b/src/lazy/reader.rs @@ -1,8 +1,8 @@ use crate::binary::constants::v1_0::IVM; use crate::element::reader::ElementReader; use crate::element::Element; -use crate::lazy::binary::encoding::BinaryEncoding; use crate::lazy::decoder::LazyDecoder; +use crate::lazy::encoding::BinaryEncoding; use 
crate::lazy::system_reader::LazySystemReader; use crate::lazy::value::LazyValue; use crate::result::IonFailure; diff --git a/src/lazy/sequence.rs b/src/lazy/sequence.rs index 7f7f810f..b3c830f8 100644 --- a/src/lazy/sequence.rs +++ b/src/lazy/sequence.rs @@ -1,5 +1,5 @@ -use crate::lazy::binary::encoding::BinaryEncoding; use crate::lazy::decoder::{LazyDecoder, LazyRawSequence, LazyRawValue}; +use crate::lazy::encoding::BinaryEncoding; use crate::lazy::value::{AnnotationsIterator, LazyValue}; use crate::{Annotations, Element, IntoAnnotatedElement, Sequence, Value}; use crate::{IonError, IonResult, IonType, SymbolTable}; diff --git a/src/lazy/struct.rs b/src/lazy/struct.rs index f7347efd..2251b949 100644 --- a/src/lazy/struct.rs +++ b/src/lazy/struct.rs @@ -1,7 +1,7 @@ use crate::element::builders::StructBuilder; -use crate::lazy::binary::encoding::BinaryEncoding; use crate::lazy::decoder::private::{LazyRawFieldPrivate, LazyRawValuePrivate}; use crate::lazy::decoder::{LazyDecoder, LazyRawStruct}; +use crate::lazy::encoding::BinaryEncoding; use crate::lazy::value::{AnnotationsIterator, LazyValue}; use crate::lazy::value_ref::ValueRef; use crate::result::IonFailure; diff --git a/src/lazy/system_reader.rs b/src/lazy/system_reader.rs index 4936fa8a..bdf76de2 100644 --- a/src/lazy/system_reader.rs +++ b/src/lazy/system_reader.rs @@ -1,4 +1,4 @@ -use crate::lazy::binary::encoding::BinaryEncoding; +use crate::lazy::encoding::BinaryEncoding; use crate::result::IonFailure; use crate::{IonResult, IonType, RawSymbolTokenRef, SymbolTable}; diff --git a/src/lazy/text/as_utf8.rs b/src/lazy/text/as_utf8.rs new file mode 100644 index 00000000..0d1e211c --- /dev/null +++ b/src/lazy/text/as_utf8.rs @@ -0,0 +1,33 @@ +use crate::lazy::text::buffer::TextBufferView; +use crate::position::Position; +use crate::result::DecodingError; +use crate::{IonError, IonResult}; +use smallvec::SmallVec; + +/// Attempts to validate a byte sequence as UTF-8 text. 
If the data is not valid UTF-8, returns +/// an [`IonError`]. +/// +/// The provided `position` is added to the `IonError` that is constructed if the data is not valid. +pub(crate) trait AsUtf8 { + fn as_utf8(&self, position: impl Into) -> IonResult<&str>; +} + +impl AsUtf8 for SmallVec<[u8; N]> { + fn as_utf8(&self, position: impl Into) -> IonResult<&str> { + std::str::from_utf8(self.as_ref()).map_err(|_| { + let decoding_error = + DecodingError::new("encountered invalid UTF-8").with_position(position); + IonError::Decoding(decoding_error) + }) + } +} + +impl<'data> AsUtf8 for TextBufferView<'data> { + fn as_utf8(&self, position: impl Into) -> IonResult<&str> { + std::str::from_utf8(self.bytes()).map_err(|_| { + let decoding_error = + DecodingError::new("encountered invalid UTF-8").with_position(position); + IonError::Decoding(decoding_error) + }) + } +} diff --git a/src/lazy/text/buffer.rs b/src/lazy/text/buffer.rs new file mode 100644 index 00000000..54ecf4f6 --- /dev/null +++ b/src/lazy/text/buffer.rs @@ -0,0 +1,730 @@ +use crate::lazy::encoding::TextEncoding; +use crate::lazy::raw_stream_item::RawStreamItem; +use crate::lazy::text::encoded_value::EncodedTextValue; +use crate::lazy::text::matched::{MatchedInt, MatchedValue}; +use crate::lazy::text::parse_result::IonParseError; +use crate::lazy::text::parse_result::{IonMatchResult, IonParseResult}; +use crate::lazy::text::value::LazyRawTextValue; +use crate::{IonResult, IonType}; +use nom::branch::alt; +use nom::bytes::streaming::{is_a, tag, take_while1}; +use nom::character::streaming::{char, digit1, one_of}; +use nom::combinator::{map, opt, peek, recognize, success, value}; +use nom::error::{ErrorKind, ParseError}; +use nom::multi::many0_count; +use nom::sequence::{delimited, pair, preceded, separated_pair, terminated}; +use nom::{CompareResult, IResult, InputLength, InputTake, Needed, Parser}; +use std::fmt::{Debug, Formatter}; +use std::iter::{Copied, Enumerate}; +use std::ops::{RangeFrom, RangeTo}; +use 
std::slice::Iter; + +impl<'a> Debug for TextBufferView<'a> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "TextBufferView {{")?; + // Try to read the next several bytes from the buffer as UTF-8... + let text_result = std::str::from_utf8(self.data); + // ...if it works, print the first 32 unicode scalars... + if let Ok(text) = text_result { + write!(f, "\"{}...\"", text.chars().take(32).collect::())?; + } else { + // ...if it doesn't, print the first 32 bytes in hex. + write!(f, "Invalid UTF-8")?; + for byte in self.bytes().iter().take(32) { + write!(f, "{:x?} ", *byte)?; + } + if self.bytes().len() > 32 { + write!(f, "...{} more bytes", self.bytes().len() - 32)?; + } + } + write!(f, "}}") + } +} + +/// The Ion specification's enumeration of whitespace characters. +const WHITESPACE_CHARACTERS: &[char] = &[ + ' ', // Space + '\t', // Tab + '\r', // Carriage return + '\n', // Newline + '\x09', // Horizontal tab + '\x0B', // Vertical tab + '\x0C', // Form feed +]; + +/// Same as [WHITESPACE_CHARACTERS], but formatted as a string for use in some `nom` APIs +const WHITESPACE_CHARACTERS_AS_STR: &str = " \t\r\n\x09\x0B\x0C"; + +/// A slice of unsigned bytes that can be cheaply copied and which defines methods for parsing +/// the various encoding elements of a text Ion stream. +/// +/// Upon success, each parsing method on the `TextBufferView` will return the value that was read +/// and a new copy of the `TextBufferView` that starts _after_ the bytes that were parsed. +/// +/// Methods that begin with `match_` return the input slice that they matched OR a `MatchedValue` +/// that retains additional information found during the matching process. +#[derive(PartialEq, Clone, Copy)] +pub(crate) struct TextBufferView<'a> { + // `data` is a slice of remaining data in the larger input stream. + // `offset` is the absolute position in the overall input stream where that slice begins. 
+ // + // input: 00 01 02 03 04 05 06 07 08 09 + // └────┬────┘ + // data: &[u8] + // offset: 6 + data: &'a [u8], + offset: usize, +} + +pub(crate) type ParseResult<'a, T> = IonResult<(T, TextBufferView<'a>)>; + +impl<'data> TextBufferView<'data> { + /// Constructs a new `TextBufferView` that wraps `data`. + #[inline] + pub fn new(data: &[u8]) -> TextBufferView { + Self::new_with_offset(data, 0) + } + + pub fn new_with_offset(data: &[u8], offset: usize) -> TextBufferView { + TextBufferView { data, offset } + } + + /// Returns a subslice copy of the [`TextBufferView`] that starts at `offset` and continues for + /// `length` bytes. + /// + /// Note that `offset` is relative to the beginning of the buffer, not the beginning of the + /// larger stream of which the buffer is a piece. + pub fn slice(&self, offset: usize, length: usize) -> TextBufferView<'data> { + TextBufferView { + data: &self.data[offset..offset + length], + offset: self.offset + offset, + } + } + + /// Returns a subslice copy of the [`TextBufferView`] that starts at `offset` and continues + /// to the end. + /// + /// Note that `offset` is relative to the beginning of the buffer, not the beginning of the + /// larger stream of which the buffer is a piece. + pub fn slice_to_end(&self, offset: usize) -> TextBufferView<'data> { + TextBufferView { + data: &self.data[offset..], + offset: self.offset + offset, + } + } + + /// Returns a slice containing all of the buffer's bytes. + pub fn bytes(&self) -> &[u8] { + self.data + } + + /// Returns the number of bytes between the start of the original input byte array and the + /// subslice of that byte array that this `TextBufferView` represents. + pub fn offset(&self) -> usize { + self.offset + } + + /// Returns the number of bytes in the buffer. + pub fn len(&self) -> usize { + self.data.len() + } + + /// Returns `true` if there are no bytes in the buffer. Otherwise, returns `false`. 
+ pub fn is_empty(&self) -> bool { + self.data.is_empty() + } + + /// Creates a copy of this `TextBufferView` that begins `num_bytes_to_consume` further into the + /// slice. + #[inline] + pub fn consume(&self, num_bytes_to_consume: usize) -> Self { + // This assertion is always run during testing but is removed in the release build. + debug_assert!(num_bytes_to_consume <= self.len()); + Self { + data: &self.data[num_bytes_to_consume..], + offset: self.offset + num_bytes_to_consume, + } + } + + // An adapter for nom::combinator::success. + // Always succeeds and consumes none of the input. Returns an empty slice of the buffer. + pub fn match_nothing(self) -> IonMatchResult<'data> { + // Return an empty slice from the head position + success(self.slice(0, 0))(self) + } + + pub fn match_whitespace(self) -> IonMatchResult<'data> { + is_a(WHITESPACE_CHARACTERS_AS_STR)(self) + } + + pub fn match_optional_whitespace(self) -> IonMatchResult<'data> { + // Either match whitespace and return what follows or just return the input as-is. + // This will always return `Ok`, but is packaged as an IonMatchResult for compatability + alt((Self::match_whitespace, Self::match_nothing))(self) + } + + pub fn read_top_level(self) -> IonParseResult<'data, RawStreamItem<'data, TextEncoding>> { + let (remaining, value) = match self.read_value() { + Ok(value) => value, + Err(e) => return Err(e), + }; + + // TODO: Check to see if `value` is actually an IVM. + // => If it's a symbol, try the IVM parser on it and see if it succeeds. + // For now, we just return the value. + Ok((remaining, RawStreamItem::Value(value))) + } + + pub fn read_value(self) -> IonParseResult<'data, LazyRawTextValue<'data>> { + alt(( + // For `null` and `bool`, we use `read_` instead of `match_` because there's no additional + // parsing to be done. 
+ map(match_and_length(Self::read_null), |(ion_type, length)| { + EncodedTextValue::new(MatchedValue::Null(ion_type), self.offset(), length) + }), + map(match_and_length(Self::read_bool), |(value, length)| { + EncodedTextValue::new(MatchedValue::Bool(value), self.offset(), length) + }), + // For `int` and the other types, we use `match` and store the partially-processed input in the + // `matched_value` field of the `EncodedTextValue` we return. + map( + match_and_length(Self::match_int), + |(matched_int, length)| { + EncodedTextValue::new(MatchedValue::Int(matched_int), self.offset(), length) + }, + ), + // TODO: The other Ion types + )) + .map(|encoded_value| LazyRawTextValue { + encoded_value, + input: self, + }) + .parse(self) + } + + pub fn match_bool(self) -> IonMatchResult<'data> { + recognize(Self::read_bool)(self) + } + + pub fn read_bool(self) -> IonParseResult<'data, bool> { + terminated( + alt((value(true, tag("true")), value(false, tag("false")))), + Self::peek_stop_character, + )(self) + } + + pub fn match_null(self) -> IonMatchResult<'data> { + recognize(Self::read_null)(self) + } + + pub fn read_null(self) -> IonParseResult<'data, IonType> { + delimited( + tag("null"), + opt(preceded(char('.'), Self::read_ion_type)), + Self::peek_stop_character, + ) + .map(|explicit_ion_type| explicit_ion_type.unwrap_or(IonType::Null)) + .parse(self) + } + + fn match_ion_type(self) -> IonMatchResult<'data> { + recognize(Self::read_ion_type)(self) + } + + fn read_ion_type(self) -> IonParseResult<'data, IonType> { + alt(( + value(IonType::Null, tag("null")), + value(IonType::Bool, tag("bool")), + value(IonType::Int, tag("int")), + value(IonType::Float, tag("float")), + value(IonType::Decimal, tag("decimal")), + value(IonType::Timestamp, tag("timestamp")), + value(IonType::Symbol, tag("symbol")), + value(IonType::String, tag("string")), + value(IonType::Clob, tag("clob")), + value(IonType::Blob, tag("blob")), + value(IonType::List, tag("list")), + value(IonType::SExp, 
tag("sexp")), + value(IonType::Struct, tag("struct")), + ))(self) + } + + fn match_stop_character(self) -> IonMatchResult<'data> { + recognize(one_of("{}[](),\"' \t\n\r\u{0b}\u{0c}")).parse(self) + } + + fn peek_stop_character(self) -> IonMatchResult<'data> { + peek(Self::match_stop_character).parse(self) + } + + /// Matches the three parts of an int--its base, its sign, and its digits--without actually + /// constructing an Int from them. + fn match_int(self) -> IonParseResult<'data, MatchedInt> { + terminated( + // We test for base 16 and base 2 so the '0x' or '0b' isn't confused for a leading zero + // in a base 10 number, which would be illegal. + alt(( + Self::match_base_2_int, + Self::match_base_16_int, + Self::match_base_10_int, + )), + Self::peek_stop_character, + )(self) + } + + /// Matches a base-2 notation integer (e.g. `0b0`, `0B1010`, or `-0b0111`) and returns the + /// partially parsed value as a [`MatchedInt`]. + fn match_base_2_int(self) -> IonParseResult<'data, MatchedInt> { + separated_pair( + opt(char('-')), + alt((tag("0b"), tag("0B"))), + Self::match_base_2_int_digits, + ) + .map(|(maybe_sign, digits)| { + MatchedInt::new(2, maybe_sign.is_some(), digits.offset() - self.offset()) + }) + .parse(self) + } + + /// Matches the digits of a base-2 integer. + fn match_base_2_int_digits(self) -> IonMatchResult<'data> { + recognize(terminated( + // Zero or more digits-followed-by-underscores + many0_count(pair(is_a("01"), char('_'))), + // One or more digits + is_a("01"), + ))(self) + } + + /// Matches a base-10 notation integer (e.g. `0`, `255`, or `-1_024`) and returns the partially + /// parsed value as a [`MatchedInt`]. + fn match_base_10_int(self) -> IonParseResult<'data, MatchedInt> { + pair(opt(char('-')), Self::match_base_10_int_digits) + .map(|(maybe_sign, digits)| { + MatchedInt::new(10, maybe_sign.is_some(), digits.offset() - self.offset()) + }) + .parse(self) + } + + /// Matches the digits of a base-10 integer. (i.e. 
An integer without a sign.) + fn match_base_10_int_digits(self) -> IonMatchResult<'data> { + alt(( + // The number is either a zero... + recognize(char('0')), + // Or it's a non-zero followed by some number of '_'-separated digits + Self::match_base_10_digits_before_dot, + ))(self) + } + + /// Matches either: + /// * a zero + /// * a non-zero followed by some number of digits with optional underscores + fn match_base_10_digits_before_dot(self) -> IonMatchResult<'data> { + alt(( + tag("0"), + recognize(pair( + Self::match_base_10_leading_digit, + Self::match_base_10_trailing_digits, + )), + ))(self) + } + + /// Matches the first digit of a multi-digit base-10 integer. (i.e. Any digit but zero.) + fn match_base_10_leading_digit(self) -> IonMatchResult<'data> { + recognize(one_of("123456789"))(self) + } + + /// Matches any number of digits with underscores optionally appearing in the middle. + /// This parser accepts leading zeros, which is why it cannot be used for the beginning + /// of a number. + fn match_base_10_trailing_digits(self) -> IonMatchResult<'data> { + recognize(many0_count(pair(opt(char('_')), digit1)))(self) + } + + /// Matches a base-10 notation integer (e.g. `0x0`, `0X20`, or `-0xCAFE`) and returns the + /// partially parsed value as a [`MatchedInt`]. 
+ fn match_base_16_int(self) -> IonParseResult<'data, MatchedInt> { + separated_pair( + opt(char('-')), + alt((tag("0x"), tag("0X"))), + Self::match_base_16_int_trailing_digits, + ) + .map(|(maybe_sign, digits)| { + MatchedInt::new(16, maybe_sign.is_some(), digits.offset() - self.offset()) + }) + .parse(self) + } + + /// Matches the digits that follow the '0x' or '0X' in a base-16 integer + fn match_base_16_int_trailing_digits(self) -> IonMatchResult<'data> { + recognize(terminated( + // Zero or more digits-followed-by-underscores + many0_count(pair(Self::take_base_16_digits1, char('_'))), + // One or more digits + Self::take_base_16_digits1, + ))(self) + } + + /// Recognizes 1 or more consecutive base-16 digits. + // This function's "1" suffix is a style borrowed from `nom`. + fn take_base_16_digits1(self) -> IonMatchResult<'data> { + take_while1(|b: u8| b.is_ascii_hexdigit())(self) + } +} + +// === nom trait implementations === +// The trait implementations that follow are necessary for `TextBufferView` to be used as an input +// type in `nom` parsers. (`nom` only supports `&str` and `&[u8]` out of the box.) Defining our own +// input type makes it possible for us to carry around additional context during the parsing process, +// which is important for providing helpful error messages. For example: we can include the absolute +// offset of the input slice currently being read in our error messages. +// +// As `TextBufferView` is just a wrapper around a `&[u8]`, these implementations mostly delegate +// to the existing trait impls for `&[u8]`. 
+ +impl<'data> nom::InputTake for TextBufferView<'data> { + fn take(&self, count: usize) -> Self { + self.slice(0, count) + } + + fn take_split(&self, count: usize) -> (Self, Self) { + let (before, after) = self.data.split_at(count); + let buffer_before = TextBufferView::new_with_offset(before, self.offset()); + let buffer_after = TextBufferView::new_with_offset(after, self.offset() + count); + // Nom's convention is to place the remaining portion of the buffer first, which leads to + // a potentially surprising reversed tuple order. + (buffer_after, buffer_before) + } +} + +impl<'data> nom::InputLength for TextBufferView<'data> { + fn input_len(&self) -> usize { + self.len() + } +} + +impl<'data> nom::InputIter for TextBufferView<'data> { + type Item = u8; + type Iter = Enumerate; + type IterElem = Copied>; + + fn iter_indices(&self) -> Self::Iter { + self.iter_elements().enumerate() + } + + fn iter_elements(&self) -> Self::IterElem { + self.data.iter().copied() + } + + fn position

(&self, predicate: P) -> Option + where + P: Fn(Self::Item) -> bool, + { + self.data.iter().position(|b| predicate(*b)) + } + + fn slice_index(&self, count: usize) -> Result { + self.data.slice_index(count) + } +} + +impl<'a, 'b> nom::Compare<&'a str> for TextBufferView<'b> { + fn compare(&self, t: &'a str) -> CompareResult { + self.data.compare(t.as_bytes()) + } + + fn compare_no_case(&self, t: &'a str) -> CompareResult { + self.data.compare_no_case(t.as_bytes()) + } +} + +impl<'data> nom::Offset for TextBufferView<'data> { + fn offset(&self, second: &Self) -> usize { + self.data.offset(second.data) + } +} + +impl<'data> nom::Slice> for TextBufferView<'data> { + fn slice(&self, range: RangeFrom) -> Self { + self.slice_to_end(range.start) + } +} + +impl<'data> nom::Slice> for TextBufferView<'data> { + fn slice(&self, range: RangeTo) -> Self { + self.slice(0, range.end) + } +} + +impl<'data> nom::InputTakeAtPosition for TextBufferView<'data> { + type Item = u8; + + fn split_at_position>(&self, predicate: P) -> IResult + where + P: Fn(Self::Item) -> bool, + { + match self.data.iter().position(|c| predicate(*c)) { + Some(i) => Ok(self.take_split(i)), + None => Err(nom::Err::Incomplete(Needed::new(1))), + } + } + + fn split_at_position1>( + &self, + predicate: P, + e: ErrorKind, + ) -> IResult + where + P: Fn(Self::Item) -> bool, + { + match self.data.iter().position(|c| predicate(*c)) { + Some(0) => Err(nom::Err::Error(E::from_error_kind(*self, e))), + Some(i) => Ok(self.take_split(i)), + None => Err(nom::Err::Incomplete(Needed::new(1))), + } + } + + fn split_at_position_complete>( + &self, + predicate: P, + ) -> IResult + where + P: Fn(Self::Item) -> bool, + { + match self.data.iter().position(|c| predicate(*c)) { + Some(i) => Ok(self.take_split(i)), + None => Ok(self.take_split(self.input_len())), + } + } + + fn split_at_position1_complete>( + &self, + predicate: P, + e: ErrorKind, + ) -> IResult + where + P: Fn(Self::Item) -> bool, + { + match 
self.data.iter().position(|c| predicate(*c)) { + Some(0) => Err(nom::Err::Error(E::from_error_kind(*self, e))), + Some(i) => Ok(self.take_split(i)), + None => { + if self.is_empty() { + Err(nom::Err::Error(E::from_error_kind(*self, e))) + } else { + Ok(self.take_split(self.input_len())) + } + } + } + } +} + +// === end of `nom` trait implementations + +/// Augments a given parser such that it returns the matched value and the number of input bytes +/// that it matched. +fn match_and_length<'data, P, O>( + mut parser: P, +) -> impl Parser, (O, usize), IonParseError<'data>> +where + P: Parser, O, IonParseError<'data>>, +{ + move |input: TextBufferView<'data>| { + let offset_before = input.offset(); + let (remaining, matched) = match parser.parse(input) { + Ok((remaining, matched)) => (remaining, matched), + Err(e) => return Err(e), + }; + let offset_after = remaining.offset(); + let match_length = offset_after - offset_before; + Ok((remaining, (matched, match_length))) + } +} + +/// Returns the number of bytes that the provided parser matched. +fn match_length<'data, P, O>( + parser: P, +) -> impl Parser, usize, IonParseError<'data>> +where + P: Parser, O, IonParseError<'data>>, +{ + // Call `match_and_length` and discard the output + match_and_length(parser).map(|(_output, match_length)| match_length) +} + +#[cfg(test)] +mod tests { + use super::*; + + /// Stores an input string that can be tested against a given parser. + struct MatchTest { + input: String, + } + + impl MatchTest { + /// Takes an `input` string and appends a trailing space to it, guaranteeing that the + /// contents of the input are considered a complete token. 
+ fn new(input: &str) -> Self { + MatchTest { + input: format!("{input} "), // add trailing space + } + } + + fn try_match<'data, P, O>(&'data self, parser: P) -> IonParseResult<'data, usize> + where + P: Parser, O, IonParseError<'data>>, + { + let buffer = TextBufferView::new(self.input.as_bytes()); + match_length(parser).parse(buffer) + } + + fn expect_match<'data, P, O>(&'data self, parser: P) + where + P: Parser, O, IonParseError<'data>>, + { + let result = self.try_match(parser); + let (_remaining, match_length) = result.unwrap(); + // Inputs have a trailing space that should _not_ be part of the match + assert_eq!( + match_length, + self.input.len() - 1, + "\nInput: '{}'\nMatched: '{}'\n", + self.input, + &self.input[..match_length] + ); + } + + fn expect_mismatch<'data, P, O>(&'data self, parser: P) + where + P: Parser, O, IonParseError<'data>>, + { + let result = self.try_match(parser); + // We expect this to fail for one reason or another + result.unwrap_err(); + } + } + + #[test] + fn test_match_stop_char() { + MatchTest::new(" ").expect_match(match_length(TextBufferView::match_stop_character)); + } + + #[test] + fn test_match_bool() { + fn match_bool(input: &str) { + MatchTest::new(input).expect_match(match_length(TextBufferView::match_bool)); + } + fn mismatch_bool(input: &str) { + MatchTest::new(input).expect_mismatch(match_length(TextBufferView::match_bool)); + } + + match_bool("true"); + match_bool("false"); + + mismatch_bool("True"); + mismatch_bool("TRUE"); + mismatch_bool("False"); + mismatch_bool("FALSE"); + mismatch_bool("potato"); + mismatch_bool("42"); + } + + #[test] + fn test_match_null() { + fn match_null(input: &str) { + MatchTest::new(input).expect_match(match_length(TextBufferView::match_null)); + } + fn mismatch_null(input: &str) { + MatchTest::new(input).expect_mismatch(match_length(TextBufferView::match_null)); + } + let good_inputs = &[ + "null", + "null.null", + "null.bool", + "null.int", + "null.float", + "null.decimal", + 
"null.timestamp", + "null.symbol", + "null.string", + "null.clob", + "null.blob", + "null.list", + "null.sexp", + "null.struct", + ]; + for input in good_inputs { + match_null(input); + } + + let bad_inputs = &[ + "-1", + "null.hello", + "nullnull", + "nullify", + "null..int", + "string.null", + ]; + for input in bad_inputs { + mismatch_null(input); + } + } + + #[test] + fn test_match_int() { + fn match_int(input: &str) { + MatchTest::new(input).expect_match(match_length(TextBufferView::match_int)); + } + fn mismatch_int(input: &str) { + MatchTest::new(input).expect_mismatch(match_length(TextBufferView::match_int)); + } + let good_inputs = &[ + // Base 2 integers + "0b0", + "0B0", + "0b1", + "0B1", + "0b0001", + "0B1111", + "0b1111_1111", + "0B1010_1010", + // Base 10 integers + "0", + "13", + "942", + "7_216", + "1_000_000", + "9_999_999", + // Base 16 integers + "0x0", + "0x20", + "0x0A", + "0xcafe", + "0xCAFE", + "0XcaFE", + "0xC_A_F_E", + "0Xca_FE", + ]; + for input in good_inputs { + match_int(input); + let negative = format!("-{input}"); + match_int(&negative); + } + + let bad_inputs = &[ + "00", // Zero with leading zero + "0123", // Non-zero with leading zero + "--5", // Double negative + "+5", // Explicit positive + "1__000__000", // More than one underscore at a time + "_123", // Leading underscore + "0x0x5", // Multiple 0x prefixes + "0xx5", // Multiple Xs after 0 + "0x", // Base 16 prefix w/no number + "0b", // Base 2 prefix w/no number + ]; + for input in bad_inputs { + mismatch_int(input); + } + } +} diff --git a/src/lazy/text/encoded_value.rs b/src/lazy/text/encoded_value.rs new file mode 100644 index 00000000..e1a3bcc1 --- /dev/null +++ b/src/lazy/text/encoded_value.rs @@ -0,0 +1,207 @@ +use crate::lazy::text::matched::MatchedValue; +use crate::IonType; +use std::ops::Range; + +/// Represents the type, offset, and length metadata of the various components of an encoded value +/// in a text input stream. 
+/// +/// Each [`LazyRawTextValue`](crate::lazy::text::value::LazyRawTextValue) contains an `EncodedValue`, +/// allowing a user to re-read (that is: parse) the body of the value as many times as necessary +/// without re-parsing its header information each time. +#[derive(Clone, Copy, Debug, PartialEq)] +pub(crate) struct EncodedTextValue { + // Each encoded text value has up to three components, appearing in the following order: + // + // [ field_name? | annotations? | data ] + // + // Components shown with a `?` are optional. + + // The following is an example encoding of a struct field with an annotated value-- the only kind + // of Ion value that has both of the optional components--that appears 5 gigabytes into the input + // stream: + // + // ┌─── field_name_offset: 12 + // │ ┌─── annotations_offset: 5 + // │ │ ┌─── data_offset: 5_000_000_012 + // price: USD::55.99, + // └─┬─┘ └─┬─┘└─┬─┘ + // │ │ └─ data_length: 5 + // │ └─ annotations_length: 5 + // └─ field_name_length: 5 + // + // Notice that only `data_offset` is an absolute offset from the beginning of the stream; + // this is because `data` is the only field that is always guaranteed to be present. + // `field_name_offset` and `annotations_offset` are stored as the number of bytes _before_ + // `data_offset`, allowing them to be stored in fewer bytes. + + // The absolute position (in bytes) of this value's `data` component within the overall stream + // being decoded. + data_offset: usize, + // The number of bytes _before_ `data_offset` at which the field name begins. If this value + // does not have a field name, this value will be zero. + field_name_offset: u32, + // The number of bytes _before_ `data_offset` at which the annotations sequence begins. + // If this value does not have an annotations sequence, this value will be zero. + annotations_offset: u32, + + // The number of bytes used to encode the data component of this Ion value. 
+ data_length: usize, + // The number of bytes used to encode the field name preceding the data, if any. + // If there is no field name (i.e. the value is not inside a struct), this will be zero. + // If there is whitespace before the field name, this will not include it. + field_name_length: u32, + // The number of bytes used to encode the annotations sequence preceding the data, if any. + // If there is no annotations sequence, this will be zero. // If there is whitespace before the + // annotations sequence, this will not include it. + annotations_length: u32, + + // Information that was recorded about the value as it was being matched. + // For some types (e.g. bool), matching the text is the complete parsing process so the whole + // value is stored. For others (e.g. a timestamp), the various components of the value are + // recognized during matching and partial information like subfield offsets can be stored here. + matched_value: MatchedValue, +} + +impl EncodedTextValue { + pub(crate) fn new( + matched_value: MatchedValue, + offset: usize, + length: usize, + ) -> EncodedTextValue { + EncodedTextValue { + data_offset: offset, + data_length: length, + field_name_length: 0, + field_name_offset: 0, + annotations_offset: 0, + annotations_length: 0, + matched_value, + } + } + + // The field name range should contain the field name literal itself without any trailing + // whitespace or the delimiting ':'. + // Examples: + // foo + // 'foo' + // "foo" + // $10 + pub(crate) fn with_field_name(mut self, offset: usize, length: usize) -> EncodedTextValue { + self.field_name_offset = (self.data_offset - offset) as u32; + self.field_name_length = length as u32; + self + } + + // The annotations should include all of the symbol tokens, their delimiting '::'s, and any + // interstitial whitespace. It should not include any leading/trailing whitespace or the value + // itself. 
+ // Examples: + // foo::bar:: + // 'foo'::'bar':: + // foo :: 'bar' :: + pub(crate) fn with_annotations_sequence( + mut self, + offset: usize, + length: usize, + ) -> EncodedTextValue { + self.annotations_offset = (self.data_offset - offset) as u32; + self.annotations_length = length as u32; + self + } + + pub fn ion_type(&self) -> IonType { + match self.matched_value { + MatchedValue::Null(ion_type) => ion_type, + MatchedValue::Bool(_) => IonType::Bool, + MatchedValue::Int(_) => IonType::Int, + } + } + + pub fn is_null(&self) -> bool { + matches!(self.matched_value, MatchedValue::Null(_)) + } + + pub fn data_length(&self) -> usize { + self.data_length + } + + pub fn data_range(&self) -> Range { + self.data_offset..(self.data_offset + self.data_length) + } + + pub fn field_name_range(&self) -> Option> { + if self.field_name_offset == 0 { + return None; + } + let start = self.data_offset - (self.field_name_offset as usize); + let end = start + (self.field_name_length as usize); + Some(start..end) + } + + pub fn annotations_range(&self) -> Option> { + if self.annotations_offset == 0 { + return None; + } + let start = self.data_offset - (self.annotations_offset as usize); + let end = start + (self.annotations_length as usize); + Some(start..end) + } + + pub fn has_field_name(&self) -> bool { + self.field_name_offset > 0 + } + + pub fn has_annotations(&self) -> bool { + self.annotations_offset > 0 + } + + /// Returns the total number of bytes used to represent the current value, including the + /// field ID (if any), its annotations (if any), its header (type descriptor + length bytes), + /// and its value. 
+ pub fn total_length(&self) -> usize { + self.data_length + u32::max(self.annotations_offset, self.field_name_offset) as usize + } + + pub fn matched(&self) -> MatchedValue { + self.matched_value + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn total_length_data_only() { + let value = EncodedTextValue::new(MatchedValue::Null(IonType::Null), 100, 12); + assert_eq!(value.total_length(), 12); + } + + #[test] + fn total_length_data_with_field_name() { + let value = EncodedTextValue::new(MatchedValue::Null(IonType::Null), 100, 12) + .with_field_name(90, 4); + assert_eq!(value.total_length(), 22); + } + + #[test] + fn total_length_data_with_annotations() { + let value = EncodedTextValue::new(MatchedValue::Null(IonType::Null), 100, 12) + .with_annotations_sequence(90, 4); + assert_eq!(value.total_length(), 22); + } + + #[test] + fn total_length_data_with_field_name_and_annotations() { + let value = EncodedTextValue::new(MatchedValue::Null(IonType::Null), 100, 12) + .with_field_name(90, 4) + .with_annotations_sequence(94, 6); + assert_eq!(value.total_length(), 22); + + // Same test but with extra whitespace between the components + let value = EncodedTextValue::new(MatchedValue::Null(IonType::Null), 100, 12) + .with_field_name(80, 4) + .with_annotations_sequence(91, 6); + assert_eq!(value.total_length(), 32, "{:?}", value); + } +} diff --git a/src/lazy/text/matched.rs b/src/lazy/text/matched.rs new file mode 100644 index 00000000..3f846a38 --- /dev/null +++ b/src/lazy/text/matched.rs @@ -0,0 +1,108 @@ +//! Types in this module represent partially parsed values from the text Ion input stream. +//! +//! Ion readers are not necessarily interested in every value in the input. While the binary reader +//! is able to skip over uninteresting values using their length prefix, text readers must parse +//! every value in the stream in order to access the ones that follow. +//! +//! 
A somewhat naive implementation of a text reader might fully read each value in the input +//! stream eagerly, simply discarding values that the user doesn't request. This approach is +//! technically correct, but incurs the expense of validating and materializing data that will +//! ultimately be ignored. (As an example: consider a timestamp, which can have up to ~9 subfields +//! to check for syntactic and semantic correctness.) +//! +//! In contrast, when the lazy text reader is asked for the `next()` value in the stream, it uses its +//! Ion parser to identify the next slice of input that contains either a complete scalar value or +//! the beginning of a container. It stores an intermediate representation (IR) of that value using +//! one of the types defined in this module. The IR stores the value's Ion type, subfield offsets, +//! and other information that is identified in the process of parsing the next value. Later, if the +//! application asks to `read()` the value, the reader does not have to start from scratch. It can +//! use the previously recorded information to minimize the amount of information that needs to be +//! re-discovered. + +use crate::lazy::text::as_utf8::AsUtf8; +use crate::lazy::text::buffer::TextBufferView; +use crate::result::IonFailure; +use crate::{Int, IonResult, IonType}; +use num_bigint::BigInt; +use num_traits::Num; +use smallvec::SmallVec; +use std::num::IntErrorKind; + +/// A partially parsed Ion value. +#[derive(Copy, Clone, Debug, PartialEq)] +pub(crate) enum MatchedValue { + Null(IonType), + Bool(bool), + Int(MatchedInt), + // TODO: ...the other types +} + +/// A partially parsed Ion int. +#[derive(Copy, Clone, Debug, PartialEq)] +pub(crate) struct MatchedInt { + radix: u32, + digits_offset: usize, + is_negative: bool, +} + +impl MatchedInt { + // Integers that take more than 32 bytes to represent will heap allocate a larger buffer. + const STACK_ALLOC_BUFFER_CAPACITY: usize = 32; + + /// Constructs a new `MatchedInt`. 
+ pub fn new(radix: u32, is_negative: bool, digits_offset: usize) -> Self { + Self { + radix, + digits_offset, + is_negative, + } + } + + /// Whether the partially parsed int began with a `-` + pub fn is_negative(&self) -> bool { + self.is_negative + } + + /// One of: `2`, `10`, or `16`, as determined by whether the partially parsed integer began + /// with a `0b`/`0B`, `0x`/`0X`, or no prefix. + pub fn radix(&self) -> u32 { + self.radix + } + + /// Attempts to finish reading the partially parsed integer. + pub fn read(&self, matched_input: TextBufferView) -> IonResult { + let digits = matched_input.slice_to_end(self.digits_offset); + let mut sanitized: SmallVec<[u8; Self::STACK_ALLOC_BUFFER_CAPACITY]> = + SmallVec::with_capacity(Self::STACK_ALLOC_BUFFER_CAPACITY); + // Copy the input text over to the sanitization buffer, discarding any underscores. These + // are legal input, but Rust's integer `from_str_radix` method does not support them. + sanitized.extend(digits.bytes().iter().copied().filter(|b| *b != b'_')); + // Note: This UTF-8 validation step should be unnecessary as the parser only recognizes + // ASCII integer characters. If this shows up in profiling, we could consider skipping it. + let text = sanitized.as_utf8(matched_input.offset())?; + let int: Int = match i64::from_str_radix(text, self.radix()) { + Ok(i) => i.into(), + Err(parse_int_error) => { + debug_assert!( + // `from_str_radix` can fail for a variety of reasons, but our rules for matching an + // int rule out most of them (empty str, invalid digit, etc). The only ones that should + // happen are overflow and underflow. In those cases, we fall back to using `BigInt`. 
+ parse_int_error.kind() == &IntErrorKind::NegOverflow + || parse_int_error.kind() == &IntErrorKind::PosOverflow + ); + + match BigInt::from_str_radix(text, self.radix()) { + Ok(big_int) => big_int.into(), + Err(_big_parse_int_error) => { + return IonResult::decoding_error(format!( + "unexpected error while parsing int: '{}'", + std::str::from_utf8(matched_input.bytes()).unwrap_or("invalid UTF-8") + )) + } + } + } + }; + + Ok(int) + } +} diff --git a/src/lazy/text/mod.rs b/src/lazy/text/mod.rs new file mode 100644 index 00000000..a9a2cea2 --- /dev/null +++ b/src/lazy/text/mod.rs @@ -0,0 +1,7 @@ +mod as_utf8; +pub mod buffer; +pub mod encoded_value; +pub mod matched; +pub mod parse_result; +pub mod raw; +pub mod value; diff --git a/src/lazy/text/parse_result.rs b/src/lazy/text/parse_result.rs new file mode 100644 index 00000000..7da90511 --- /dev/null +++ b/src/lazy/text/parse_result.rs @@ -0,0 +1,274 @@ +//! The [`nom` parser combinator crate](https://docs.rs/nom/latest/nom/) intentionally provides +//! bare-bones error reporting by default. Each error contains only a `&str` representing the input +//! that could not be matched and an [`ErrorKind`] enum variant indicating which `nom` parser produced +//! the error. This stack-allocated type is very cheap to create, which is important because a +//! typical parse will require creating large numbers of short-lived error values. +//! +//! This module defines `IonParseError`, a custom error type that can capture more information than is +//! supported by [`nom::error::Error`]. It also defines `IonParseResult`, a type alias for an +//! [`IResult`] that parses `TextBufferView`s and produces `IonParseError`s if something goes wrong. 
+ +use crate::lazy::text::buffer::TextBufferView; +use crate::position::Position; +use crate::result::{DecodingError, IonFailure}; +use crate::{IonError, IonResult}; +use nom::error::{Error as NomError, ErrorKind, ParseError}; +use nom::{Err, IResult}; +use std::borrow::Cow; +use std::fmt::{Debug, Display}; + +/// A type alias for a [`IResult`] whose input is a `TextBufferView` and whose error type is an +/// [`InvalidInputError`]. All of the Ion parsers in the `text::parsers` module return an +/// [`IonParseResult`]. +/// +/// If the parser is successful, it will return `Ok(output_value)`. If it encounters a problem, +/// it will return a `nom::Err`. [nom::Err] is a generic enum with three possible +/// variants: +/// 1. `Incomplete(_)` indicates that there wasn't enough input data to determine whether the +/// parser should match or not. +/// 2. `Error(ion_parse_error)` indicates that the parser did not match the input text. +/// 3. `Failure(ion_parse_error)` indicates that the parser matched the text but encountered +/// a problem when trying to materialize it into the `output_value`. In such cases, returning a +/// `Failure` signals that this was the correct parser to handle the input but it could not +/// be processed successfully for some reason. For example, a parser trying to match a number of +/// hours and minutes might match the text `11:71`, but fail when it tries to turn `71` into a +/// number of minutes because it's `>=60`. We know this was the right parser, but it wasn't +/// able to process it. (This is slightly contrived; it would be possible to write a parser +/// that rejected `71` as a number of minutes based on syntax alone.) +pub(crate) type IonParseResult<'a, O> = IResult, O, IonParseError<'a>>; +// Functions that return IonParseResult parse TextBufferView-^ ^ ^ +// ...return a value of type `O` -----+ | +// ...or a nom::Err if something goes wrong ----+ + +/// As above, but for parsers that simply identify (i.e. 
'match') a slice of the input as a +/// particular item. +pub(crate) type IonMatchResult<'a> = + IResult, TextBufferView<'a>, IonParseError<'a>>; + +#[derive(Debug, PartialEq)] +pub enum IonParseError<'data> { + // When nom reports that the data was incomplete, it doesn't provide additional context. + Incomplete, + // When we encounter illegal text Ion, we'll have more information to share with the user. + Invalid(InvalidInputError<'data>), +} + +/// Describes a problem that occurred while trying to parse a given input `TextBufferView`. +/// +/// When returned as part of an `IonParseResult`, an `IonParseError` is always wrapped in +/// a [nom::Err] (see `IonParseResult`'s documentation for details). If the `nom::Err` is +/// a non-fatal `Error`, the `IonParseError`'s `description` will be `None`. If the `nom::Err` is +/// a fatal `Failure`, the `description` will be `Some(String)`. In this way, using an +/// `IonParseError` only incurs heap allocation costs when parsing is coming to an end. +#[derive(Debug, PartialEq)] +pub struct InvalidInputError<'data> { + // The input that was being parsed when the error arose + input: TextBufferView<'data>, + // A human-friendly name for what the parser was working on when the error occurred + label: Option>, + // The nature of the error--what went wrong? + description: Option>, + // A backtrace of errors that occurred leading to this one. + // XXX: This is the most expensive part of error handling and is likely not very useful. + // Consider removing it if it doesn't carry its weight. + backtrace: Vec>, + // The nom ErrorKind, which indicates which nom-provided parser encountered the error we're + // bubbling up. + nom_error_kind: Option, +} + +impl<'data> InvalidInputError<'data> { + /// Constructs a new `InvalidInputError` from the provided `input` text. 
+ pub(crate) fn new(input: TextBufferView<'data>) -> Self { + InvalidInputError { + input, + label: None, + description: None, + nom_error_kind: None, + backtrace: Vec::new(), + } + } + + /// Adds the provided `label`, a human-friendly name for what was being parsed. + pub(crate) fn with_label>>(mut self, label: D) -> Self { + self.label = Some(label.into()); + self + } + + /// Adds the provided `description` of what went wrong. + pub(crate) fn with_description>>(mut self, description: D) -> Self { + self.description = Some(description.into()); + self + } + + /// Adds the provided nom `ErrorKind`, identifying the nom parser that reported the error. + pub(crate) fn with_nom_error_kind(mut self, nom_error_kind: ErrorKind) -> Self { + self.nom_error_kind = Some(nom_error_kind); + self + } + + pub(crate) fn append_error(&mut self, error: InvalidInputError<'data>) { + self.backtrace.push(error) + } + + /// Returns a reference to the `description` text, if any. + pub fn description(&self) -> Option<&str> { + self.description.as_deref() + } + + pub fn label(&self) -> Option<&str> { + self.label.as_deref() + } + + // TODO: Decide how to expose 'input'. 
+} + +// impl<'data> From> for IonError { +// fn from(value: InvalidInputError) -> Self { +// dbg!(&value.backtrace); +// let mut message = String::from(value.description().unwrap_or("invalid text Ion syntax")); +// if let Some(label) = value.label { +// message.push_str(" while "); +// message.push_str(label.as_ref()); +// } +// let position = Position::with_offset(value.input.offset()).with_length(value.input.len()); +// let decoding_error = DecodingError::new(message).with_position(position); +// IonError::Decoding(decoding_error) +// } +// } + +impl<'data> From> for IonParseError<'data> { + fn from(value: InvalidInputError<'data>) -> Self { + IonParseError::Invalid(value) + } +} + +impl<'data> From>> for IonParseError<'data> { + fn from(value: Err>) -> Self { + match value { + Err::Incomplete(_) => IonParseError::Incomplete, + Err::Error(e) => e, + Err::Failure(e) => e, + } + } +} + +/// Allows an `IonParseError` to be constructed from a `(&str, ErrorKind)` tuple, which is the +/// data provided by core `nom` parsers if they do not match the input. +impl<'data> From<(TextBufferView<'data>, ErrorKind)> for IonParseError<'data> { + fn from((input, error_kind): (TextBufferView<'data>, ErrorKind)) -> Self { + InvalidInputError::new(input) + .with_nom_error_kind(error_kind) + .into() + } +} + +/// Allows a [nom::error::Error] to be converted into an [IonParseError] by calling `.into()`. +impl<'data> From>> for IonParseError<'data> { + fn from(nom_error: NomError>) -> Self { + InvalidInputError::new(nom_error.input) + .with_nom_error_kind(nom_error.code) + .into() + } +} + +/// Allows `IonParseError` to be used as the error type in various `nom` functions. 
+impl<'data> ParseError> for IonParseError<'data> { + fn from_error_kind(input: TextBufferView<'data>, error_kind: ErrorKind) -> Self { + InvalidInputError::new(input) + .with_nom_error_kind(error_kind) + .into() + } + + fn append(input: TextBufferView<'data>, kind: ErrorKind, mut other: Self) -> Self { + // When an error stack is being built, this method is called to give the error + // type an opportunity to aggregate the errors into a collection or a more descriptive + // message. For now, we simply allow the most recent error to take precedence. + let new_error = InvalidInputError::new(input).with_nom_error_kind(kind); + if let IonParseError::Invalid(invalid_input_error) = &mut other { + invalid_input_error.backtrace.push(new_error) + } + other + } +} + +pub(crate) trait AddContext<'data, T> { + fn with_context( + self, + label: impl Into>, + input: TextBufferView<'data>, + ) -> IonResult<(TextBufferView<'data>, T)>; +} + +impl<'data, T> AddContext<'data, T> for IonParseResult<'data, T> { + fn with_context( + self, + label: impl Into>, + input: TextBufferView<'data>, + ) -> IonResult<(TextBufferView<'data>, T)> { + match self { + // No change needed in the ok case + Ok(matched) => Ok(matched), + // If the error was an incomplete + Err(e) => { + // Nom error to IonParseError + match IonParseError::from(e) { + IonParseError::Incomplete => IonResult::incomplete(label, input.offset()), + IonParseError::Invalid(invalid_input_error) => { + dbg!(&invalid_input_error.backtrace); + let mut message = String::from( + invalid_input_error + .description() + .unwrap_or("invalid text Ion syntax"), + ); + if let Some(label) = invalid_input_error.label { + message.push_str(" while "); + message.push_str(label.as_ref()); + } + let position = Position::with_offset(invalid_input_error.input.offset()) + .with_length(invalid_input_error.input.len()); + let decoding_error = DecodingError::new(message).with_position(position); + Err(IonError::Decoding(decoding_error)) + } + } + } + } 
+ } +} + +/// Constructs a `nom::Err::Failure` that contains an `IonParseError` describing the problem +/// that was encountered. +pub(crate) fn fatal_parse_error>, O>( + input: TextBufferView, + description: D, +) -> IonParseResult { + Err(nom::Err::Failure( + InvalidInputError::new(input) + .with_description(description) + .into(), + )) +} + +/// An extension trait that allows a [std::result::Result] of any kind to be mapped to an +/// `IonParseResult` concisely. +pub(crate) trait OrFatalParseError { + fn or_fatal_parse_error(self, input: TextBufferView, label: L) + -> IonParseResult; +} + +/// See the documentation for [OrFatalParseError]. +impl OrFatalParseError for Result +where + E: Debug, +{ + fn or_fatal_parse_error( + self, + input: TextBufferView, + label: L, + ) -> IonParseResult { + match self { + Ok(value) => Ok((input, value)), + Err(error) => fatal_parse_error(input, format!("{label}: {error:?}")), + } + } +} diff --git a/src/lazy/text/raw/mod.rs b/src/lazy/text/raw/mod.rs new file mode 100644 index 00000000..1077754f --- /dev/null +++ b/src/lazy/text/raw/mod.rs @@ -0,0 +1 @@ +pub mod reader; diff --git a/src/lazy/text/raw/reader.rs b/src/lazy/text/raw/reader.rs new file mode 100644 index 00000000..dfc9f863 --- /dev/null +++ b/src/lazy/text/raw/reader.rs @@ -0,0 +1,189 @@ +use crate::lazy::decoder::LazyRawReader; +use crate::lazy::encoding::TextEncoding; +use crate::lazy::raw_stream_item::RawStreamItem; +use crate::lazy::text::buffer::TextBufferView; +use crate::lazy::text::parse_result::AddContext; +use crate::lazy::text::value::LazyRawTextValue; +use crate::result::IonFailure; +use crate::IonResult; + +/// Wraps a [`TextBufferView`], allowing the reader to advance each time an item is successfully +/// parsed from it. 
+pub(crate) struct DataSource<'data> { + // The buffer we're reading from + buffer: TextBufferView<'data>, + // Each time something is parsed from the buffer successfully, the caller will mark the number + // of bytes that may be skipped the next time `advance_to_next_item` is called. + bytes_to_skip: usize, +} + +impl<'data> DataSource<'data> { + pub(crate) fn new(buffer: TextBufferView<'data>) -> DataSource<'data> { + DataSource { + buffer, + bytes_to_skip: 0, + } + } + + pub(crate) fn buffer(&self) -> TextBufferView<'data> { + self.buffer + } + + fn advance_to_next_item(&mut self) -> IonResult> { + if self.buffer.len() < self.bytes_to_skip { + return IonResult::incomplete( + "cannot advance to next item, insufficient data in buffer", + self.buffer.offset(), + ); + } + + if self.bytes_to_skip > 0 { + Ok(self.buffer.consume(self.bytes_to_skip)) + } else { + Ok(self.buffer) + } + } + + /// Runs the provided parsing function on this DataSource's buffer. + /// If it succeeds, marks the `DataSource` as ready to advance by the 'n' bytes + /// that were consumed and returns `Some(value)`. + /// If it does not succeed, the `DataSource` remains unchanged. + pub(crate) fn try_parse_next< + F: Fn(TextBufferView<'data>) -> IonResult>>, + >( + &mut self, + parser: F, + ) -> IonResult>> { + let buffer_after = self.advance_to_next_item()?; + + let lazy_value = match parser(buffer_after) { + Ok(Some(output)) => output, + Ok(None) => return Ok(None), + Err(e) => return Err(e), + }; + + self.buffer = buffer_after; + self.bytes_to_skip = lazy_value.encoded_value.total_length(); + Ok(Some(lazy_value)) + } +} + +/// A text Ion 1.0 reader that yields [`LazyRawTextValue`]s representing the top level values found +/// in the provided input stream. +pub struct LazyRawTextReader<'data> { + data: DataSource<'data>, +} + +impl<'data> LazyRawTextReader<'data> { + /// Constructs a `LazyRawTextReader` positioned at the beginning of the provided input stream. 
+ pub fn new(data: &'data [u8]) -> LazyRawTextReader<'data> { + Self::new_with_offset(data, 0) + } + + /// Constructs a `LazyRawTextReader` positioned at the beginning of the provided input stream. + /// The provided input stream is itself a slice starting `offset` bytes from the beginning + /// of a larger data stream. This offset is used for reporting the absolute (stream-level) + /// position of values encountered in `data`. + fn new_with_offset(data: &'data [u8], offset: usize) -> LazyRawTextReader<'data> { + let data = DataSource::new(TextBufferView::new_with_offset(data, offset)); + LazyRawTextReader { data } + } + + pub fn next<'top>(&'top mut self) -> IonResult> + where + 'data: 'top, + { + let buffer = self.data.buffer; + if buffer.is_empty() { + return IonResult::incomplete("reading a top-level value", buffer.offset()); + } + let (buffer_after_whitespace, _whitespace) = buffer + .match_optional_whitespace() + .with_context("skipping whitespace between top-level values", buffer)?; + let (remaining, matched) = buffer_after_whitespace + .read_top_level() + .with_context("reading a top-level value", buffer_after_whitespace)?; + // If we successfully moved to the next value, store the remaining buffer view + self.data.buffer = remaining; + Ok(matched) + } +} + +impl<'data> LazyRawReader<'data, TextEncoding> for LazyRawTextReader<'data> { + fn new(data: &'data [u8]) -> Self { + LazyRawTextReader::new(data) + } + + fn next<'a>(&'a mut self) -> IonResult> { + self.next() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::lazy::decoder::LazyRawValue; + use crate::IonType; + + #[test] + fn test_top_level() -> IonResult<()> { + let data = r#" + null + null.bool + null.int + false + true + 500 + 0x20 + 0b0101 + "#; + let mut reader = LazyRawTextReader::new(data.as_bytes()); + + // null + let lazy_untyped_null = reader.next()?.expect_value()?; + assert!(lazy_untyped_null.is_null()); + assert_eq!(lazy_untyped_null.ion_type(), IonType::Null); + + // 
null.bool + let lazy_null_bool = reader.next()?.expect_value()?; + assert!(lazy_null_bool.is_null()); + assert_eq!(lazy_null_bool.ion_type(), IonType::Bool); + + // null.int + let lazy_null_int = reader.next()?.expect_value()?; + assert!(lazy_null_int.is_null()); + assert_eq!(lazy_null_int.ion_type(), IonType::Int); + + // false + let lazy_bool_false = reader.next()?.expect_value()?; + assert!(!lazy_bool_false.is_null()); + assert_eq!(lazy_bool_false.ion_type(), IonType::Bool); + assert!(!lazy_bool_false.read()?.expect_bool()?); + + // true + let lazy_bool_true = reader.next()?.expect_value()?; + assert!(!lazy_bool_true.is_null()); + assert_eq!(lazy_bool_true.ion_type(), IonType::Bool); + assert!(lazy_bool_true.read()?.expect_bool()?); + + // 500 + let lazy_int_decimal_500 = reader.next()?.expect_value()?; + assert!(!lazy_int_decimal_500.is_null()); + assert_eq!(lazy_int_decimal_500.ion_type(), IonType::Int); + assert_eq!(lazy_int_decimal_500.read()?.expect_i64()?, 500); + + // 0x20 + let lazy_int_hex_20 = reader.next()?.expect_value()?; + assert!(!lazy_int_hex_20.is_null()); + assert_eq!(lazy_int_hex_20.ion_type(), IonType::Int); + assert_eq!(lazy_int_hex_20.read()?.expect_i64()?, 0x20); // decimal 32 + + // 0b0101 + let lazy_int_binary_0101 = reader.next()?.expect_value()?; + assert!(!lazy_int_binary_0101.is_null()); + assert_eq!(lazy_int_binary_0101.ion_type(), IonType::Int); + assert_eq!(lazy_int_binary_0101.read()?.expect_i64()?, 0b0101); // decimal 5 + + Ok(()) + } +} diff --git a/src/lazy/text/value.rs b/src/lazy/text/value.rs new file mode 100644 index 00000000..e586f677 --- /dev/null +++ b/src/lazy/text/value.rs @@ -0,0 +1,66 @@ +use crate::lazy::decoder::private::LazyRawValuePrivate; +use crate::lazy::decoder::{LazyDecoder, LazyRawValue}; +use crate::lazy::encoding::TextEncoding; +use crate::lazy::raw_value_ref::RawValueRef; +use crate::lazy::text::buffer::TextBufferView; +use crate::lazy::text::encoded_value::EncodedTextValue; +use 
crate::lazy::text::matched::MatchedValue; +use crate::{IonResult, IonType, RawSymbolTokenRef}; +use std::fmt; +use std::fmt::{Debug, Formatter}; + +/// A value that has been identified in the text input stream but whose data has not yet been read. +/// +/// If only part of the value is in the input buffer, calls to [`LazyRawTextValue::read`] (which examines +/// bytes beyond the value's header) may return [`IonError::Incomplete`](crate::result::IonError::Incomplete). +/// +/// `LazyRawTextValue`s are "unresolved," which is to say that symbol values, annotations, and +/// struct field names may or may not include a text definition. (This is less common in Ion's text +/// format than in its binary format, but is still possible.) For a resolved lazy value that +/// includes a text definition for these items whenever one exists, see +/// [`crate::lazy::value::LazyValue`]. +#[derive(Clone)] +pub struct LazyRawTextValue<'data> { + pub(crate) encoded_value: EncodedTextValue, + pub(crate) input: TextBufferView<'data>, +} + +impl<'data> LazyRawValuePrivate<'data> for LazyRawTextValue<'data> { + fn field_name(&self) -> Option> { + todo!() + } +} + +impl<'data> LazyRawValue<'data, TextEncoding> for LazyRawTextValue<'data> { + fn ion_type(&self) -> IonType { + self.encoded_value.ion_type() + } + + fn is_null(&self) -> bool { + self.encoded_value.is_null() + } + + fn annotations(&self) -> >::AnnotationsIterator { + todo!() + } + + fn read(&self) -> IonResult> { + let matched_input = self.input.slice(0, self.encoded_value.data_length()); + let value_ref = match self.encoded_value.matched() { + MatchedValue::Null(ion_type) => RawValueRef::Null(ion_type), + MatchedValue::Bool(b) => RawValueRef::Bool(b), + MatchedValue::Int(i) => RawValueRef::Int(i.read(matched_input)?), + }; + Ok(value_ref) + } +} + +impl<'a> Debug for LazyRawTextValue<'a> { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!( + f, + "LazyRawTextValue {{\n val={:?},\n buf={:?}\n}}\n", + 
self.encoded_value, self.input + ) + } +} diff --git a/src/lazy/value.rs b/src/lazy/value.rs index d7ae1b64..8f09cdbf 100644 --- a/src/lazy/value.rs +++ b/src/lazy/value.rs @@ -1,5 +1,5 @@ -use crate::lazy::binary::encoding::BinaryEncoding; use crate::lazy::decoder::{LazyDecoder, LazyRawValue}; +use crate::lazy::encoding::BinaryEncoding; use crate::lazy::r#struct::LazyStruct; use crate::lazy::sequence::LazySequence; use crate::lazy::value_ref::ValueRef; diff --git a/src/position.rs b/src/position.rs index bb5d9648..413d82db 100644 --- a/src/position.rs +++ b/src/position.rs @@ -7,6 +7,7 @@ use std::fmt::{Display, Error}; #[derive(Clone, Debug, PartialEq, Eq)] pub struct Position { pub(crate) byte_offset: usize, + pub(crate) byte_length: Option, pub(crate) line_column: Option<(usize, usize)>, } @@ -16,16 +17,20 @@ impl Position { pub fn with_offset(offset: usize) -> Self { Position { byte_offset: offset, + byte_length: None, line_column: None, } } + pub fn with_length(mut self, length: usize) -> Self { + self.byte_length = Some(length); + self + } + /// Add line and column information to the current Position. - pub fn with_line_and_column(&self, line: usize, column: usize) -> Self { - Position { - line_column: Some((line, column)), - ..*self - } + pub fn with_line_and_column(mut self, line: usize, column: usize) -> Self { + self.line_column = Some((line, column)); + self } /// Returns the offset from the start of the Ion stream in bytes. @@ -33,17 +38,22 @@ impl Position { self.byte_offset } - /// If available returns the text position as line and column offsets. + /// If available, returns the length of the input slice in question. + pub fn byte_length(&self) -> Option { + self.byte_length + } + + /// If available, returns the text position as line and column offsets. pub fn line_and_column(&self) -> Option<(usize, usize)> { self.line_column } - /// If available returns the line component of the text position. 
+ /// If available, returns the line component of the text position. pub fn line(&self) -> Option { self.line_column.map(|(line, _column)| line) } - /// If available returns the column component of the text position. + /// If available, returns the column component of the text position. pub fn column(&self) -> Option { self.line_column.map(|(_line, column)| column) } diff --git a/src/result/decoding_error.rs b/src/result/decoding_error.rs index e2fb39af..ade5d501 100644 --- a/src/result/decoding_error.rs +++ b/src/result/decoding_error.rs @@ -1,3 +1,4 @@ +use crate::position::Position; use std::borrow::Cow; use thiserror::Error; @@ -6,12 +7,23 @@ use thiserror::Error; #[error("{description}")] pub struct DecodingError { description: Cow<'static, str>, + position: Option, } impl DecodingError { pub(crate) fn new(description: impl Into>) -> Self { DecodingError { description: description.into(), + position: None, } } + + pub(crate) fn with_position(mut self, position: impl Into) -> Self { + self.position = Some(position.into()); + self + } + + pub fn position(&self) -> Option<&Position> { + self.position.as_ref() + } } diff --git a/src/result/incomplete.rs b/src/result/incomplete.rs index c47c721d..896b9a16 100644 --- a/src/result/incomplete.rs +++ b/src/result/incomplete.rs @@ -1,4 +1,5 @@ use crate::position::Position; +use std::borrow::Cow; use thiserror::Error; /// For non-blocking readers, indicates that there was not enough data available in the input buffer @@ -6,14 +7,14 @@ use thiserror::Error; #[derive(Clone, Debug, Error, PartialEq)] #[error("ran out of input while reading {label} at offset {position}")] pub struct IncompleteError { - label: &'static str, + label: Cow<'static, str>, position: Position, } impl IncompleteError { - pub(crate) fn new(label: &'static str, position: impl Into) -> Self { + pub(crate) fn new(label: impl Into>, position: impl Into) -> Self { IncompleteError { - label, + label: label.into(), position: position.into(), } } diff --git 
a/src/result/mod.rs b/src/result/mod.rs index 8b47a476..157184ac 100644 --- a/src/result/mod.rs +++ b/src/result/mod.rs @@ -82,14 +82,14 @@ pub(crate) trait IonFailure { // an `IonError::Io` is by converting a `std::io::IoError` with the ? operator. // Because this trait is only crate-visible, methods can be added/changed as needed in // the future. - fn incomplete(label: &'static str, position: impl Into) -> Self; + fn incomplete(label: impl Into>, position: impl Into) -> Self; fn decoding_error>>(description: S) -> Self; fn encoding_error>>(description: S) -> Self; fn illegal_operation>>(operation: S) -> Self; } impl IonFailure for IonError { - fn incomplete(label: &'static str, position: impl Into) -> Self { + fn incomplete(label: impl Into>, position: impl Into) -> Self { IncompleteError::new(label, position).into() } @@ -107,7 +107,7 @@ impl IonFailure for IonError { } impl IonFailure for IonResult { - fn incomplete(label: &'static str, position: impl Into) -> Self { + fn incomplete(label: impl Into>, position: impl Into) -> Self { Err(IonError::incomplete(label, position)) } From 89f79aa1b67820c1ef364a111ceca36f8b498edd Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Tue, 25 Jul 2023 08:36:00 -0400 Subject: [PATCH 02/15] Consolidate impls of AsUtf8 w/helper fn --- src/lazy/text/as_utf8.rs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/lazy/text/as_utf8.rs b/src/lazy/text/as_utf8.rs index 0d1e211c..9be4784c 100644 --- a/src/lazy/text/as_utf8.rs +++ b/src/lazy/text/as_utf8.rs @@ -14,20 +14,20 @@ pub(crate) trait AsUtf8 { impl AsUtf8 for SmallVec<[u8; N]> { fn as_utf8(&self, position: impl Into) -> IonResult<&str> { - std::str::from_utf8(self.as_ref()).map_err(|_| { - let decoding_error = - DecodingError::new("encountered invalid UTF-8").with_position(position); - IonError::Decoding(decoding_error) - }) + bytes_as_utf8(self.as_ref(), position) } } impl<'data> AsUtf8 for TextBufferView<'data> { fn as_utf8(&self, 
position: impl Into) -> IonResult<&str> { - std::str::from_utf8(self.bytes()).map_err(|_| { - let decoding_error = - DecodingError::new("encountered invalid UTF-8").with_position(position); - IonError::Decoding(decoding_error) - }) + bytes_as_utf8(self.bytes(), position) } } + +fn bytes_as_utf8(bytes: &[u8], position: impl Into) -> IonResult<&str> { + std::str::from_utf8(bytes).map_err(|_| { + let decoding_error = + DecodingError::new("encountered invalid UTF-8").with_position(position); + IonError::Decoding(decoding_error) + }) +} From 840be4d62783ebb754314dee018e655ca7570d73 Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Tue, 25 Jul 2023 10:05:27 -0400 Subject: [PATCH 03/15] Improved TextBufferView docs, removed DataSource --- src/lazy/text/buffer.rs | 70 ++++++++++++++++---------------- src/lazy/text/matched.rs | 1 + src/lazy/text/raw/reader.rs | 80 ++++++------------------------------- 3 files changed, 49 insertions(+), 102 deletions(-) diff --git a/src/lazy/text/buffer.rs b/src/lazy/text/buffer.rs index 54ecf4f6..c12ec11c 100644 --- a/src/lazy/text/buffer.rs +++ b/src/lazy/text/buffer.rs @@ -58,11 +58,10 @@ const WHITESPACE_CHARACTERS_AS_STR: &str = " \t\r\n\x09\x0B\x0C"; /// A slice of unsigned bytes that can be cheaply copied and which defines methods for parsing /// the various encoding elements of a text Ion stream. /// -/// Upon success, each parsing method on the `TextBufferView` will return the value that was read -/// and a new copy of the `TextBufferView` that starts _after_ the bytes that were parsed. -/// -/// Methods that begin with `match_` return the input slice that they matched OR a `MatchedValue` -/// that retains additional information found during the matching process. +/// Parsing methods have names that begin with `match_` and each return a `(match, remaining_input)` +/// pair. 
The `match` may be either the slice of the input that was matched (represented as another +/// `TextBufferView`) or a `MatchedValue` that retains information discovered during parsing that +/// will be useful if the match is later fully materialized into a value. #[derive(PartialEq, Clone, Copy)] pub(crate) struct TextBufferView<'a> { // `data` is a slice of remaining data in the larger input stream. @@ -79,17 +78,21 @@ pub(crate) struct TextBufferView<'a> { pub(crate) type ParseResult<'a, T> = IonResult<(T, TextBufferView<'a>)>; impl<'data> TextBufferView<'data> { - /// Constructs a new `TextBufferView` that wraps `data`. + /// Constructs a new `TextBufferView` that wraps `data`, setting the view's `offset` to zero. #[inline] pub fn new(data: &[u8]) -> TextBufferView { Self::new_with_offset(data, 0) } + /// Constructs a new `TextBufferView` that wraps `data`, setting the view's `offset` to the + /// specified value. This is useful when `data` is a slice from the middle of a larger stream. + /// Note that `offset` is the index of the larger stream at which `data` begins and not an + /// offset _into_ `data`. pub fn new_with_offset(data: &[u8], offset: usize) -> TextBufferView { TextBufferView { data, offset } } - /// Returns a subslice copy of the [`TextBufferView`] that starts at `offset` and continues for + /// Returns a subslice of the [`TextBufferView`] that starts at `offset` and continues for /// `length` bytes. /// /// Note that `offset` is relative to the beginning of the buffer, not the beginning of the @@ -101,7 +104,7 @@ impl<'data> TextBufferView<'data> { } } - /// Returns a subslice copy of the [`TextBufferView`] that starts at `offset` and continues + /// Returns a subslice of the [`TextBufferView`] that starts at `offset` and continues /// to the end. 
/// /// Note that `offset` is relative to the beginning of the buffer, not the beginning of the @@ -134,48 +137,44 @@ impl<'data> TextBufferView<'data> { self.data.is_empty() } - /// Creates a copy of this `TextBufferView` that begins `num_bytes_to_consume` further into the - /// slice. - #[inline] - pub fn consume(&self, num_bytes_to_consume: usize) -> Self { - // This assertion is always run during testing but is removed in the release build. - debug_assert!(num_bytes_to_consume <= self.len()); - Self { - data: &self.data[num_bytes_to_consume..], - offset: self.offset + num_bytes_to_consume, - } + pub fn match_whitespace(self) -> IonMatchResult<'data> { + is_a(WHITESPACE_CHARACTERS_AS_STR)(self) } - // An adapter for nom::combinator::success. - // Always succeeds and consumes none of the input. Returns an empty slice of the buffer. - pub fn match_nothing(self) -> IonMatchResult<'data> { - // Return an empty slice from the head position + /// Always succeeds and consumes none of the input. Returns an empty slice of the buffer. + // This method is useful for parsers that need to match an optional construct but don't want + // to return an Option<_>. For an example, see its use in `match_optional_whitespace`. + fn match_nothing(self) -> IonMatchResult<'data> { + // Use nom's `success` parser to return an empty slice from the head position success(self.slice(0, 0))(self) } - pub fn match_whitespace(self) -> IonMatchResult<'data> { - is_a(WHITESPACE_CHARACTERS_AS_STR)(self) - } - + /// Matches zero or more whitespace characters. pub fn match_optional_whitespace(self) -> IonMatchResult<'data> { // Either match whitespace and return what follows or just return the input as-is. - // This will always return `Ok`, but is packaged as an IonMatchResult for compatability + // This will always return `Ok`, but it is packaged as an IonMatchResult for compatibility + // with other parsers. 
alt((Self::match_whitespace, Self::match_nothing))(self) } - pub fn read_top_level(self) -> IonParseResult<'data, RawStreamItem<'data, TextEncoding>> { - let (remaining, value) = match self.read_value() { + /// Matches a single top-level scalar value, the beginning of a container, or an IVM. + pub fn match_top_level(self) -> IonParseResult<'data, RawStreamItem<'data, TextEncoding>> { + let (remaining, value) = match self.match_value() { Ok(value) => value, Err(e) => return Err(e), }; + // TODO: Augment this method to take an `is_complete` flag that indicates whether the absence + // of further values should return an `Incomplete` or a `RawStreamItem::EndOfStream`. + // TODO: Check to see if `value` is actually an IVM. // => If it's a symbol, try the IVM parser on it and see if it succeeds. // For now, we just return the value. Ok((remaining, RawStreamItem::Value(value))) } - pub fn read_value(self) -> IonParseResult<'data, LazyRawTextValue<'data>> { + /// Matches a single scalar value or the beginning of a container. + pub fn match_value(self) -> IonParseResult<'data, LazyRawTextValue<'data>> { alt(( // For `null` and `bool`, we use `read_` instead of `match_` because there's no additional // parsing to be done. @@ -202,10 +201,12 @@ impl<'data> TextBufferView<'data> { .parse(self) } + /// Matches a boolean value. pub fn match_bool(self) -> IonMatchResult<'data> { recognize(Self::read_bool)(self) } + /// Matches and returns a boolean value. pub fn read_bool(self) -> IonParseResult<'data, bool> { terminated( alt((value(true, tag("true")), value(false, tag("false")))), @@ -213,10 +214,12 @@ impl<'data> TextBufferView<'data> { )(self) } + /// Matches any type of null. (`null`, `null.null`, `null.int`, etc) pub fn match_null(self) -> IonMatchResult<'data> { recognize(Self::read_null)(self) } + /// Matches and returns a null value. 
pub fn read_null(self) -> IonParseResult<'data, IonType> { delimited( tag("null"), @@ -227,10 +230,7 @@ impl<'data> TextBufferView<'data> { .parse(self) } - fn match_ion_type(self) -> IonMatchResult<'data> { - recognize(Self::read_ion_type)(self) - } - + /// Matches and returns an Ion type. fn read_ion_type(self) -> IonParseResult<'data, IonType> { alt(( value(IonType::Null, tag("null")), @@ -249,10 +249,12 @@ impl<'data> TextBufferView<'data> { ))(self) } + /// Matches any one of Ion's stop characters. fn match_stop_character(self) -> IonMatchResult<'data> { recognize(one_of("{}[](),\"' \t\n\r\u{0b}\u{0c}")).parse(self) } + /// Matches--but does not consume--any one of Ion's stop characters. fn peek_stop_character(self) -> IonMatchResult<'data> { peek(Self::match_stop_character).parse(self) } diff --git a/src/lazy/text/matched.rs b/src/lazy/text/matched.rs index 3f846a38..b3f79056 100644 --- a/src/lazy/text/matched.rs +++ b/src/lazy/text/matched.rs @@ -31,6 +31,7 @@ use std::num::IntErrorKind; /// A partially parsed Ion value. #[derive(Copy, Clone, Debug, PartialEq)] pub(crate) enum MatchedValue { + // `Null` and `Bool` are fully parsed because they only involve matching a keyword. Null(IonType), Bool(bool), Int(MatchedInt), diff --git a/src/lazy/text/raw/reader.rs b/src/lazy/text/raw/reader.rs index dfc9f863..22eedf61 100644 --- a/src/lazy/text/raw/reader.rs +++ b/src/lazy/text/raw/reader.rs @@ -3,77 +3,19 @@ use crate::lazy::encoding::TextEncoding; use crate::lazy::raw_stream_item::RawStreamItem; use crate::lazy::text::buffer::TextBufferView; use crate::lazy::text::parse_result::AddContext; -use crate::lazy::text::value::LazyRawTextValue; use crate::result::IonFailure; use crate::IonResult; -/// Wraps a [`TextBufferView`], allowing the reader to advance each time an item is successfully -/// parsed from it. 
-pub(crate) struct DataSource<'data> { - // The buffer we're reading from +/// A text Ion 1.0 reader that yields [`RawStreamItem`]s representing the top level values found +/// in the provided input stream. +pub struct LazyRawTextReader<'data> { + // The current view of the data we're reading from. buffer: TextBufferView<'data>, // Each time something is parsed from the buffer successfully, the caller will mark the number - // of bytes that may be skipped the next time `advance_to_next_item` is called. + // of bytes that may be skipped the next time the reader advances. bytes_to_skip: usize, } -impl<'data> DataSource<'data> { - pub(crate) fn new(buffer: TextBufferView<'data>) -> DataSource<'data> { - DataSource { - buffer, - bytes_to_skip: 0, - } - } - - pub(crate) fn buffer(&self) -> TextBufferView<'data> { - self.buffer - } - - fn advance_to_next_item(&mut self) -> IonResult> { - if self.buffer.len() < self.bytes_to_skip { - return IonResult::incomplete( - "cannot advance to next item, insufficient data in buffer", - self.buffer.offset(), - ); - } - - if self.bytes_to_skip > 0 { - Ok(self.buffer.consume(self.bytes_to_skip)) - } else { - Ok(self.buffer) - } - } - - /// Runs the provided parsing function on this DataSource's buffer. - /// If it succeeds, marks the `DataSource` as ready to advance by the 'n' bytes - /// that were consumed and returns `Some(value)`. - /// If it does not succeed, the `DataSource` remains unchanged. 
- pub(crate) fn try_parse_next< - F: Fn(TextBufferView<'data>) -> IonResult>>, - >( - &mut self, - parser: F, - ) -> IonResult>> { - let buffer_after = self.advance_to_next_item()?; - - let lazy_value = match parser(buffer_after) { - Ok(Some(output)) => output, - Ok(None) => return Ok(None), - Err(e) => return Err(e), - }; - - self.buffer = buffer_after; - self.bytes_to_skip = lazy_value.encoded_value.total_length(); - Ok(Some(lazy_value)) - } -} - -/// A text Ion 1.0 reader that yields [`LazyRawTextValue`]s representing the top level values found -/// in the provided input stream. -pub struct LazyRawTextReader<'data> { - data: DataSource<'data>, -} - impl<'data> LazyRawTextReader<'data> { /// Constructs a `LazyRawTextReader` positioned at the beginning of the provided input stream. pub fn new(data: &'data [u8]) -> LazyRawTextReader<'data> { @@ -85,15 +27,17 @@ impl<'data> LazyRawTextReader<'data> { /// of a larger data stream. This offset is used for reporting the absolute (stream-level) /// position of values encountered in `data`. 
fn new_with_offset(data: &'data [u8], offset: usize) -> LazyRawTextReader<'data> { - let data = DataSource::new(TextBufferView::new_with_offset(data, offset)); - LazyRawTextReader { data } + LazyRawTextReader { + buffer: TextBufferView::new_with_offset(data, offset), + bytes_to_skip: 0, + } } pub fn next<'top>(&'top mut self) -> IonResult> where 'data: 'top, { - let buffer = self.data.buffer; + let buffer = self.buffer; if buffer.is_empty() { return IonResult::incomplete("reading a top-level value", buffer.offset()); } @@ -101,10 +45,10 @@ impl<'data> LazyRawTextReader<'data> { .match_optional_whitespace() .with_context("skipping whitespace between top-level values", buffer)?; let (remaining, matched) = buffer_after_whitespace - .read_top_level() + .match_top_level() .with_context("reading a top-level value", buffer_after_whitespace)?; // If we successfully moved to the next value, store the remaining buffer view - self.data.buffer = remaining; + self.buffer = remaining; Ok(matched) } } From 5db1ff0b8748487e33a9e3d3069e2c1966eaa7fb Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Thu, 27 Jul 2023 13:30:34 -0400 Subject: [PATCH 04/15] Adds lazy text floats --- src/lazy/text/buffer.rs | 156 ++++++++++++++++++++++++++++++++- src/lazy/text/encoded_value.rs | 1 + src/lazy/text/matched.rs | 57 ++++++++++-- src/lazy/text/parse_result.rs | 68 +++++++++----- src/lazy/text/raw/reader.rs | 45 ++++++++++ src/lazy/text/value.rs | 1 + 6 files changed, 297 insertions(+), 31 deletions(-) diff --git a/src/lazy/text/buffer.rs b/src/lazy/text/buffer.rs index c12ec11c..e7f7d393 100644 --- a/src/lazy/text/buffer.rs +++ b/src/lazy/text/buffer.rs @@ -1,7 +1,7 @@ use crate::lazy::encoding::TextEncoding; use crate::lazy::raw_stream_item::RawStreamItem; use crate::lazy::text::encoded_value::EncodedTextValue; -use crate::lazy::text::matched::{MatchedInt, MatchedValue}; +use crate::lazy::text::matched::{MatchedFloat, MatchedInt, MatchedValue}; use 
crate::lazy::text::parse_result::IonParseError; use crate::lazy::text::parse_result::{IonMatchResult, IonParseResult}; use crate::lazy::text::value::LazyRawTextValue; @@ -12,7 +12,7 @@ use nom::character::streaming::{char, digit1, one_of}; use nom::combinator::{map, opt, peek, recognize, success, value}; use nom::error::{ErrorKind, ParseError}; use nom::multi::many0_count; -use nom::sequence::{delimited, pair, preceded, separated_pair, terminated}; +use nom::sequence::{delimited, pair, preceded, separated_pair, terminated, tuple}; use nom::{CompareResult, IResult, InputLength, InputTake, Needed, Parser}; use std::fmt::{Debug, Formatter}; use std::iter::{Copied, Enumerate}; @@ -192,6 +192,12 @@ impl<'data> TextBufferView<'data> { EncodedTextValue::new(MatchedValue::Int(matched_int), self.offset(), length) }, ), + map( + match_and_length(Self::match_float), + |(matched_float, length)| { + EncodedTextValue::new(MatchedValue::Float(matched_float), self.offset(), length) + }, + ), // TODO: The other Ion types )) .map(|encoded_value| LazyRawTextValue { @@ -372,6 +378,111 @@ impl<'data> TextBufferView<'data> { fn take_base_16_digits1(self) -> IonMatchResult<'data> { take_while1(|b: u8| b.is_ascii_hexdigit())(self) } + + /// Matches an Ion float of any syntax + fn match_float(self) -> IonParseResult<'data, MatchedFloat> { + alt(( + Self::match_float_special_value, + Self::match_float_numeric_value, + ))(self) + } + + /// Matches special IEEE-754 floating point values, including +/- infinity and NaN. + fn match_float_special_value(self) -> IonParseResult<'data, MatchedFloat> { + alt(( + value(MatchedFloat::NotANumber, tag("nan")), + value(MatchedFloat::PositiveInfinity, tag("+inf")), + value(MatchedFloat::NegativeInfinity, tag("-inf")), + ))(self) + } + + /// Matches numeric IEEE-754 floating point values. 
+ fn match_float_numeric_value(self) -> IonParseResult<'data, MatchedFloat> { + terminated( + recognize(pair( + Self::match_number_with_optional_dot_and_digits, + Self::match_float_exponent_marker_and_digits, + )), + Self::peek_stop_character, + ) + .map(|_matched| MatchedFloat::Numeric) + .parse(self) + } + + /// Matches a number that may or may not have a decimal place and trailing fractional digits. + /// If a decimal place is present, there must also be trailing digits. + /// For example: + /// 1000 + /// 1000.559 + /// -25.2 + fn match_number_with_optional_dot_and_digits(self) -> IonMatchResult<'data> { + recognize(tuple(( + opt(tag("-")), + Self::match_base_10_digits_before_dot, + opt(Self::match_dot_followed_by_base_10_digits), + )))(self) + } + + /// In a float or decimal, matches the digits that are permitted before the decimal point. + /// This includes either a single zero, or a non-zero followed by any sequence of digits. + fn match_digits_before_dot(self) -> IonMatchResult<'data> { + alt(( + tag("0"), + recognize(pair(Self::match_leading_digit, Self::match_trailing_digits)), + ))(self) + } + + /// Matches a single non-zero base 10 digit. + fn match_leading_digit(self) -> IonMatchResult<'data> { + recognize(one_of("123456789"))(self) + } + + /// Matches any number of base 10 digits, allowing underscores at any position except the end. + fn match_trailing_digits(self) -> IonMatchResult<'data> { + recognize(many0_count(preceded(opt(char('_')), digit1)))(self) + } + + /// Recognizes a decimal point followed by any number of base-10 digits. + fn match_dot_followed_by_base_10_digits(self) -> IonMatchResult<'data> { + recognize(preceded(tag("."), opt(Self::match_digits_after_dot)))(self) + } + + /// Like `match_digits_before_dot`, but allows leading zeros. 
+ fn match_digits_after_dot(self) -> IonMatchResult<'data> { + recognize(terminated( + // Zero or more digits-followed-by-underscores + many0_count(pair(digit1, char('_'))), + // One or more digits + digit1, + ))(self) + } + + /// Matches an `e` or `E` followed by an optional sign (`+` or `-`) followed by one or more + /// base 10 digits. + fn match_float_exponent_marker_and_digits(self) -> IonMatchResult<'data> { + preceded(one_of("eE"), Self::match_exponent_sign_and_digits)(self) + } + + /// Recognizes the exponent portion of a decimal (everything after the 'd') or float + /// (everything after the 'e'). This includes: + /// * an optional '+' OR '-' + /// * any number of decimal digits, which may: + /// * have underscores in between them: `1_000_000` + /// * have one or more leading zeros: `0005` + fn match_exponent_sign_and_digits(self) -> IonMatchResult<'data> { + recognize(pair( + // Optional leading sign; if there's no sign, it's not negative. + opt(Self::match_any_sign), + Self::match_digits_after_dot, + ))(self) + } + + /// Matches `-` OR `+`. + /// + /// This is used for matching exponent signs; most places in Ion do not allow `+`. 
+ pub fn match_any_sign(self) -> IonMatchResult<'data> { + alt((tag("+"), tag("-")))(self) + } } // === nom trait implementations === @@ -602,7 +713,12 @@ mod tests { { let result = self.try_match(parser); // We expect this to fail for one reason or another - result.unwrap_err(); + assert!( + result.is_err(), + "Expected a parse failure for input: {:?}\nResult: {:?}", + self.input, + result + ); } } @@ -729,4 +845,38 @@ mod tests { mismatch_int(input); } } + + #[test] + fn test_match_float() { + fn match_float(input: &str) { + MatchTest::new(input).expect_match(match_length(TextBufferView::match_float)); + } + fn mismatch_float(input: &str) { + MatchTest::new(input).expect_mismatch(match_length(TextBufferView::match_float)); + } + + let good_inputs = &[ + "0.0e0", "0E0", "0e0", "305e1", "305e+1", "305e-1", "305e100", "305e-100", "305e+100", + "305.0e1", "0.279e3", "279e0", "279.5e0", "279.5E0", + ]; + for input in good_inputs { + match_float(input); + let negative = format!("-{input}"); + match_float(&negative); + } + + let bad_inputs = &[ + "305", // Integer + "305e", // Has exponent delimiter but no exponent + ".305e", // No digits before the decimal point + "305e0.5", // Fractional exponent + "305e-0.5", // Negative fractional exponent + "0305e1", // Leading zero + "+305e1", // Leading plus sign + "--305e1", // Multiple negative signs + ]; + for input in bad_inputs { + mismatch_float(input); + } + } } diff --git a/src/lazy/text/encoded_value.rs b/src/lazy/text/encoded_value.rs index e1a3bcc1..0c649d08 100644 --- a/src/lazy/text/encoded_value.rs +++ b/src/lazy/text/encoded_value.rs @@ -114,6 +114,7 @@ impl EncodedTextValue { MatchedValue::Null(ion_type) => ion_type, MatchedValue::Bool(_) => IonType::Bool, MatchedValue::Int(_) => IonType::Int, + MatchedValue::Float(_) => IonType::Float, } } diff --git a/src/lazy/text/matched.rs b/src/lazy/text/matched.rs index b3f79056..560d828e 100644 --- a/src/lazy/text/matched.rs +++ b/src/lazy/text/matched.rs @@ -19,14 +19,17 
@@ //! use the previously recorded information to minimize the amount of information that needs to be //! re-discovered. -use crate::lazy::text::as_utf8::AsUtf8; -use crate::lazy::text::buffer::TextBufferView; -use crate::result::IonFailure; -use crate::{Int, IonResult, IonType}; +use std::num::IntErrorKind; + use num_bigint::BigInt; use num_traits::Num; use smallvec::SmallVec; -use std::num::IntErrorKind; + +use crate::lazy::text::as_utf8::AsUtf8; +use crate::lazy::text::buffer::TextBufferView; +use crate::lazy::text::parse_result::InvalidInputError; +use crate::result::IonFailure; +use crate::{Int, IonError, IonResult, IonType}; /// A partially parsed Ion value. #[derive(Copy, Clone, Debug, PartialEq)] @@ -35,6 +38,7 @@ pub(crate) enum MatchedValue { Null(IonType), Bool(bool), Int(MatchedInt), + Float(MatchedFloat), // TODO: ...the other types } @@ -107,3 +111,46 @@ impl MatchedInt { Ok(int) } } + +/// A partially parsed Ion float. +#[derive(Copy, Clone, Debug, PartialEq)] +pub(crate) enum MatchedFloat { + /// `+inf` + PositiveInfinity, + /// `-inf` + NegativeInfinity, + /// `nan` + NotANumber, + /// Any numeric float value + Numeric, +} + +impl MatchedFloat { + // Floats that take more than 32 bytes of text to represent will heap allocate a larger buffer. 
+    const STACK_ALLOC_BUFFER_CAPACITY: usize = 32;
+
+    pub fn read(&self, matched_input: TextBufferView) -> IonResult<f64> {
+        use std::str::FromStr;
+
+        match self {
+            MatchedFloat::PositiveInfinity => return Ok(f64::INFINITY),
+            MatchedFloat::NegativeInfinity => return Ok(f64::NEG_INFINITY),
+            MatchedFloat::NotANumber => return Ok(f64::NAN),
+            MatchedFloat::Numeric => {} // fall through
+        };
+
+        let mut sanitized: SmallVec<[u8; Self::STACK_ALLOC_BUFFER_CAPACITY]> =
+            SmallVec::with_capacity(Self::STACK_ALLOC_BUFFER_CAPACITY);
+        sanitized.extend(matched_input.bytes().iter().copied().filter(|b| *b != b'_'));
+
+        let text = sanitized.as_utf8(matched_input.offset())?;
+        let float = f64::from_str(text).map_err(|e| {
+            let error: IonError = InvalidInputError::new(matched_input)
+                .with_description(format!("encountered an unexpected error ({:?})", e))
+                .with_label("parsing a float")
+                .into();
+            error
+        })?;
+        Ok(float)
+    }
+}
diff --git a/src/lazy/text/parse_result.rs b/src/lazy/text/parse_result.rs
index 7da90511..6dfb919b 100644
--- a/src/lazy/text/parse_result.rs
+++ b/src/lazy/text/parse_result.rs
@@ -143,6 +143,25 @@ impl<'data> From<InvalidInputError<'data>> for IonParseError<'data> {
 }
 
+// We cannot provide an analogous impl for `Incomplete` because it is missing necessary data.
+impl<'data> From> for IonError { + fn from(invalid_input_error: InvalidInputError) -> Self { + let mut message = String::from( + invalid_input_error + .description() + .unwrap_or("invalid Ion syntax encountered"), + ); + if let Some(label) = invalid_input_error.label { + message.push_str(" while "); + message.push_str(label.as_ref()); + } + let position = Position::with_offset(invalid_input_error.input.offset()) + .with_length(invalid_input_error.input.len()); + let decoding_error = DecodingError::new(message).with_position(position); + IonError::Decoding(decoding_error) + } +} + impl<'data> From>> for IonParseError<'data> { fn from(value: Err>) -> Self { match value { @@ -200,6 +219,31 @@ pub(crate) trait AddContext<'data, T> { ) -> IonResult<(TextBufferView<'data>, T)>; } +impl<'data, T> AddContext<'data, T> for nom::Err> { + fn with_context( + self, + label: impl Into>, + input: TextBufferView<'data>, + ) -> IonResult<(TextBufferView<'data>, T)> { + let ipe = IonParseError::from(self); + ipe.with_context(label, input) + } +} + +// Turns an IonParseError into an IonResult +impl<'data, T> AddContext<'data, T> for IonParseError<'data> { + fn with_context( + self, + label: impl Into>, + input: TextBufferView<'data>, + ) -> IonResult<(TextBufferView<'data>, T)> { + match self { + IonParseError::Incomplete => IonResult::incomplete(label, input.offset()), + IonParseError::Invalid(invalid_input_error) => Err(IonError::from(invalid_input_error)), + } + } +} + impl<'data, T> AddContext<'data, T> for IonParseResult<'data, T> { fn with_context( self, @@ -209,29 +253,7 @@ impl<'data, T> AddContext<'data, T> for IonParseResult<'data, T> { match self { // No change needed in the ok case Ok(matched) => Ok(matched), - // If the error was an incomplete - Err(e) => { - // Nom error to IonParseError - match IonParseError::from(e) { - IonParseError::Incomplete => IonResult::incomplete(label, input.offset()), - IonParseError::Invalid(invalid_input_error) => { - 
dbg!(&invalid_input_error.backtrace); - let mut message = String::from( - invalid_input_error - .description() - .unwrap_or("invalid text Ion syntax"), - ); - if let Some(label) = invalid_input_error.label { - message.push_str(" while "); - message.push_str(label.as_ref()); - } - let position = Position::with_offset(invalid_input_error.input.offset()) - .with_length(invalid_input_error.input.len()); - let decoding_error = DecodingError::new(message).with_position(position); - Err(IonError::Decoding(decoding_error)) - } - } - } + Err(e) => e.with_context(label, input), } } } diff --git a/src/lazy/text/raw/reader.rs b/src/lazy/text/raw/reader.rs index 22eedf61..94048d80 100644 --- a/src/lazy/text/raw/reader.rs +++ b/src/lazy/text/raw/reader.rs @@ -80,6 +80,12 @@ mod tests { 500 0x20 0b0101 + +inf + -inf + nan + 3.6e0 + 2.5e23 + -318e-2 "#; let mut reader = LazyRawTextReader::new(data.as_bytes()); @@ -128,6 +134,45 @@ mod tests { assert_eq!(lazy_int_binary_0101.ion_type(), IonType::Int); assert_eq!(lazy_int_binary_0101.read()?.expect_i64()?, 0b0101); // decimal 5 + // +inf + let lazy_float_pos_inf = reader.next()?.expect_value()?; + assert!(!lazy_float_pos_inf.is_null()); + assert_eq!(lazy_float_pos_inf.ion_type(), IonType::Float); + assert_eq!(lazy_float_pos_inf.read()?.expect_float()?, f64::INFINITY); + + // -inf + let lazy_float_neg_inf = reader.next()?.expect_value()?; + assert!(!lazy_float_neg_inf.is_null()); + assert_eq!(lazy_float_neg_inf.ion_type(), IonType::Float); + assert_eq!( + lazy_float_neg_inf.read()?.expect_float()?, + f64::NEG_INFINITY + ); + + // nan + let lazy_float_neg_inf = reader.next()?.expect_value()?; + assert!(!lazy_float_neg_inf.is_null()); + assert_eq!(lazy_float_neg_inf.ion_type(), IonType::Float); + assert!(lazy_float_neg_inf.read()?.expect_float()?.is_nan()); + + // 3.6e0 + let lazy_float = reader.next()?.expect_value()?; + assert!(!lazy_float.is_null()); + assert_eq!(lazy_float.ion_type(), IonType::Float); + 
assert_eq!(lazy_float.read()?.expect_float()?, 3.6f64);
+
+        // 2.5e23
+        let lazy_float = reader.next()?.expect_value()?;
+        assert!(!lazy_float.is_null());
+        assert_eq!(lazy_float.ion_type(), IonType::Float);
+        assert_eq!(lazy_float.read()?.expect_float()?, 2.5f64 * 10f64.powi(23));
+
+        // -318e-2
+        let lazy_float = reader.next()?.expect_value()?;
+        assert!(!lazy_float.is_null());
+        assert_eq!(lazy_float.ion_type(), IonType::Float);
+        assert_eq!(lazy_float.read()?.expect_float()?, -3.18);
+
         Ok(())
     }
 }
diff --git a/src/lazy/text/value.rs b/src/lazy/text/value.rs
index e586f677..1842ddd5 100644
--- a/src/lazy/text/value.rs
+++ b/src/lazy/text/value.rs
@@ -50,6 +50,7 @@ impl<'data> LazyRawValue<'data, TextEncoding> for LazyRawTextValue<'data> {
             MatchedValue::Null(ion_type) => RawValueRef::Null(ion_type),
             MatchedValue::Bool(b) => RawValueRef::Bool(b),
             MatchedValue::Int(i) => RawValueRef::Int(i.read(matched_input)?),
+            MatchedValue::Float(f) => RawValueRef::Float(f.read(matched_input)?),
         };
         Ok(value_ref)
     }

From 07d4a70c547cd731831b6a96c274112fd81dc45b Mon Sep 17 00:00:00 2001
From: Zack Slayton
Date: Thu, 27 Jul 2023 14:21:20 -0400
Subject: [PATCH 05/15] Adds LazyRawTextReader support for comments

---
 src/lazy/text/buffer.rs     | 57 ++++++++++++++++++++++++++++++++++++-
 src/lazy/text/raw/reader.rs | 20 +++++++++++--
 2 files changed, 74 insertions(+), 3 deletions(-)

diff --git a/src/lazy/text/buffer.rs b/src/lazy/text/buffer.rs
index e7f7d393..c74be95c 100644
--- a/src/lazy/text/buffer.rs
+++ b/src/lazy/text/buffer.rs
@@ -7,7 +7,7 @@ use crate::lazy::text::parse_result::{IonMatchResult, IonParseResult};
 use crate::lazy::text::value::LazyRawTextValue;
 use crate::{IonResult, IonType};
 use nom::branch::alt;
-use nom::bytes::streaming::{is_a, tag, take_while1};
+use nom::bytes::streaming::{is_a, is_not, tag, take_until, take_while1};
 use nom::character::streaming::{char, digit1, one_of};
 use nom::combinator::{map, opt, peek, recognize, success, value};
 use nom::error::{ErrorKind, 
ParseError}; @@ -157,6 +157,55 @@ impl<'data> TextBufferView<'data> { alt((Self::match_whitespace, Self::match_nothing))(self) } + /// Matches any amount of contiguous comments and whitespace, including none. + pub fn match_optional_comments_and_whitespace(self) -> IonMatchResult<'data> { + recognize(many0_count(alt(( + Self::match_whitespace, + Self::match_comment, + ))))(self) + } + + /// Matches a single + /// // Rest-of-the-line + /// or + /// /* multi + /// line */ + /// comment + pub fn match_comment(self) -> IonMatchResult<'data> { + alt(( + Self::match_rest_of_line_comment, + Self::match_multiline_comment, + ))(self) + } + + /// Matches a single rest-of-the-line comment. + fn match_rest_of_line_comment(self) -> IonMatchResult<'data> { + preceded( + // Matches a leading "//"... + tag("//"), + // ...followed by either... + alt(( + // ...one or more non-EOL characters... + is_not("\r\n"), + // ...or any EOL character. + peek(recognize(one_of("\r\n"))), + // In either case, the line ending will not be consumed. + )), + )(self) + } + + /// Matches a single multiline comment. + fn match_multiline_comment(self) -> IonMatchResult<'data> { + recognize(delimited( + // Matches a leading "/*"... + tag("/*"), + // ...any number of non-"*/" characters... + take_until("*/"), + // ...and then a closing "*/" + tag("*/"), + ))(self) + } + /// Matches a single top-level scalar value, the beginning of a container, or an IVM. 
pub fn match_top_level(self) -> IonParseResult<'data, RawStreamItem<'data, TextEncoding>> { let (remaining, value) = match self.match_value() { @@ -569,6 +618,12 @@ impl<'data> nom::Slice> for TextBufferView<'data> { } } +impl<'data> nom::FindSubstring<&str> for TextBufferView<'data> { + fn find_substring(&self, substr: &str) -> Option { + self.data.find_substring(substr) + } +} + impl<'data> nom::InputTakeAtPosition for TextBufferView<'data> { type Item = u8; diff --git a/src/lazy/text/raw/reader.rs b/src/lazy/text/raw/reader.rs index 94048d80..8b41a5e5 100644 --- a/src/lazy/text/raw/reader.rs +++ b/src/lazy/text/raw/reader.rs @@ -42,8 +42,11 @@ impl<'data> LazyRawTextReader<'data> { return IonResult::incomplete("reading a top-level value", buffer.offset()); } let (buffer_after_whitespace, _whitespace) = buffer - .match_optional_whitespace() - .with_context("skipping whitespace between top-level values", buffer)?; + .match_optional_comments_and_whitespace() + .with_context( + "skipping comments and whitespace between top-level values", + buffer, + )?; let (remaining, matched) = buffer_after_whitespace .match_top_level() .with_context("reading a top-level value", buffer_after_whitespace)?; @@ -72,14 +75,27 @@ mod tests { #[test] fn test_top_level() -> IonResult<()> { let data = r#" + /* + This test demonstrates lazily reading top-level values + of various Ion types. The values are interspersed with + different kinds of comments and whitespace. 
+ */ + + // Typed nulls null null.bool null.int + + // Booleans false true + + // Integers 500 0x20 0b0101 + + // Floats +inf -inf nan From 181e0a548ccac7734dab064e77461125d981440d Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Fri, 28 Jul 2023 17:36:01 -0400 Subject: [PATCH 06/15] Adds LazyRawTextReader support for reading strings --- src/lazy/binary/raw/value.rs | 3 +- src/lazy/mod.rs | 1 + src/lazy/raw_value_ref.rs | 27 +++- src/lazy/str_ref.rs | 82 ++++++++++++ src/lazy/system_reader.rs | 4 +- src/lazy/text/as_utf8.rs | 6 + src/lazy/text/buffer.rs | 134 +++++++++++++++++-- src/lazy/text/encoded_value.rs | 7 +- src/lazy/text/matched.rs | 234 ++++++++++++++++++++++++++++++++- src/lazy/text/parse_result.rs | 28 ++-- src/lazy/text/raw/reader.rs | 191 ++++++++++++++------------- src/lazy/text/value.rs | 7 +- src/lazy/value_ref.rs | 12 +- 13 files changed, 605 insertions(+), 131 deletions(-) create mode 100644 src/lazy/str_ref.rs diff --git a/src/lazy/binary/raw/value.rs b/src/lazy/binary/raw/value.rs index 9ed2340e..a8cb397f 100644 --- a/src/lazy/binary/raw/value.rs +++ b/src/lazy/binary/raw/value.rs @@ -9,6 +9,7 @@ use crate::lazy::decoder::private::LazyRawValuePrivate; use crate::lazy::decoder::LazyRawValue; use crate::lazy::encoding::BinaryEncoding; use crate::lazy::raw_value_ref::RawValueRef; +use crate::lazy::str_ref::StrRef; use crate::result::IonFailure; use crate::types::SymbolId; use crate::{Decimal, Int, IonError, IonResult, IonType, RawSymbolTokenRef, Timestamp}; @@ -390,7 +391,7 @@ impl<'data> LazyRawBinaryValue<'data> { let raw_bytes = self.value_body()?; let text = std::str::from_utf8(raw_bytes) .map_err(|_| IonError::decoding_error("found a string with invalid utf-8 data"))?; - Ok(RawValueRef::String(text)) + Ok(RawValueRef::String(StrRef::from(text))) } /// Helper method called by [`Self::read`]. Reads the current value as a blob. 
diff --git a/src/lazy/mod.rs b/src/lazy/mod.rs index 3f42baa8..af004584 100644 --- a/src/lazy/mod.rs +++ b/src/lazy/mod.rs @@ -8,6 +8,7 @@ pub mod raw_stream_item; pub mod raw_value_ref; pub mod reader; pub mod sequence; +pub mod str_ref; pub mod r#struct; pub mod system_reader; pub mod system_stream_item; diff --git a/src/lazy/raw_value_ref.rs b/src/lazy/raw_value_ref.rs index a0da98eb..5e76db66 100644 --- a/src/lazy/raw_value_ref.rs +++ b/src/lazy/raw_value_ref.rs @@ -1,4 +1,5 @@ use crate::lazy::decoder::LazyDecoder; +use crate::lazy::str_ref::StrRef; use crate::result::IonFailure; use crate::{Decimal, Int, IonResult, IonType, RawSymbolTokenRef, Timestamp}; use std::fmt::{Debug, Formatter}; @@ -15,7 +16,7 @@ pub enum RawValueRef<'data, D: LazyDecoder<'data>> { Float(f64), Decimal(Decimal), Timestamp(Timestamp), - String(&'data str), + String(StrRef<'data>), Symbol(RawSymbolTokenRef<'data>), Blob(&'data [u8]), Clob(&'data [u8]), @@ -24,6 +25,28 @@ pub enum RawValueRef<'data, D: LazyDecoder<'data>> { Struct(D::Struct), } +// Provides equality for scalar types, but not containers. +impl<'data, D: LazyDecoder<'data>> PartialEq for RawValueRef<'data, D> { + fn eq(&self, other: &Self) -> bool { + use RawValueRef::*; + match (self, other) { + (Null(i1), Null(i2)) => i1 == i2, + (Bool(b1), Bool(b2)) => b1 == b2, + (Int(i1), Int(i2)) => i1 == i2, + (Float(f1), Float(f2)) => f1 == f2, + (Decimal(d1), Decimal(d2)) => d1 == d2, + (Timestamp(t1), Timestamp(t2)) => t1 == t2, + (String(s1), String(s2)) => s1 == s2, + (Symbol(s1), Symbol(s2)) => s1 == s2, + (Blob(b1), Blob(b2)) => b1 == b2, + (Clob(c1), Clob(c2)) => c1 == c2, + // We cannot compare lazy containers as we cannot guarantee that their complete contents + // are available in the buffer. Is `{foo: bar}` equal to `{foo: b`? 
+ _ => false, + } + } +} + impl<'data, D: LazyDecoder<'data>> Debug for RawValueRef<'data, D> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { @@ -101,7 +124,7 @@ impl<'data, D: LazyDecoder<'data>> RawValueRef<'data, D> { } } - pub fn expect_string(self) -> IonResult<&'data str> { + pub fn expect_string(self) -> IonResult> { if let RawValueRef::String(s) = self { Ok(s) } else { diff --git a/src/lazy/str_ref.rs b/src/lazy/str_ref.rs new file mode 100644 index 00000000..17161e0d --- /dev/null +++ b/src/lazy/str_ref.rs @@ -0,0 +1,82 @@ +use crate::text::text_formatter::IonValueFormatter; +use crate::Str; +use std::borrow::Cow; +use std::fmt::{Display, Formatter}; +use std::ops::Deref; + +#[derive(Clone, PartialEq, Debug)] +pub struct StrRef<'data> { + text: Cow<'data, str>, +} + +impl<'data> StrRef<'data> { + pub fn to_owned(&self) -> Str { + Str::from(self.as_ref()) + } + + pub fn into_owned(self) -> Str { + Str::from(self) + } + + pub fn text(&self) -> &str { + self.as_ref() + } +} + +impl<'data> Deref for StrRef<'data> { + type Target = str; + + fn deref(&self) -> &Self::Target { + self.text.as_ref() + } +} + +impl<'data> PartialEq for StrRef<'data> { + fn eq(&self, other: &str) -> bool { + self.text() == other + } +} + +impl<'data> PartialEq<&str> for StrRef<'data> { + fn eq(&self, other: &&str) -> bool { + self.text() == *other + } +} + +impl<'data> PartialEq> for str { + fn eq(&self, other: &StrRef<'data>) -> bool { + self == other.text() + } +} + +impl<'data> Display for StrRef<'data> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let mut formatter = IonValueFormatter { output: f }; + formatter + .format_string(self.text()) + .map_err(|_| std::fmt::Error) + } +} + +impl<'a> From<&'a str> for StrRef<'a> { + fn from(value: &'a str) -> Self { + StrRef { + text: Cow::from(value), + } + } +} + +impl<'a> From for StrRef<'a> { + fn from(value: String) -> Self { + StrRef { + text: Cow::from(value), + } + } +} + +impl<'data> 
From> for Str { + fn from(str_ref: StrRef<'data>) -> Self { + let text: String = str_ref.text.into_owned(); + Str::from(text) + } +} diff --git a/src/lazy/system_reader.rs b/src/lazy/system_reader.rs index bdf76de2..bee0458a 100644 --- a/src/lazy/system_reader.rs +++ b/src/lazy/system_reader.rs @@ -235,8 +235,8 @@ impl<'data, D: LazyDecoder<'data>> LazySystemReader<'data, D> { fn process_symbols(pending_lst: &mut PendingLst, symbols: &D::Value) -> IonResult<()> { if let RawValueRef::List(list) = symbols.read()? { for symbol_text in list.iter() { - if let RawValueRef::String(text) = symbol_text?.read()? { - pending_lst.symbols.push(Some(text.to_owned())) + if let RawValueRef::String(str_ref) = symbol_text?.read()? { + pending_lst.symbols.push(Some(str_ref.text().to_owned())) } else { pending_lst.symbols.push(None) } diff --git a/src/lazy/text/as_utf8.rs b/src/lazy/text/as_utf8.rs index 9be4784c..69dfa46e 100644 --- a/src/lazy/text/as_utf8.rs +++ b/src/lazy/text/as_utf8.rs @@ -12,6 +12,12 @@ pub(crate) trait AsUtf8 { fn as_utf8(&self, position: impl Into) -> IonResult<&str>; } +impl AsUtf8 for [u8] { + fn as_utf8(&self, position: impl Into) -> IonResult<&str> { + bytes_as_utf8(self, position) + } +} + impl AsUtf8 for SmallVec<[u8; N]> { fn as_utf8(&self, position: impl Into) -> IonResult<&str> { bytes_as_utf8(self.as_ref(), position) diff --git a/src/lazy/text/buffer.rs b/src/lazy/text/buffer.rs index c74be95c..d7e0bc64 100644 --- a/src/lazy/text/buffer.rs +++ b/src/lazy/text/buffer.rs @@ -1,11 +1,8 @@ -use crate::lazy::encoding::TextEncoding; -use crate::lazy::raw_stream_item::RawStreamItem; -use crate::lazy::text::encoded_value::EncodedTextValue; -use crate::lazy::text::matched::{MatchedFloat, MatchedInt, MatchedValue}; -use crate::lazy::text::parse_result::IonParseError; -use crate::lazy::text::parse_result::{IonMatchResult, IonParseResult}; -use crate::lazy::text::value::LazyRawTextValue; -use crate::{IonResult, IonType}; +use std::fmt::{Debug, Formatter}; +use 
std::iter::{Copied, Enumerate}; +use std::ops::{RangeFrom, RangeTo}; +use std::slice::Iter; + use nom::branch::alt; use nom::bytes::streaming::{is_a, is_not, tag, take_until, take_while1}; use nom::character::streaming::{char, digit1, one_of}; @@ -14,10 +11,18 @@ use nom::error::{ErrorKind, ParseError}; use nom::multi::many0_count; use nom::sequence::{delimited, pair, preceded, separated_pair, terminated, tuple}; use nom::{CompareResult, IResult, InputLength, InputTake, Needed, Parser}; -use std::fmt::{Debug, Formatter}; -use std::iter::{Copied, Enumerate}; -use std::ops::{RangeFrom, RangeTo}; -use std::slice::Iter; + +use crate::lazy::encoding::TextEncoding; +use crate::lazy::raw_stream_item::RawStreamItem; +use crate::lazy::text::encoded_value::EncodedTextValue; +use crate::lazy::text::matched::{ + MatchedFloat, MatchedInt, MatchedShortString, MatchedString, MatchedValue, +}; +use crate::lazy::text::parse_result::IonParseError; +use crate::lazy::text::parse_result::{IonMatchResult, IonParseResult}; +use crate::lazy::text::value::LazyRawTextValue; +use crate::result::DecodingError; +use crate::{IonError, IonResult, IonType}; impl<'a> Debug for TextBufferView<'a> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { @@ -137,6 +142,19 @@ impl<'data> TextBufferView<'data> { self.data.is_empty() } + /// Attempts to view the contents of the buffer as a UTF-8 `&str`. + pub fn as_text<'a>(&'a self) -> IonResult<&'data str> { + // On its surface, this method very closely resembles the `AsUtf8` trait's method. + // However, this one returns a `&'data str` instead of a `&'a str`, which is to say + // that the string that's returned lives as long as the data itself, not just the duration + // of the lifetime introduced by this method call. 
+ std::str::from_utf8(&self.data).map_err(move |_| { + let decoding_error = + DecodingError::new("encountered invalid UTF-8").with_position(self.offset()); + IonError::Decoding(decoding_error) + }) + } + pub fn match_whitespace(self) -> IonMatchResult<'data> { is_a(WHITESPACE_CHARACTERS_AS_STR)(self) } @@ -247,6 +265,16 @@ impl<'data> TextBufferView<'data> { EncodedTextValue::new(MatchedValue::Float(matched_float), self.offset(), length) }, ), + map( + match_and_length(Self::match_string), + |(matched_string, length)| { + EncodedTextValue::new( + MatchedValue::String(matched_string), + self.offset(), + length, + ) + }, + ), // TODO: The other Ion types )) .map(|encoded_value| LazyRawTextValue { @@ -532,6 +560,49 @@ impl<'data> TextBufferView<'data> { pub fn match_any_sign(self) -> IonMatchResult<'data> { alt((tag("+"), tag("-")))(self) } + + /// Matches short- or long-form string. + fn match_string(self) -> IonParseResult<'data, MatchedString> { + alt((Self::match_short_string, Self::match_long_string))(self) + } + + /// Matches a short string. For example: `"foo"` + fn match_short_string(self) -> IonParseResult<'data, MatchedString> { + delimited(char('"'), Self::match_short_string_body, char('"')) + .map(|(_matched, contains_escaped_chars)| { + MatchedString::Short(MatchedShortString::new(contains_escaped_chars)) + }) + .parse(self) + } + + /// Returns a matched buffer and a boolean indicating whether any escaped characters were + /// found in the short string. + fn match_short_string_body(self) -> IonParseResult<'data, (Self, bool)> { + let mut is_escaped = false; + let mut contains_escaped_chars = false; + for (index, byte) in self.bytes().iter().enumerate() { + if is_escaped { + // If we're escaped, the previous byte was a \ and we ignore this one. 
+ is_escaped = false; + continue; + } + if *byte == b'\\' { + is_escaped = true; + contains_escaped_chars = true; + continue; + } + if *byte == b'\"' { + let matched = self.slice(0, index); + let remaining = self.slice_to_end(index); + return Ok((remaining, (matched, contains_escaped_chars))); + } + } + Err(nom::Err::Incomplete(Needed::Unknown)) + } + + fn match_long_string(self) -> IonParseResult<'data, MatchedString> { + todo!() + } } // === nom trait implementations === @@ -934,4 +1005,43 @@ mod tests { mismatch_float(input); } } + + #[test] + fn test_match_string() { + fn match_string(input: &str) { + MatchTest::new(input).expect_match(match_length(TextBufferView::match_string)); + } + fn mismatch_string(input: &str) { + MatchTest::new(input).expect_mismatch(match_length(TextBufferView::match_string)); + } + + let good_inputs = &[ + r#" + "hello" + "#, + r#" + "😀😀😀" + "#, + r#" + "this has an escaped quote \" right in the middle" + "#, + ]; + for input in good_inputs { + match_string(input); + } + + let bad_inputs = &[ + // Missing an opening quote + r#" + hello" + "#, + // Missing a trailing quote + r#" + "hello + "#, + ]; + for input in bad_inputs { + mismatch_string(input); + } + } } diff --git a/src/lazy/text/encoded_value.rs b/src/lazy/text/encoded_value.rs index 0c649d08..17779c2d 100644 --- a/src/lazy/text/encoded_value.rs +++ b/src/lazy/text/encoded_value.rs @@ -8,7 +8,7 @@ use std::ops::Range; /// Each [`LazyRawTextValue`](crate::lazy::text::value::LazyRawTextValue) contains an `EncodedValue`, /// allowing a user to re-read (that is: parse) the body of the value as many times as necessary /// without re-parsing its header information each time. 
-#[derive(Clone, Copy, Debug, PartialEq)] +#[derive(Clone, Debug, PartialEq)] pub(crate) struct EncodedTextValue { // Each encoded text value has up to three components, appearing in the following order: // @@ -115,6 +115,7 @@ impl EncodedTextValue { MatchedValue::Bool(_) => IonType::Bool, MatchedValue::Int(_) => IonType::Int, MatchedValue::Float(_) => IonType::Float, + MatchedValue::String(_) => IonType::String, } } @@ -163,8 +164,8 @@ impl EncodedTextValue { self.data_length + u32::max(self.annotations_offset, self.field_name_offset) as usize } - pub fn matched(&self) -> MatchedValue { - self.matched_value + pub fn matched(&self) -> &MatchedValue { + &self.matched_value } } diff --git a/src/lazy/text/matched.rs b/src/lazy/text/matched.rs index 560d828e..7c21b3e2 100644 --- a/src/lazy/text/matched.rs +++ b/src/lazy/text/matched.rs @@ -19,26 +19,30 @@ //! use the previously recorded information to minimize the amount of information that needs to be //! re-discovered. +use nom::character::is_hex_digit; use std::num::IntErrorKind; +use std::ops::Range; use num_bigint::BigInt; use num_traits::Num; use smallvec::SmallVec; +use crate::lazy::str_ref::StrRef; use crate::lazy::text::as_utf8::AsUtf8; use crate::lazy::text::buffer::TextBufferView; use crate::lazy::text::parse_result::InvalidInputError; -use crate::result::IonFailure; +use crate::result::{DecodingError, IonFailure}; use crate::{Int, IonError, IonResult, IonType}; /// A partially parsed Ion value. -#[derive(Copy, Clone, Debug, PartialEq)] +#[derive(Clone, Debug, PartialEq)] pub(crate) enum MatchedValue { // `Null` and `Bool` are fully parsed because they only involve matching a keyword. Null(IonType), Bool(bool), Int(MatchedInt), Float(MatchedFloat), + String(MatchedString), // TODO: ...the other types } @@ -154,3 +158,229 @@ impl MatchedFloat { Ok(float) } } + +#[derive(Clone, Debug, PartialEq)] +pub(crate) enum MatchedString { + /// The string only has one segment. (e.g. 
"foo") + Short(MatchedShortString), + /// The string is in multiple segments: + /// """hello,""" + /// """ world!""" + Long(MatchedLongString), +} + +#[derive(Clone, Debug, PartialEq)] +pub(crate) struct MatchedLongString { + // Keep a list of all the string segment ranges we found. + // If the user asks to read the string, we'll collate the segments into a single string. + slices: Vec>, +} + +#[derive(Clone, Copy, Debug, PartialEq)] +pub(crate) struct MatchedShortString { + contains_escaped_chars: bool, +} + +impl MatchedShortString { + pub fn new(contains_escaped_chars: bool) -> Self { + Self { + contains_escaped_chars, + } + } + pub fn contains_escaped_chars(&self) -> bool { + self.contains_escaped_chars + } +} + +impl MatchedString { + // Strings longer than 64 bytes will allocate a larger space on the heap. + const STACK_ALLOC_BUFFER_CAPACITY: usize = 64; + + pub fn read<'a, 'data>( + &'a self, + matched_input: TextBufferView<'data>, + ) -> IonResult> { + match self { + MatchedString::Short(short) => self.read_short_string(*short, matched_input), + MatchedString::Long(_) => todo!("long-form strings"), + } + } + + fn read_short_string<'a, 'data>( + &'a self, + short: MatchedShortString, + matched_input: TextBufferView<'data>, + ) -> IonResult> { + // Take a slice of the input that ignores the first and last bytes, which are quotes. + let body = matched_input.slice(1, matched_input.len() - 2); + if !short.contains_escaped_chars() { + // There are no escaped characters, so we can just validate the string in-place. + let text = body.as_text()?; + let str_ref = StrRef::from(text); + return Ok(str_ref); + } + // Otherwise, there are escaped characters. We need to build a new version of our string + // that replaces the escaped characters with their corresponding bytes. 
+ let mut sanitized = Vec::with_capacity(matched_input.len()); + + Self::escape_short_string(body, &mut sanitized)?; + let text = String::from_utf8(sanitized).unwrap(); + Ok(StrRef::from(text.to_string())) + } + + fn escape_short_string( + matched_input: TextBufferView, + sanitized: &mut Vec, + ) -> IonResult<()> { + let mut remaining = matched_input; + while !remaining.is_empty() { + let next_escape = remaining.bytes().iter().position(|byte| *byte == b'\\'); + remaining = if let Some(escape_offset) = next_escape { + // Everything up to the '\' is already clean. Write that slice to 'sanitized'. + let already_clean = remaining.slice(0, escape_offset); + sanitized.extend_from_slice(already_clean.bytes()); + // Everything starting from the '\' needs to be evaluated. + let contains_escapes = remaining.slice_to_end(escape_offset); + Self::write_escaped(contains_escapes, sanitized)? + } else { + sanitized.extend_from_slice(remaining.bytes()); + // 'remaining' is now empty + remaining.slice_to_end(remaining.len()) + }; + } + + Ok(()) + } + + fn write_escaped<'data>( + input: TextBufferView<'data>, + sanitized: &mut Vec, + ) -> IonResult> { + // Note that by the time this method has been called, the parser has already confirmed that + // there is an appropriate closing delimiter. Thus, if any of the branches below run out of + // data, it means that it's a fatal error and not just an Incomplete. + debug_assert!(!input.is_empty()); + debug_assert!(input.bytes()[0] == b'\\'); + if input.len() == 1 { + return Err(IonError::Decoding( + DecodingError::new("found an escape ('\\') with no subsequent character") + .with_position(input.offset()), + )); + } + let input_after_escape = input.slice_to_end(2); // After (e.g.) '\x' + let escape_id = input.bytes()[1]; + let substitute = match escape_id { + b'n' => b'\n', + b'r' => b'\r', + b't' => b'\t', + b'\\' => b'\\', + b'/' => b'/', + b'"' => b'"', + b'\'' => b'\'', + b'?' 
=> b'?',
+            b'0' => 0x00u8, // NUL
+            b'a' => 0x07u8, // alert BEL
+            b'b' => 0x08u8, // backspace
+            b'v' => 0x0Bu8, // vertical tab
+            b'f' => 0x0Cu8, // form feed
+            // If the byte following the '\' is a real newline (that is: 0x0A), we discard it.
+            b'\n' => return Ok(input_after_escape),
+            // These cases require more sophisticated parsing, not just a 1-to-1 mapping of bytes
+            b'x' => return Self::hex_digits_code_point(2, input_after_escape, sanitized),
+            b'u' => return Self::hex_digits_code_point(4, input_after_escape, sanitized),
+            b'U' => return Self::hex_digits_code_point(8, input_after_escape, sanitized),
+            _ => {
+                return Err(IonError::Decoding(
+                    DecodingError::new(format!("invalid escape sequence '\\{}'", escape_id))
+                        .with_position(input.offset()),
+                ))
+            }
+        };
+
+        sanitized.push(substitute);
+        Ok(input_after_escape)
+    }
+
+    fn hex_digits_code_point<'a, 'data>(
+        num_digits: usize,
+        input: TextBufferView<'data>,
+        sanitized: &'a mut Vec<u8>,
+    ) -> IonResult<TextBufferView<'data>> {
+        if input.len() < num_digits {
+            return Err(IonError::Decoding(
+                DecodingError::new(format!(
+                    "found a {}-hex-digit escape sequence with only {} digits",
+                    num_digits,
+                    input.len()
+                ))
+                .with_position(input.offset()),
+            ));
+        }
+
+        let hex_digit_bytes = &input.bytes()[..num_digits];
+
+        let all_are_hex_digits = hex_digit_bytes
+            .iter()
+            .take(num_digits)
+            .copied()
+            .all(is_hex_digit);
+        if !all_are_hex_digits {
+            return Err(IonError::Decoding(
+                DecodingError::new(format!(
+                    "found a {}-hex-digit escape sequence that contained an invalid hex digit",
+                    num_digits,
+                ))
+                .with_position(input.offset()),
+            ));
+        }
+        // We just confirmed all of the digits are ASCII hex digits, so these steps cannot fail.
+        let hex_digits = std::str::from_utf8(hex_digit_bytes).unwrap();
+        let code_point = u32::from_str_radix(hex_digits, 16).unwrap();
+
+        // Check to see if this is a high surrogate; if it is, our code point isn't complete.
Another + // unicode escape representing the low surrogate has to be next in the input to complete it. + // See the docs for this helper function for details. (Note: this will only ever be true for + // 4- and 8-digit escape sequences. `\x` escapes don't have enough digits to represent a + // high surrogate.) + if code_point_is_a_high_surrogate(code_point) { + todo!("support surrogate pairs") + } + + // A Rust `char` can represent any Unicode scalar value--a code point that is not part of a + // surrogate pair. If the value we found isn't a high surrogate, then it's a complete scalar + // value. We can safely convert it to a `char`. + let character = char::from_u32(code_point).unwrap(); + let utf8_buffer: &mut [u8; 4] = &mut [0; 4]; + let utf8_encoded = character.encode_utf8(utf8_buffer); + sanitized.extend_from_slice(utf8_encoded.as_bytes()); + + // Skip beyond the digits we just processed + Ok(input.slice_to_end(num_digits)) + } +} + +/// Returns `true` if the provided code point is a utf-16 high surrogate. +/// +/// Terse primer: Unicode text is made up of a stream of unsigned integers called 'code points'. +/// What a person might think of as a 'character' (for example: 'a', '本', or '🥸') can be made up +/// of one or more code points. +/// +/// A single code point can require up to 21 bits. Depending on which Unicode encoding you're using, +/// these 21 bits can come with different amounts of additional overhead bits: +/// * In utf-8, a code point can be 1, 2, 3, or 4 bytes, with some bits in each byte being used +/// for the code point and others being used to indicate whether more bytes are coming. +/// * In utf-16, a code point can be 2 bytes or 4 bytes. If it's four bytes, the first two bytes will +/// be a 'high surrogate' (a value between 0xD800 and 0xDFFF) to communicate that another two +/// bytes are coming to complete the code point. +/// * In utf-32, a code point is always 32 bits. This is a bit wasteful, but makes for simple +/// processing. 
+/// +/// This helper function detects high surrogates (which are only used in utf-16) so the parser +/// can know to require a second one immediately following. +/// +/// Further reading: +/// * +/// * +fn code_point_is_a_high_surrogate(value: u32) -> bool { + (0xD800..=0xDFFF).contains(&value) +} diff --git a/src/lazy/text/parse_result.rs b/src/lazy/text/parse_result.rs index 6dfb919b..5def24ca 100644 --- a/src/lazy/text/parse_result.rs +++ b/src/lazy/text/parse_result.rs @@ -68,10 +68,6 @@ pub struct InvalidInputError<'data> { label: Option>, // The nature of the error--what went wrong? description: Option>, - // A backtrace of errors that occurred leading to this one. - // XXX: This is the most expensive part of error handling and is likely not very useful. - // Consider removing it if it doesn't carry its weight. - backtrace: Vec>, // The nom ErrorKind, which indicates which nom-provided parser encountered the error we're // bubbling up. nom_error_kind: Option, @@ -85,7 +81,6 @@ impl<'data> InvalidInputError<'data> { label: None, description: None, nom_error_kind: None, - backtrace: Vec::new(), } } @@ -107,10 +102,6 @@ impl<'data> InvalidInputError<'data> { self } - pub(crate) fn append_error(&mut self, error: InvalidInputError<'data>) { - self.backtrace.push(error) - } - /// Returns a reference to the `description` text, if any. 
pub fn description(&self) -> Option<&str> { self.description.as_deref() @@ -155,6 +146,19 @@ impl<'data> From> for IonError { message.push_str(" while "); message.push_str(label.as_ref()); } + message.push_str("; buffer: "); + let input = invalid_input_error.input; + let buffer_text = if let Ok(text) = invalid_input_error.input.as_text() { + // TODO: This really should be graphemes instead of chars() + text.chars().take(32).collect::() + } else { + format!( + "{:X?}", + &invalid_input_error.input.bytes()[..(32.min(input.len()))] + ) + }; + message.push_str(buffer_text.as_str()); + message.push_str("..."); let position = Position::with_offset(invalid_input_error.input.offset()) .with_length(invalid_input_error.input.len()); let decoding_error = DecodingError::new(message).with_position(position); @@ -199,14 +203,10 @@ impl<'data> ParseError> for IonParseError<'data> { .into() } - fn append(input: TextBufferView<'data>, kind: ErrorKind, mut other: Self) -> Self { + fn append(_input: TextBufferView<'data>, _kind: ErrorKind, other: Self) -> Self { // When an error stack is being built, this method is called to give the error // type an opportunity to aggregate the errors into a collection or a more descriptive // message. For now, we simply allow the most recent error to take precedence. 
- let new_error = InvalidInputError::new(input).with_nom_error_kind(kind); - if let IonParseError::Invalid(invalid_input_error) = &mut other { - invalid_input_error.backtrace.push(new_error) - } other } } diff --git a/src/lazy/text/raw/reader.rs b/src/lazy/text/raw/reader.rs index 8b41a5e5..54777894 100644 --- a/src/lazy/text/raw/reader.rs +++ b/src/lazy/text/raw/reader.rs @@ -41,12 +41,17 @@ impl<'data> LazyRawTextReader<'data> { if buffer.is_empty() { return IonResult::incomplete("reading a top-level value", buffer.offset()); } - let (buffer_after_whitespace, _whitespace) = buffer - .match_optional_comments_and_whitespace() - .with_context( - "skipping comments and whitespace between top-level values", - buffer, - )?; + + let (buffer_after_whitespace, _whitespace) = + match buffer.match_optional_comments_and_whitespace() { + Ok((buf, ws)) => (buf, ws), + Err(nom::Err::Incomplete(_)) => return Ok(RawStreamItem::EndOfStream), + Err(e) => return IonResult::decoding_error(format!("broken: {:?}", e)), + }; + + if buffer_after_whitespace.is_empty() { + return Ok(RawStreamItem::EndOfStream); + } let (remaining, matched) = buffer_after_whitespace .match_top_level() .with_context("reading a top-level value", buffer_after_whitespace)?; @@ -70,6 +75,7 @@ impl<'data> LazyRawReader<'data, TextEncoding> for LazyRawTextReader<'data> { mod tests { use super::*; use crate::lazy::decoder::LazyRawValue; + use crate::lazy::raw_value_ref::RawValueRef; use crate::IonType; #[test] @@ -82,113 +88,120 @@ mod tests { */ // Typed nulls - null - null.bool - null.int + + null + null.bool + null.int // Booleans - false - true + + false + true // Integers - 500 - 0x20 - 0b0101 + + 500 + 0x20 + 0b0101 // Floats - +inf - -inf - nan - 3.6e0 - 2.5e23 - -318e-2 + + +inf + -inf + nan + 3.6e0 + 2.5e23 + -318e-2 + + // Strings + + "Hello!" + "foo bar baz" + "😎😎😎" + "lol\n\r\0wat" // Single-character escapes + "\x48ello, \x77orld!" // \x 2-digit hex escape + "\u0048ello, \u0077orld!" 
// \u 4-digit hex escape + "\U00000048ello, \U00000077orld!" // \U 8-digit hex escape + "#; - let mut reader = LazyRawTextReader::new(data.as_bytes()); - // null - let lazy_untyped_null = reader.next()?.expect_value()?; - assert!(lazy_untyped_null.is_null()); - assert_eq!(lazy_untyped_null.ion_type(), IonType::Null); + // Make a mutable string so we can append some things that require Rust-level escapes + let mut data = String::from(data); + // Escaped newlines are discarded + data.push_str("\"Hello,\\\n world!\""); + + fn expect_next<'a, 'data>( + reader: &'a mut LazyRawTextReader<'data>, + expected: RawValueRef<'data, TextEncoding>, + ) { + let lazy_value = reader + .next() + .expect("advancing the reader failed") + .expect_value() + .expect("expected a value"); + assert_eq!( + matches!(expected, RawValueRef::Null(_)), + lazy_value.is_null() + ); + let value_ref = lazy_value.read().expect("reading failed"); + assert_eq!(value_ref, expected, "{:?} != {:?}", value_ref, expected); + } - // null.bool - let lazy_null_bool = reader.next()?.expect_value()?; - assert!(lazy_null_bool.is_null()); - assert_eq!(lazy_null_bool.ion_type(), IonType::Bool); + let reader = &mut LazyRawTextReader::new(data.as_bytes()); + // null + expect_next(reader, RawValueRef::Null(IonType::Null)); + // null.bool + expect_next(reader, RawValueRef::Null(IonType::Bool)); // null.int - let lazy_null_int = reader.next()?.expect_value()?; - assert!(lazy_null_int.is_null()); - assert_eq!(lazy_null_int.ion_type(), IonType::Int); + expect_next(reader, RawValueRef::Null(IonType::Int)); // false - let lazy_bool_false = reader.next()?.expect_value()?; - assert!(!lazy_bool_false.is_null()); - assert_eq!(lazy_bool_false.ion_type(), IonType::Bool); - assert!(!lazy_bool_false.read()?.expect_bool()?); - + expect_next(reader, RawValueRef::Bool(false)); // true - let lazy_bool_true = reader.next()?.expect_value()?; - assert!(!lazy_bool_true.is_null()); - assert_eq!(lazy_bool_true.ion_type(), IonType::Bool); - 
assert!(lazy_bool_true.read()?.expect_bool()?); + expect_next(reader, RawValueRef::Bool(true)); // 500 - let lazy_int_decimal_500 = reader.next()?.expect_value()?; - assert!(!lazy_int_decimal_500.is_null()); - assert_eq!(lazy_int_decimal_500.ion_type(), IonType::Int); - assert_eq!(lazy_int_decimal_500.read()?.expect_i64()?, 500); - + expect_next(reader, RawValueRef::Int(500.into())); // 0x20 - let lazy_int_hex_20 = reader.next()?.expect_value()?; - assert!(!lazy_int_hex_20.is_null()); - assert_eq!(lazy_int_hex_20.ion_type(), IonType::Int); - assert_eq!(lazy_int_hex_20.read()?.expect_i64()?, 0x20); // decimal 32 - + expect_next(reader, RawValueRef::Int(0x20.into())); // 0b0101 - let lazy_int_binary_0101 = reader.next()?.expect_value()?; - assert!(!lazy_int_binary_0101.is_null()); - assert_eq!(lazy_int_binary_0101.ion_type(), IonType::Int); - assert_eq!(lazy_int_binary_0101.read()?.expect_i64()?, 0b0101); // decimal 5 + expect_next(reader, RawValueRef::Int(0b0101.into())); // +inf - let lazy_float_pos_inf = reader.next()?.expect_value()?; - assert!(!lazy_float_pos_inf.is_null()); - assert_eq!(lazy_float_pos_inf.ion_type(), IonType::Float); - assert_eq!(lazy_float_pos_inf.read()?.expect_float()?, f64::INFINITY); - + expect_next(reader, RawValueRef::Float(f64::INFINITY)); // -inf - let lazy_float_neg_inf = reader.next()?.expect_value()?; - assert!(!lazy_float_neg_inf.is_null()); - assert_eq!(lazy_float_neg_inf.ion_type(), IonType::Float); - assert_eq!( - lazy_float_neg_inf.read()?.expect_float()?, - f64::NEG_INFINITY - ); - + expect_next(reader, RawValueRef::Float(f64::NEG_INFINITY)); // nan - let lazy_float_neg_inf = reader.next()?.expect_value()?; - assert!(!lazy_float_neg_inf.is_null()); - assert_eq!(lazy_float_neg_inf.ion_type(), IonType::Float); - assert!(lazy_float_neg_inf.read()?.expect_float()?.is_nan()); - + // NaN != NaN, so we have to spell this test out a bit more + assert!(reader + .next()? + .expect_value()? + .read()? + .expect_float()? 
+ .is_nan()); // 3.6e0 - let lazy_float = reader.next()?.expect_value()?; - assert!(!lazy_float.is_null()); - assert_eq!(lazy_float.ion_type(), IonType::Float); - assert_eq!(lazy_float.read()?.expect_float()?, 3.6f64); - + expect_next(reader, RawValueRef::Float(3.6f64)); // 2.25e23 - let lazy_float = reader.next()?.expect_value()?; - assert!(!lazy_float.is_null()); - assert_eq!(lazy_float.ion_type(), IonType::Float); - assert_eq!(lazy_float.read()?.expect_float()?, 2.5f64 * 10f64.powi(23)); - - // -3.14 - let lazy_float = reader.next()?.expect_value()?; - assert!(!lazy_float.is_null()); - assert_eq!(lazy_float.ion_type(), IonType::Float); - assert_eq!(lazy_float.read()?.expect_float()?, -3.18); - + expect_next(reader, RawValueRef::Float(2.5f64 * 10f64.powi(23))); + // -3.18 + expect_next(reader, RawValueRef::Float(-3.18f64)); + // "Hello" + expect_next(reader, RawValueRef::String("Hello!".into())); + // "foo bar baz" + expect_next(reader, RawValueRef::String("foo bar baz".into())); + // "😎😎😎" + expect_next(reader, RawValueRef::String("😎😎😎".into())); + // "lol\n\r\0wat" + expect_next(reader, RawValueRef::String("lol\n\r\0wat".into())); + // "\x48ello, \x77orld!" + expect_next(reader, RawValueRef::String("Hello, world!".into())); + // "\u0048ello, \u0077orld!" + expect_next(reader, RawValueRef::String("Hello, world!".into())); + // "\U00000048ello, \U00000077orld!" 
+ expect_next(reader, RawValueRef::String("Hello, world!".into())); + // "\"Hello,\\\n world!\" " + expect_next(reader, RawValueRef::String("Hello, world!".into())); Ok(()) } } diff --git a/src/lazy/text/value.rs b/src/lazy/text/value.rs index 1842ddd5..f888b63a 100644 --- a/src/lazy/text/value.rs +++ b/src/lazy/text/value.rs @@ -47,10 +47,13 @@ impl<'data> LazyRawValue<'data, TextEncoding> for LazyRawTextValue<'data> { fn read(&self) -> IonResult> { let matched_input = self.input.slice(0, self.encoded_value.data_length()); let value_ref = match self.encoded_value.matched() { - MatchedValue::Null(ion_type) => RawValueRef::Null(ion_type), - MatchedValue::Bool(b) => RawValueRef::Bool(b), + MatchedValue::Null(ion_type) => RawValueRef::Null(*ion_type), + MatchedValue::Bool(b) => RawValueRef::Bool(*b), MatchedValue::Int(i) => RawValueRef::Int(i.read(matched_input)?), MatchedValue::Float(f) => RawValueRef::Float(f.read(matched_input)?), + // ...decimal, timestamp... + MatchedValue::String(s) => RawValueRef::String(s.read(matched_input)?), + // ...and the rest! 
}; Ok(value_ref) } diff --git a/src/lazy/value_ref.rs b/src/lazy/value_ref.rs index 54aa9272..0b4fb739 100644 --- a/src/lazy/value_ref.rs +++ b/src/lazy/value_ref.rs @@ -2,6 +2,7 @@ use crate::element::Value; use crate::lazy::decoder::LazyDecoder; use crate::lazy::r#struct::LazyStruct; use crate::lazy::sequence::LazySequence; +use crate::lazy::str_ref::StrRef; use crate::result::IonFailure; use crate::{Decimal, Int, IonError, IonResult, IonType, SymbolRef, Timestamp}; use std::fmt::{Debug, Formatter}; @@ -20,7 +21,7 @@ pub enum ValueRef<'top, 'data, D: LazyDecoder<'data>> { Float(f64), Decimal(Decimal), Timestamp(Timestamp), - String(&'data str), + String(StrRef<'data>), Symbol(SymbolRef<'top>), Blob(&'data [u8]), Clob(&'data [u8]), @@ -152,7 +153,7 @@ impl<'top, 'data, D: LazyDecoder<'data>> ValueRef<'top, 'data, D> { } } - pub fn expect_string(self) -> IonResult<&'data str> { + pub fn expect_string(self) -> IonResult> { if let ValueRef::String(s) = self { Ok(s) } else { @@ -286,7 +287,7 @@ mod tests { )?; let mut reader = LazyBinaryReader::new(&ion_data)?; let first_value = reader.expect_next()?.read()?; - assert_ne!(first_value, ValueRef::String("it's not a string")); + assert_ne!(first_value, ValueRef::String("it's not a string".into())); assert_eq!(first_value, ValueRef::Null(IonType::Null)); assert_eq!(reader.expect_next()?.read()?, ValueRef::Bool(true)); assert_eq!(reader.expect_next()?.read()?, ValueRef::Int(1.into())); @@ -303,7 +304,10 @@ mod tests { reader.expect_next()?.read()?, ValueRef::Symbol(SymbolRef::from("foo")) ); - assert_eq!(reader.expect_next()?.read()?, ValueRef::String("hello")); + assert_eq!( + reader.expect_next()?.read()?, + ValueRef::String("hello".into()) + ); assert_eq!( reader.expect_next()?.read()?, ValueRef::Blob(&[0x06, 0x5A, 0x1B]) // Base64-decoded "Blob" From 357ca8f9f8cc83dbe740a478afed6376b5e717b2 Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Fri, 28 Jul 2023 17:42:18 -0400 Subject: [PATCH 07/15] clippy fixes --- 
src/lazy/text/buffer.rs | 2 +- src/lazy/text/matched.rs | 13 +++++-------- src/lazy/text/raw/reader.rs | 4 ++-- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/src/lazy/text/buffer.rs b/src/lazy/text/buffer.rs index d7e0bc64..3ceb5de6 100644 --- a/src/lazy/text/buffer.rs +++ b/src/lazy/text/buffer.rs @@ -148,7 +148,7 @@ impl<'data> TextBufferView<'data> { // However, this one returns a `&'data str` instead of a `&'a str`, which is to say // that the string that's returned lives as long as the data itself, not just the duration // of the lifetime introduced by this method call. - std::str::from_utf8(&self.data).map_err(move |_| { + std::str::from_utf8(self.data).map_err(move |_| { let decoding_error = DecodingError::new("encountered invalid UTF-8").with_position(self.offset()); IonError::Decoding(decoding_error) diff --git a/src/lazy/text/matched.rs b/src/lazy/text/matched.rs index 7c21b3e2..38a1f6ac 100644 --- a/src/lazy/text/matched.rs +++ b/src/lazy/text/matched.rs @@ -196,18 +196,15 @@ impl MatchedString { // Strings longer than 64 bytes will allocate a larger space on the heap. 
const STACK_ALLOC_BUFFER_CAPACITY: usize = 64; - pub fn read<'a, 'data>( - &'a self, - matched_input: TextBufferView<'data>, - ) -> IonResult> { + pub fn read<'data>(&self, matched_input: TextBufferView<'data>) -> IonResult> { match self { MatchedString::Short(short) => self.read_short_string(*short, matched_input), MatchedString::Long(_) => todo!("long-form strings"), } } - fn read_short_string<'a, 'data>( - &'a self, + fn read_short_string<'data>( + &self, short: MatchedShortString, matched_input: TextBufferView<'data>, ) -> IonResult> { @@ -301,10 +298,10 @@ impl MatchedString { Ok(input_after_escape) } - fn hex_digits_code_point<'a, 'data>( + fn hex_digits_code_point<'data>( num_digits: usize, input: TextBufferView<'data>, - sanitized: &'a mut Vec, + sanitized: &mut Vec, ) -> IonResult> { if input.len() < num_digits { return Err(IonError::Decoding( diff --git a/src/lazy/text/raw/reader.rs b/src/lazy/text/raw/reader.rs index 54777894..20a1fa52 100644 --- a/src/lazy/text/raw/reader.rs +++ b/src/lazy/text/raw/reader.rs @@ -130,8 +130,8 @@ mod tests { // Escaped newlines are discarded data.push_str("\"Hello,\\\n world!\""); - fn expect_next<'a, 'data>( - reader: &'a mut LazyRawTextReader<'data>, + fn expect_next<'data>( + reader: &mut LazyRawTextReader<'data>, expected: RawValueRef<'data, TextEncoding>, ) { let lazy_value = reader From 716ff343f294ff573fa7af36208174804d83c204 Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Fri, 28 Jul 2023 20:19:31 -0400 Subject: [PATCH 08/15] Fix a couple of unit tests --- src/lazy/struct.rs | 8 ++++---- src/lazy/text/buffer.rs | 10 +++++++--- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/lazy/struct.rs b/src/lazy/struct.rs index 2251b949..4956728f 100644 --- a/src/lazy/struct.rs +++ b/src/lazy/struct.rs @@ -147,7 +147,7 @@ impl<'top, 'data, D: LazyDecoder<'data>> LazyStruct<'top, 'data, D> { ///# use ion_rs::IonResult; ///# fn main() -> IonResult<()> { /// use ion_rs::{Element, IonType}; - /// use 
ion_rs::lazy::reader::LazyBinaryReader;; + /// use ion_rs::lazy::reader::LazyBinaryReader; /// use ion_rs::lazy::value_ref::ValueRef; /// /// let ion_data = r#"{foo: "hello", bar: null.list, baz: 3, bar: 4}"#; @@ -156,7 +156,7 @@ impl<'top, 'data, D: LazyDecoder<'data>> LazyStruct<'top, 'data, D> { /// /// let lazy_struct = reader.expect_next()?.read()?.expect_struct()?; /// - /// assert_eq!(lazy_struct.get("foo")?, Some(ValueRef::String("hello"))); + /// assert_eq!(lazy_struct.get("foo")?, Some(ValueRef::String("hello".into()))); /// assert_eq!(lazy_struct.get("baz")?, Some(ValueRef::Int(3.into()))); /// assert_eq!(lazy_struct.get("bar")?, Some(ValueRef::Null(IonType::List))); ///# Ok(()) @@ -175,7 +175,7 @@ impl<'top, 'data, D: LazyDecoder<'data>> LazyStruct<'top, 'data, D> { ///# use ion_rs::IonResult; ///# fn main() -> IonResult<()> { /// use ion_rs::Element; - /// use ion_rs::lazy::reader::LazyBinaryReader;; + /// use ion_rs::lazy::reader::LazyBinaryReader; /// use ion_rs::lazy::value_ref::ValueRef; /// /// let ion_data = r#"{foo: "hello", bar: null.list, baz: 3, bar: 4}"#; @@ -184,7 +184,7 @@ impl<'top, 'data, D: LazyDecoder<'data>> LazyStruct<'top, 'data, D> { /// /// let lazy_struct = reader.expect_next()?.read()?.expect_struct()?; /// - /// assert_eq!(lazy_struct.get_expected("foo")?, ValueRef::String("hello")); + /// assert_eq!(lazy_struct.get_expected("foo")?, ValueRef::String("hello".into())); /// assert!(dbg!(lazy_struct.get_expected("Ontario")).is_err()); ///# Ok(()) ///# } diff --git a/src/lazy/text/buffer.rs b/src/lazy/text/buffer.rs index 3ceb5de6..24a6ec74 100644 --- a/src/lazy/text/buffer.rs +++ b/src/lazy/text/buffer.rs @@ -6,7 +6,7 @@ use std::slice::Iter; use nom::branch::alt; use nom::bytes::streaming::{is_a, is_not, tag, take_until, take_while1}; use nom::character::streaming::{char, digit1, one_of}; -use nom::combinator::{map, opt, peek, recognize, success, value}; +use nom::combinator::{fail, map, opt, peek, recognize, success, value}; use 
nom::error::{ErrorKind, ParseError}; use nom::multi::many0_count; use nom::sequence::{delimited, pair, preceded, separated_pair, terminated, tuple}; @@ -601,7 +601,9 @@ impl<'data> TextBufferView<'data> { } fn match_long_string(self) -> IonParseResult<'data, MatchedString> { - todo!() + // TODO: implement long string matching + // The `fail` parser is a nom builtin that never matches. + fail(self) } } @@ -1015,6 +1017,8 @@ mod tests { MatchTest::new(input).expect_mismatch(match_length(TextBufferView::match_string)); } + // These inputs have leading/trailing whitespace to make them more readable, but the string + // matcher doesn't accept whitespace. We'll trim each one before testing it. let good_inputs = &[ r#" "hello" @@ -1027,7 +1031,7 @@ mod tests { "#, ]; for input in good_inputs { - match_string(input); + match_string(input.trim()); } let bad_inputs = &[ From e29fec571b6ba9c81cd6ef4aad76a2619f8e0f38 Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Fri, 28 Jul 2023 20:28:53 -0400 Subject: [PATCH 09/15] Less ambitious float eq comparison --- src/lazy/text/raw/reader.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lazy/text/raw/reader.rs b/src/lazy/text/raw/reader.rs index 20a1fa52..d5ec559f 100644 --- a/src/lazy/text/raw/reader.rs +++ b/src/lazy/text/raw/reader.rs @@ -110,7 +110,7 @@ mod tests { -inf nan 3.6e0 - 2.5e23 + 2.5e008 -318e-2 // Strings @@ -183,7 +183,7 @@ mod tests { // 3.6e0 expect_next(reader, RawValueRef::Float(3.6f64)); // 2.25e23 - expect_next(reader, RawValueRef::Float(2.5f64 * 10f64.powi(23))); + expect_next(reader, RawValueRef::Float(2.5f64 * 10f64.powi(8))); // -3.18 expect_next(reader, RawValueRef::Float(-3.18f64)); // "Hello" From 8f79a3681e0dc82937cfdbb8582533f6c5576c1f Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Tue, 1 Aug 2023 13:18:24 -0700 Subject: [PATCH 10/15] Adds LazyRawTextReader support for reading symbols --- src/binary/binary_writer.rs | 6 +- src/lazy/text/buffer.rs | 184 
+++++++++++++++++-- src/lazy/text/encoded_value.rs | 1 + src/lazy/text/matched.rs | 312 ++++++++++++++++++++------------- src/lazy/text/raw/reader.rs | 68 ++++++- src/lazy/text/value.rs | 1 + src/lazy/value.rs | 6 +- src/raw_symbol_token_ref.rs | 16 +- src/symbol_ref.rs | 46 +++-- src/text/raw_text_writer.rs | 5 +- src/text/text_formatter.rs | 5 +- src/text/text_writer.rs | 6 +- 12 files changed, 473 insertions(+), 183 deletions(-) diff --git a/src/binary/binary_writer.rs b/src/binary/binary_writer.rs index 305604b2..186f845b 100644 --- a/src/binary/binary_writer.rs +++ b/src/binary/binary_writer.rs @@ -128,7 +128,7 @@ impl IonWriter for BinaryWriter { panic!("Cannot set symbol ID ${symbol_id} as annotation. It is undefined."); } } - RawSymbolTokenRef::Text(text) => self.get_or_create_symbol_id(text), + RawSymbolTokenRef::Text(text) => self.get_or_create_symbol_id(text.as_ref()), }; self.raw_writer.add_annotation(symbol_id); } @@ -145,7 +145,7 @@ impl IonWriter for BinaryWriter { )); } } - RawSymbolTokenRef::Text(text) => self.get_or_create_symbol_id(text), + RawSymbolTokenRef::Text(text) => self.get_or_create_symbol_id(text.as_ref()), }; self.raw_writer.write_symbol(symbol_id) } @@ -159,7 +159,7 @@ impl IonWriter for BinaryWriter { panic!("Cannot set symbol ID ${symbol_id} as field name. 
It is undefined."); } } - RawSymbolTokenRef::Text(text) => self.get_or_create_symbol_id(text), + RawSymbolTokenRef::Text(text) => self.get_or_create_symbol_id(text.as_ref()), }; self.raw_writer.set_field_name(text); } diff --git a/src/lazy/text/buffer.rs b/src/lazy/text/buffer.rs index 24a6ec74..d8a7def6 100644 --- a/src/lazy/text/buffer.rs +++ b/src/lazy/text/buffer.rs @@ -5,8 +5,8 @@ use std::slice::Iter; use nom::branch::alt; use nom::bytes::streaming::{is_a, is_not, tag, take_until, take_while1}; -use nom::character::streaming::{char, digit1, one_of}; -use nom::combinator::{fail, map, opt, peek, recognize, success, value}; +use nom::character::streaming::{char, digit1, one_of, satisfy}; +use nom::combinator::{fail, map, not, opt, peek, recognize, success, value}; use nom::error::{ErrorKind, ParseError}; use nom::multi::many0_count; use nom::sequence::{delimited, pair, preceded, separated_pair, terminated, tuple}; @@ -16,9 +16,9 @@ use crate::lazy::encoding::TextEncoding; use crate::lazy::raw_stream_item::RawStreamItem; use crate::lazy::text::encoded_value::EncodedTextValue; use crate::lazy::text::matched::{ - MatchedFloat, MatchedInt, MatchedShortString, MatchedString, MatchedValue, + MatchedFloat, MatchedInt, MatchedShortString, MatchedString, MatchedSymbol, MatchedValue, }; -use crate::lazy::text::parse_result::IonParseError; +use crate::lazy::text::parse_result::{InvalidInputError, IonParseError}; use crate::lazy::text::parse_result::{IonMatchResult, IonParseResult}; use crate::lazy::text::value::LazyRawTextValue; use crate::result::DecodingError; @@ -275,6 +275,16 @@ impl<'data> TextBufferView<'data> { ) }, ), + map( + match_and_length(Self::match_symbol), + |(matched_symbol, length)| { + EncodedTextValue::new( + MatchedValue::Symbol(matched_symbol), + self.offset(), + length, + ) + }, + ), // TODO: The other Ion types )) .map(|encoded_value| LazyRawTextValue { @@ -578,6 +588,103 @@ impl<'data> TextBufferView<'data> { /// Returns a matched buffer and a 
boolean indicating whether any escaped characters were /// found in the short string. fn match_short_string_body(self) -> IonParseResult<'data, (Self, bool)> { + Self::match_text_until_unescaped(self, b'\"') + } + + fn match_long_string(self) -> IonParseResult<'data, MatchedString> { + // TODO: implement long string matching + // The `fail` parser is a nom builtin that never matches. + fail(self) + } + + fn match_symbol(self) -> IonParseResult<'data, MatchedSymbol> { + // TODO: identifiers + alt(( + Self::match_symbol_id, + Self::match_identifier, + Self::match_quoted_symbol, + ))(self) + } + + fn match_symbol_id(self) -> IonParseResult<'data, MatchedSymbol> { + recognize(terminated( + // Discard a `$` and parse an integer representing the symbol ID. + // Note that symbol ID integers: + // * CANNOT have underscores in them. For example: `$1_0` is considered an identifier. + // * CAN have leading zeros. There's precedent for this in ion-java. + preceded(tag("$"), digit1), + // Peek at the next character to make sure it's unrelated to the symbol ID. + // The spec does not offer a formal definition of what ends a symbol ID. + // This checks for either a stop_character (which performs its own `peek()`) + // or a colon (":"), which could be a field delimiter (":") or the beginning of + // an annotation delimiter ('::'). + alt(( + // Each of the parsers passed to `alt` must have the same return type. `stop_character` + // returns a char instead of a &str, so we use `recognize()` to get a &str instead. 
+ recognize(Self::peek_stop_character), + peek(tag(":")), // Field delimiter (":") or annotation delimiter ("::") + )), + )) + .map(|_matched| MatchedSymbol::SymbolId) + .parse(self) + } + + fn match_identifier(self) -> IonParseResult<'data, MatchedSymbol> { + let (remaining, identifier_text) = recognize(terminated( + pair( + Self::identifier_initial_character, + Self::identifier_trailing_characters, + ), + not(Self::identifier_trailing_character), + ))(self)?; + // Ion defines a number of keywords that are syntactically indistinguishable from + // identifiers. Keywords take precedence; we must ensure that any identifier we find + // is not actually a keyword. + const KEYWORDS: &[&str] = &["true", "false", "nan", "null"]; + // In many situations, this check will not be necessary. Another type's parser will + // recognize the keyword as its own. (For example, `parse_boolean` would match the input + // text `false`.) However, because symbols can appear in annotations and the check for + // annotations precedes the parsing for all other types, we need this extra verification. + if KEYWORDS + .iter() + .any(|k| k.as_bytes() == identifier_text.bytes()) + { + // Finding a keyword is not a fatal error, it just means that this parser doesn't match. + return Err(nom::Err::Error(IonParseError::Invalid( + InvalidInputError::new(self), + ))); + } + Ok((remaining, MatchedSymbol::Identifier)) + } + + /// Matches any character that can appear at the start of an identifier. + fn identifier_initial_character(self) -> IonParseResult<'data, Self> { + recognize(alt((one_of("$_"), satisfy(|c| c.is_ascii_alphabetic()))))(self) + } + + /// Matches any character that is legal in an identifier, though not necessarily at the beginning. + fn identifier_trailing_character(self) -> IonParseResult<'data, Self> { + recognize(alt((one_of("$_"), satisfy(|c| c.is_ascii_alphanumeric()))))(self) + } + + /// Matches characters that are legal in an identifier, though not necessarily at the beginning. 
+ fn identifier_trailing_characters(self) -> IonParseResult<'data, Self> { + recognize(many0_count(Self::identifier_trailing_character))(self) + } + + fn match_quoted_symbol(self) -> IonParseResult<'data, MatchedSymbol> { + delimited(char('\''), Self::match_quoted_symbol_body, char('\'')) + .map(|(_matched, contains_escaped_chars)| MatchedSymbol::Quoted(contains_escaped_chars)) + .parse(self) + } + + /// Returns a matched buffer and a boolean indicating whether any escaped characters were + /// found in the short string. + fn match_quoted_symbol_body(self) -> IonParseResult<'data, (Self, bool)> { + Self::match_text_until_unescaped(self, b'\'') + } + + fn match_text_until_unescaped(self, delimiter: u8) -> IonParseResult<'data, (Self, bool)> { let mut is_escaped = false; let mut contains_escaped_chars = false; for (index, byte) in self.bytes().iter().enumerate() { @@ -591,7 +698,7 @@ impl<'data> TextBufferView<'data> { contains_escaped_chars = true; continue; } - if *byte == b'\"' { + if *byte == delimiter { let matched = self.slice(0, index); let remaining = self.slice_to_end(index); return Ok((remaining, (matched, contains_escaped_chars))); @@ -599,12 +706,6 @@ impl<'data> TextBufferView<'data> { } Err(nom::Err::Incomplete(Needed::Unknown)) } - - fn match_long_string(self) -> IonParseResult<'data, MatchedString> { - // TODO: implement long string matching - // The `fail` parser is a nom builtin that never matches. - fail(self) - } } // === nom trait implementations === @@ -840,13 +941,17 @@ mod tests { P: Parser, O, IonParseError<'data>>, { let result = self.try_match(parser); - // We expect this to fail for one reason or another - assert!( - result.is_err(), - "Expected a parse failure for input: {:?}\nResult: {:?}", - self.input, - result - ); + // We expect that only part of the input will match or that the entire + // input will be rejected outright. 
+ if let Ok((_remaining, match_length)) = result { + assert_ne!( + match_length, + self.input.len() - 1, + "parser unexpectedly matched the complete input: '{:?}\nResult: {:?}", + self.input, + result + ); + } } } @@ -1039,13 +1144,54 @@ mod tests { r#" hello" "#, - // Missing a trailing quote + // Missing a closing quote r#" "hello "#, + // Closing quote is escaped + r#" + "hello\" + "#, ]; for input in bad_inputs { mismatch_string(input); } } + + #[test] + fn test_match_symbol() { + fn match_symbol(input: &str) { + MatchTest::new(input).expect_match(match_length(TextBufferView::match_symbol)); + } + fn mismatch_symbol(input: &str) { + MatchTest::new(input).expect_mismatch(match_length(TextBufferView::match_symbol)); + } + + // These inputs have leading/trailing whitespace to make them more readable, but the string + // matcher doesn't accept whitespace. We'll trim each one before testing it. + let good_inputs = &[ + "'hello'", + "'😀😀😀'", + "'this has an escaped quote \\' right in the middle'", + "$308", + "$0", + "foo", + "name", + "$bar", + "_baz_quux", + ]; + for input in good_inputs { + match_symbol(input); + } + + let bad_inputs = &[ + "'hello", // No closing quote + "'hello\\'", // Closing quote is escaped + "$-8", // Negative SID + "nan", // Identifier that is also a keyword + ]; + for input in bad_inputs { + mismatch_symbol(input); + } + } } diff --git a/src/lazy/text/encoded_value.rs b/src/lazy/text/encoded_value.rs index 17779c2d..f0a2c096 100644 --- a/src/lazy/text/encoded_value.rs +++ b/src/lazy/text/encoded_value.rs @@ -116,6 +116,7 @@ impl EncodedTextValue { MatchedValue::Int(_) => IonType::Int, MatchedValue::Float(_) => IonType::Float, MatchedValue::String(_) => IonType::String, + MatchedValue::Symbol(_) => IonType::Symbol, } } diff --git a/src/lazy/text/matched.rs b/src/lazy/text/matched.rs index 38a1f6ac..bb619350 100644 --- a/src/lazy/text/matched.rs +++ b/src/lazy/text/matched.rs @@ -20,8 +20,10 @@ //! re-discovered. 
use nom::character::is_hex_digit; +use std::borrow::Cow; use std::num::IntErrorKind; use std::ops::Range; +use std::str::FromStr; use num_bigint::BigInt; use num_traits::Num; @@ -32,7 +34,7 @@ use crate::lazy::text::as_utf8::AsUtf8; use crate::lazy::text::buffer::TextBufferView; use crate::lazy::text::parse_result::InvalidInputError; use crate::result::{DecodingError, IonFailure}; -use crate::{Int, IonError, IonResult, IonType}; +use crate::{Int, IonError, IonResult, IonType, RawSymbolTokenRef}; /// A partially parsed Ion value. #[derive(Clone, Debug, PartialEq)] @@ -43,6 +45,7 @@ pub(crate) enum MatchedValue { Int(MatchedInt), Float(MatchedFloat), String(MatchedString), + Symbol(MatchedSymbol), // TODO: ...the other types } @@ -134,8 +137,6 @@ impl MatchedFloat { const STACK_ALLOC_BUFFER_CAPACITY: usize = 32; pub fn read(&self, matched_input: TextBufferView) -> IonResult { - use std::str::FromStr; - match self { MatchedFloat::PositiveInfinity => return Ok(f64::INFINITY), MatchedFloat::NegativeInfinity => return Ok(f64::NEG_INFINITY), @@ -220,140 +221,137 @@ impl MatchedString { // that replaces the escaped characters with their corresponding bytes. let mut sanitized = Vec::with_capacity(matched_input.len()); - Self::escape_short_string(body, &mut sanitized)?; + escape_text(body, &mut sanitized)?; let text = String::from_utf8(sanitized).unwrap(); Ok(StrRef::from(text.to_string())) } +} - fn escape_short_string( - matched_input: TextBufferView, - sanitized: &mut Vec, - ) -> IonResult<()> { - let mut remaining = matched_input; - while !remaining.is_empty() { - let next_escape = remaining.bytes().iter().position(|byte| *byte == b'\\'); - remaining = if let Some(escape_offset) = next_escape { - // Everything up to the '\' is already clean. Write that slice to 'sanitized'. - let already_clean = remaining.slice(0, escape_offset); - sanitized.extend_from_slice(already_clean.bytes()); - // Everything starting from the '\' needs to be evaluated. 
- let contains_escapes = remaining.slice_to_end(escape_offset); - Self::write_escaped(contains_escapes, sanitized)? - } else { - sanitized.extend_from_slice(remaining.bytes()); - // 'remaining' is now empty - remaining.slice_to_end(remaining.len()) - }; - } - - Ok(()) +fn escape_text(matched_input: TextBufferView, sanitized: &mut Vec) -> IonResult<()> { + let mut remaining = matched_input; + while !remaining.is_empty() { + let next_escape = remaining.bytes().iter().position(|byte| *byte == b'\\'); + remaining = if let Some(escape_offset) = next_escape { + // Everything up to the '\' is already clean. Write that slice to 'sanitized'. + let already_clean = remaining.slice(0, escape_offset); + sanitized.extend_from_slice(already_clean.bytes()); + // Everything starting from the '\' needs to be evaluated. + let contains_escapes = remaining.slice_to_end(escape_offset); + write_escaped(contains_escapes, sanitized)? + } else { + sanitized.extend_from_slice(remaining.bytes()); + // 'remaining' is now empty + remaining.slice_to_end(remaining.len()) + }; } - fn write_escaped<'data>( - input: TextBufferView<'data>, - sanitized: &mut Vec, - ) -> IonResult> { - // Note that by the time this method has been called, the parser has already confirmed that - // there is an appropriate closing delimiter. Thus, if any of the branches below run out of - // data, it means that it's a fatal error and not just an Incomplete. - debug_assert!(!input.is_empty()); - debug_assert!(input.bytes()[0] == b'\\'); - if input.len() == 1 { - return Err(IonError::Decoding( - DecodingError::new("found an escape ('\\') with no subsequent character") - .with_position(input.offset()), - )); - } - let input_after_escape = input.slice_to_end(2); // After (e.g.) '\x' - let escape_id = input.bytes()[1]; - let substitute = match escape_id { - b'n' => b'\n', - b'r' => b'\r', - b't' => b'\t', - b'\\' => b'\\', - b'/' => b'/', - b'"' => b'"', - b'\'' => b'\'', - b'?' 
=> b'?', - b'0' => 0x00u8, // NUL - b'a' => 0x07u8, // alert BEL - b'b' => 0x08u8, // backspace - b'v' => 0x0Bu8, // vertical tab - b'f' => 0x0Cu8, // form feed - // If the byte following the '\' is a real newline (that is: 0x0A), we discard it. - b'\n' => return Ok(input_after_escape), - // These cases require more sophisticated parsing, not just a 1-to-1 mapping of bytes - b'x' => return Self::hex_digits_code_point(2, input_after_escape, sanitized), - b'u' => return Self::hex_digits_code_point(4, input_after_escape, sanitized), - b'U' => return Self::hex_digits_code_point(8, input_after_escape, sanitized), - _ => { - return Err(IonError::Decoding( - DecodingError::new(format!("invalid escape sequence '\\{}", escape_id)) - .with_position(input.offset()), - )) - } - }; + Ok(()) +} - sanitized.push(substitute); - Ok(input_after_escape) +fn write_escaped<'data>( + input: TextBufferView<'data>, + sanitized: &mut Vec, +) -> IonResult> { + // Note that by the time this method has been called, the parser has already confirmed that + // there is an appropriate closing delimiter. Thus, if any of the branches below run out of + // data, it means that it's a fatal error and not just an Incomplete. + debug_assert!(!input.is_empty()); + debug_assert!(input.bytes()[0] == b'\\'); + if input.len() == 1 { + return Err(IonError::Decoding( + DecodingError::new("found an escape ('\\') with no subsequent character") + .with_position(input.offset()), + )); } - - fn hex_digits_code_point<'data>( - num_digits: usize, - input: TextBufferView<'data>, - sanitized: &mut Vec, - ) -> IonResult> { - if input.len() < num_digits { + let input_after_escape = input.slice_to_end(2); // After (e.g.) '\x' + let escape_id = input.bytes()[1]; + let substitute = match escape_id { + b'n' => b'\n', + b'r' => b'\r', + b't' => b'\t', + b'\\' => b'\\', + b'/' => b'/', + b'"' => b'"', + b'\'' => b'\'', + b'?' 
=> b'?', + b'0' => 0x00u8, // NUL + b'a' => 0x07u8, // alert BEL + b'b' => 0x08u8, // backspace + b'v' => 0x0Bu8, // vertical tab + b'f' => 0x0Cu8, // form feed + // If the byte following the '\' is a real newline (that is: 0x0A), we discard it. + b'\n' => return Ok(input_after_escape), + // These cases require more sophisticated parsing, not just a 1-to-1 mapping of bytes + b'x' => return hex_digits_code_point(2, input_after_escape, sanitized), + b'u' => return hex_digits_code_point(4, input_after_escape, sanitized), + b'U' => return hex_digits_code_point(8, input_after_escape, sanitized), + _ => { return Err(IonError::Decoding( - DecodingError::new(format!( - "found a {}-hex-digit escape sequence with only {} digits", - num_digits, - input.len() - )) - .with_position(input.offset()), - )); + DecodingError::new(format!("invalid escape sequence '\\{}", escape_id)) + .with_position(input.offset()), + )) } + }; - let hex_digit_bytes = &input.bytes()[..num_digits]; - - let all_are_hex_digits = hex_digit_bytes - .iter() - .take(num_digits) - .copied() - .all(is_hex_digit); - if !all_are_hex_digits { - return Err(IonError::Decoding( - DecodingError::new(format!( - "found a {}-hex-digit escape sequence that contained an invalid hex digit", - num_digits, - )) - .with_position(input.offset()), - )); - } - // We just confirmed all of the digits are ASCII hex digits, so these steps cannot fail. - let hex_digits = std::str::from_utf8(hex_digit_bytes).unwrap(); - let code_point = u32::from_str_radix(hex_digits, 16).unwrap(); - - // Check to see if this is a high surrogate; if it is, our code point isn't complete. Another - // unicode escape representing the low surrogate has to be next in the input to complete it. - // See the docs for this helper function for details. (Note: this will only ever be true for - // 4- and 8-digit escape sequences. `\x` escapes don't have enough digits to represent a - // high surrogate.) 
- if code_point_is_a_high_surrogate(code_point) { - todo!("support surrogate pairs") - } + sanitized.push(substitute); + Ok(input_after_escape) +} - // A Rust `char` can represent any Unicode scalar value--a code point that is not part of a - // surrogate pair. If the value we found isn't a high surrogate, then it's a complete scalar - // value. We can safely convert it to a `char`. - let character = char::from_u32(code_point).unwrap(); - let utf8_buffer: &mut [u8; 4] = &mut [0; 4]; - let utf8_encoded = character.encode_utf8(utf8_buffer); - sanitized.extend_from_slice(utf8_encoded.as_bytes()); +fn hex_digits_code_point<'data>( + num_digits: usize, + input: TextBufferView<'data>, + sanitized: &mut Vec, +) -> IonResult> { + if input.len() < num_digits { + return Err(IonError::Decoding( + DecodingError::new(format!( + "found a {}-hex-digit escape sequence with only {} digits", + num_digits, + input.len() + )) + .with_position(input.offset()), + )); + } - // Skip beyond the digits we just processed - Ok(input.slice_to_end(num_digits)) + let hex_digit_bytes = &input.bytes()[..num_digits]; + + let all_are_hex_digits = hex_digit_bytes + .iter() + .take(num_digits) + .copied() + .all(is_hex_digit); + if !all_are_hex_digits { + return Err(IonError::Decoding( + DecodingError::new(format!( + "found a {}-hex-digit escape sequence that contained an invalid hex digit", + num_digits, + )) + .with_position(input.offset()), + )); } + // We just confirmed all of the digits are ASCII hex digits, so these steps cannot fail. + let hex_digits = std::str::from_utf8(hex_digit_bytes).unwrap(); + let code_point = u32::from_str_radix(hex_digits, 16).unwrap(); + + // Check to see if this is a high surrogate; if it is, our code point isn't complete. Another + // unicode escape representing the low surrogate has to be next in the input to complete it. + // See the docs for this helper function for details. (Note: this will only ever be true for + // 4- and 8-digit escape sequences. 
`\x` escapes don't have enough digits to represent a + // high surrogate.) + if code_point_is_a_high_surrogate(code_point) { + todo!("support surrogate pairs") + } + + // A Rust `char` can represent any Unicode scalar value--a code point that is not part of a + // surrogate pair. If the value we found isn't a high surrogate, then it's a complete scalar + // value. We can safely convert it to a `char`. + let character = char::from_u32(code_point).unwrap(); + let utf8_buffer: &mut [u8; 4] = &mut [0; 4]; + let utf8_encoded = character.encode_utf8(utf8_buffer); + sanitized.extend_from_slice(utf8_encoded.as_bytes()); + + // Skip beyond the digits we just processed + Ok(input.slice_to_end(num_digits)) } /// Returns `true` if the provided code point is a utf-16 high surrogate. @@ -381,3 +379,71 @@ impl MatchedString { fn code_point_is_a_high_surrogate(value: u32) -> bool { (0xD800..=0xDFFF).contains(&value) } + +#[derive(Clone, Debug, PartialEq)] +pub(crate) enum MatchedSymbol { + /// A numeric symbol ID (e.g. `$21`) + SymbolId, + /// The symbol is an unquoted identifier (e.g. `foo`) + Identifier, + /// The symbol is delimited by single quotes. + Quoted(bool), + // TODO: Operators in S-Expressions +} + +impl MatchedSymbol { + pub fn read<'data>( + &self, + matched_input: TextBufferView<'data>, + ) -> IonResult> { + match self { + MatchedSymbol::SymbolId => self.read_symbol_id(matched_input), + MatchedSymbol::Identifier => self.read_identifier(matched_input), + MatchedSymbol::Quoted(contains_escaped_chars) => { + self.read_quoted(matched_input, *contains_escaped_chars) + } + } + } + + fn read_quoted<'data>( + &self, + matched_input: TextBufferView<'data>, + contains_escaped_chars: bool, + ) -> IonResult> { + // Take a slice of the input that ignores the first and last bytes, which are quotes. + let body = matched_input.slice(1, matched_input.len() - 2); + if !contains_escaped_chars { + // There are no escaped characters, so we can just validate the string in-place. 
+ let text = body.as_text()?; + let str_ref = RawSymbolTokenRef::Text(text.into()); + return Ok(str_ref); + } + + // Otherwise, there are escaped characters. We need to build a new version of our symbol + // that replaces the escaped characters with their corresponding bytes. + let mut sanitized = Vec::with_capacity(matched_input.len()); + + escape_text(body, &mut sanitized)?; + let text = String::from_utf8(sanitized).unwrap(); + Ok(RawSymbolTokenRef::Text(text.into())) + } + fn read_identifier<'data>( + &self, + matched_input: TextBufferView<'data>, + ) -> IonResult> { + matched_input + .as_text() + .map(|t| RawSymbolTokenRef::Text(Cow::Borrowed(t))) + } + fn read_symbol_id<'data>( + &self, + matched_input: TextBufferView<'data>, + ) -> IonResult> { + // Skip past the first byte, which has to be a `$`. + let text = matched_input.slice_to_end(1).as_text()?; + // It's not possible for the number parsing to fail because the matcher's rules + // guarantee that this string contains only decimal digits. + let sid = usize::from_str(text).expect("loading symbol ID as usize"); + Ok(RawSymbolTokenRef::SymbolId(sid)) + } +} diff --git a/src/lazy/text/raw/reader.rs b/src/lazy/text/raw/reader.rs index d5ec559f..48495d81 100644 --- a/src/lazy/text/raw/reader.rs +++ b/src/lazy/text/raw/reader.rs @@ -76,11 +76,13 @@ mod tests { use super::*; use crate::lazy::decoder::LazyRawValue; use crate::lazy::raw_value_ref::RawValueRef; - use crate::IonType; + use crate::{IonType, RawSymbolTokenRef}; #[test] fn test_top_level() -> IonResult<()> { - let data = r#" + let mut data = String::new(); + data.push_str( + r#" /* This test demonstrates lazily reading top-level values of various Ion types. The values are interspersed with @@ -123,13 +125,29 @@ mod tests { "\u0048ello, \u0077orld!" // \u 4-digit hex escape "\U00000048ello, \U00000077orld!" 
// \U 8-digit hex escape - "#; - - // Make a mutable string so we can append some things that require Rust-level escapes - let mut data = String::from(data); + "#, + ); // Escaped newlines are discarded data.push_str("\"Hello,\\\n world!\""); + data.push_str( + r#" + // Symbols + + 'foo' + 'Hello, world!' + '😎😎😎' + + firstName + date_of_birth + $variable + + $0 + $10 + $733 + "#, + ); + fn expect_next<'data>( reader: &mut LazyRawTextReader<'data>, expected: RawValueRef<'data, TextEncoding>, @@ -202,6 +220,44 @@ mod tests { expect_next(reader, RawValueRef::String("Hello, world!".into())); // "\"Hello,\\\n world!\" " expect_next(reader, RawValueRef::String("Hello, world!".into())); + // 'foo' + expect_next( + reader, + RawValueRef::Symbol(RawSymbolTokenRef::Text("foo".into())), + ); + expect_next( + reader, + RawValueRef::Symbol(RawSymbolTokenRef::Text("Hello, world!".into())), + ); + expect_next( + reader, + RawValueRef::Symbol(RawSymbolTokenRef::Text("😎😎😎".into())), + ); + // firstName + expect_next( + reader, + RawValueRef::Symbol(RawSymbolTokenRef::Text("firstName".into())), + ); + // date_of_birth + expect_next( + reader, + RawValueRef::Symbol(RawSymbolTokenRef::Text("date_of_birth".into())), + ); + // $variable + expect_next( + reader, + RawValueRef::Symbol(RawSymbolTokenRef::Text("$variable".into())), + ); + // $0 + expect_next(reader, RawValueRef::Symbol(RawSymbolTokenRef::SymbolId(0))); + // $10 + expect_next(reader, RawValueRef::Symbol(RawSymbolTokenRef::SymbolId(10))); + // $733 + expect_next( + reader, + RawValueRef::Symbol(RawSymbolTokenRef::SymbolId(733)), + ); + Ok(()) } } diff --git a/src/lazy/text/value.rs b/src/lazy/text/value.rs index f888b63a..3df2e985 100644 --- a/src/lazy/text/value.rs +++ b/src/lazy/text/value.rs @@ -53,6 +53,7 @@ impl<'data> LazyRawValue<'data, TextEncoding> for LazyRawTextValue<'data> { MatchedValue::Float(f) => RawValueRef::Float(f.read(matched_input)?), // ...decimal, timestamp... 
MatchedValue::String(s) => RawValueRef::String(s.read(matched_input)?), + MatchedValue::Symbol(s) => RawValueRef::Symbol(s.read(matched_input)?), // ...and the rest! }; Ok(value_ref) diff --git a/src/lazy/value.rs b/src/lazy/value.rs index 8f09cdbf..f25caf7e 100644 --- a/src/lazy/value.rs +++ b/src/lazy/value.rs @@ -9,6 +9,7 @@ use crate::{ Annotations, Element, IntoAnnotatedElement, IonError, IonResult, IonType, RawSymbolTokenRef, SymbolRef, SymbolTable, Value, }; +use std::borrow::Cow; /// A value in a binary Ion stream whose header has been parsed but whose body (i.e. its data) has /// not. A `LazyValue` is immutable; its data can be read any number of times. @@ -184,7 +185,8 @@ impl<'top, 'data, D: LazyDecoder<'data>> LazyValue<'top, 'data, D> { )) })? .into(), - RawSymbolTokenRef::Text(text) => text.into(), + RawSymbolTokenRef::Text(Cow::Borrowed(text)) => text.into(), + RawSymbolTokenRef::Text(Cow::Owned(text)) => text.into(), }; ValueRef::Symbol(symbol) } @@ -333,7 +335,7 @@ where )), Some(symbol) => Some(Ok(symbol.into())), }, - Ok(RawSymbolTokenRef::Text(text)) => Some(Ok(SymbolRef::with_text(text))), + Ok(RawSymbolTokenRef::Text(text)) => Some(Ok(text.into())), Err(e) => Some(Err(e)), } } diff --git a/src/raw_symbol_token_ref.rs b/src/raw_symbol_token_ref.rs index d4a00c4d..dddedc7c 100644 --- a/src/raw_symbol_token_ref.rs +++ b/src/raw_symbol_token_ref.rs @@ -1,11 +1,12 @@ use crate::raw_symbol_token::RawSymbolToken; use crate::{Symbol, SymbolId}; +use std::borrow::Cow; /// Like RawSymbolToken, but the Text variant holds a borrowed reference instead of a String. #[derive(Debug, Clone, PartialEq, Eq)] pub enum RawSymbolTokenRef<'a> { SymbolId(SymbolId), - Text(&'a str), + Text(Cow<'a, str>), } /// Implemented by types that can be viewed as a [RawSymbolTokenRef] without allocations. 
@@ -15,10 +16,7 @@ pub trait AsRawSymbolTokenRef { impl<'a> AsRawSymbolTokenRef for RawSymbolTokenRef<'a> { fn as_raw_symbol_token_ref(&self) -> RawSymbolTokenRef { - match self { - RawSymbolTokenRef::SymbolId(sid) => RawSymbolTokenRef::SymbolId(*sid), - RawSymbolTokenRef::Text(text) => RawSymbolTokenRef::Text(text), - } + self.clone() } } @@ -30,20 +28,20 @@ impl AsRawSymbolTokenRef for SymbolId { impl AsRawSymbolTokenRef for String { fn as_raw_symbol_token_ref(&self) -> RawSymbolTokenRef { - RawSymbolTokenRef::Text(self.as_str()) + RawSymbolTokenRef::Text(Cow::from(self.as_str())) } } impl AsRawSymbolTokenRef for &str { fn as_raw_symbol_token_ref(&self) -> RawSymbolTokenRef { - RawSymbolTokenRef::Text(self) + RawSymbolTokenRef::Text(Cow::from(*self)) } } impl AsRawSymbolTokenRef for Symbol { fn as_raw_symbol_token_ref(&self) -> RawSymbolTokenRef { match self.text() { - Some(text) => RawSymbolTokenRef::Text(text), + Some(text) => RawSymbolTokenRef::Text(Cow::from(text)), None => RawSymbolTokenRef::SymbolId(0), } } @@ -62,7 +60,7 @@ impl AsRawSymbolTokenRef for RawSymbolToken { fn as_raw_symbol_token_ref(&self) -> RawSymbolTokenRef { match self { RawSymbolToken::SymbolId(sid) => RawSymbolTokenRef::SymbolId(*sid), - RawSymbolToken::Text(text) => RawSymbolTokenRef::Text(text.as_str()), + RawSymbolToken::Text(text) => RawSymbolTokenRef::Text(Cow::from(text.as_str())), } } } diff --git a/src/symbol_ref.rs b/src/symbol_ref.rs index 9cd42cac..815c75fe 100644 --- a/src/symbol_ref.rs +++ b/src/symbol_ref.rs @@ -1,5 +1,5 @@ use crate::Symbol; -use std::borrow::Borrow; +use std::borrow::{Borrow, Cow}; use std::fmt::{Debug, Formatter}; use std::hash::{Hash, Hasher}; @@ -7,19 +7,19 @@ use std::hash::{Hash, Hasher}; /// static lifetime), a `SymbolRef` may have known or undefined text (i.e. `$0`). 
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone)] pub struct SymbolRef<'a> { - text: Option<&'a str>, + text: Option>, } impl<'a> Debug for SymbolRef<'a> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.text.unwrap_or("$0")) + write!(f, "{}", self.text().unwrap_or("$0")) } } impl<'a> SymbolRef<'a> { /// If this symbol has known text, returns `Some(&str)`. Otherwise, returns `None`. pub fn text(&self) -> Option<&str> { - self.text + self.text.as_ref().map(|t| t.as_ref()) } /// Constructs a `SymbolRef` with unknown text. @@ -28,14 +28,17 @@ impl<'a> SymbolRef<'a> { } /// Constructs a `SymbolRef` with the specified text. - pub fn with_text(text: &str) -> SymbolRef { - SymbolRef { text: Some(text) } + pub fn with_text(text: impl Into>) -> SymbolRef<'a> { + SymbolRef { + text: Some(text.into()), + } } pub fn to_owned(self) -> Symbol { - match self.text() { + match self.text { None => Symbol::unknown_text(), - Some(text) => Symbol::owned(text), + Some(Cow::Borrowed(text)) => Symbol::owned(text), + Some(Cow::Owned(text)) => Symbol::owned(text), } } } @@ -60,14 +63,14 @@ pub trait AsSymbolRef { impl<'a, A: AsRef + 'a> AsSymbolRef for A { fn as_symbol_ref(&self) -> SymbolRef { SymbolRef { - text: Some(self.as_ref()), + text: Some(Cow::Borrowed(self.as_ref())), } } } impl<'a> Hash for SymbolRef<'a> { fn hash(&self, state: &mut H) { - match self.text { + match self.text() { None => 0.hash(state), Some(text) => text.hash(state), } @@ -76,18 +79,33 @@ impl<'a> Hash for SymbolRef<'a> { impl<'a> From<&'a str> for SymbolRef<'a> { fn from(text: &'a str) -> Self { - Self { text: Some(text) } + Self { + text: Some(Cow::Borrowed(text)), + } } } -impl<'a> From<&'a Symbol> for SymbolRef<'a> { - fn from(symbol: &'a Symbol) -> Self { +impl<'a> From for SymbolRef<'a> { + fn from(text: String) -> Self { Self { - text: symbol.text(), + text: Some(Cow::Owned(text)), } } } +impl<'a> From> for SymbolRef<'a> { + fn from(value: Cow<'a, str>) -> Self { + Self { 
text: Some(value) } + } +} + +impl<'a> From<&'a Symbol> for SymbolRef<'a> { + fn from(symbol: &'a Symbol) -> Self { + let text = symbol.text().map(Cow::Borrowed); + Self { text } + } +} + // Note that this method panics if the SymbolRef has unknown text! This is unfortunate but is required // in order to allow a HashMap to do lookups with a &str instead of a &SymbolRef impl<'a> Borrow for SymbolRef<'a> { diff --git a/src/text/raw_text_writer.rs b/src/text/raw_text_writer.rs index b0e75717..68a043a0 100644 --- a/src/text/raw_text_writer.rs +++ b/src/text/raw_text_writer.rs @@ -320,12 +320,13 @@ impl RawTextWriter { match token.as_raw_symbol_token_ref() { RawSymbolTokenRef::SymbolId(sid) => write!(output, "${sid}")?, RawSymbolTokenRef::Text(text) - if Self::token_is_keyword(text) || Self::token_resembles_symbol_id(text) => + if Self::token_is_keyword(text.as_ref()) + || Self::token_resembles_symbol_id(text.as_ref()) => { // Write the symbol text in single quotes write!(output, "'{text}'")?; } - RawSymbolTokenRef::Text(text) if Self::token_is_identifier(text) => { + RawSymbolTokenRef::Text(text) if Self::token_is_identifier(text.as_ref()) => { // Write the symbol text without quotes write!(output, "{text}")? 
} diff --git a/src/text/text_formatter.rs b/src/text/text_formatter.rs index 828e9fb5..404d556d 100644 --- a/src/text/text_formatter.rs +++ b/src/text/text_formatter.rs @@ -229,12 +229,13 @@ impl<'a, W: std::fmt::Write> IonValueFormatter<'a, W> { match token.as_raw_symbol_token_ref() { RawSymbolTokenRef::SymbolId(sid) => write!(self.output, "${sid}")?, RawSymbolTokenRef::Text(text) - if Self::token_is_keyword(text) || Self::token_resembles_symbol_id(text) => + if Self::token_is_keyword(text.as_ref()) + || Self::token_resembles_symbol_id(text.as_ref()) => { // Write the symbol text in single quotes write!(self.output, "'{text}'")?; } - RawSymbolTokenRef::Text(text) if Self::token_is_identifier(text) => { + RawSymbolTokenRef::Text(text) if Self::token_is_identifier(text.as_ref()) => { // Write the symbol text without quotes write!(self.output, "{text}")? } diff --git a/src/text/text_writer.rs b/src/text/text_writer.rs index 7fc140d0..c4829974 100644 --- a/src/text/text_writer.rs +++ b/src/text/text_writer.rs @@ -123,7 +123,7 @@ impl IonWriter for TextWriter { RawSymbolTokenRef::SymbolId(symbol_id) => { // Get the text associated with this symbol ID match self.symbol_table.text_for(symbol_id) { - Some(text) => RawSymbolTokenRef::Text(text), + Some(text) => RawSymbolTokenRef::Text(text.into()), None => RawSymbolTokenRef::SymbolId(symbol_id), } } @@ -138,7 +138,7 @@ impl IonWriter for TextWriter { RawSymbolTokenRef::SymbolId(symbol_id) => { // Get the text associated with this symbol ID match self.symbol_table.text_for(symbol_id) { - Some(text) => RawSymbolTokenRef::Text(text), + Some(text) => RawSymbolTokenRef::Text(text.into()), None => RawSymbolTokenRef::SymbolId(symbol_id), } } @@ -152,7 +152,7 @@ impl IonWriter for TextWriter { RawSymbolTokenRef::SymbolId(symbol_id) => { // Get the text associated with this symbol ID match self.symbol_table.text_for(symbol_id) { - Some(text) => RawSymbolTokenRef::Text(text), + Some(text) => RawSymbolTokenRef::Text(text.into()), 
None => RawSymbolTokenRef::SymbolId(symbol_id), } } From 4cb9b2b906e18ebc2cd11b7ff641273ff21d3726 Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Tue, 1 Aug 2023 13:37:07 -0700 Subject: [PATCH 11/15] Adds more doc comments --- src/lazy/text/buffer.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/lazy/text/buffer.rs b/src/lazy/text/buffer.rs index d8a7def6..e1c7a5e5 100644 --- a/src/lazy/text/buffer.rs +++ b/src/lazy/text/buffer.rs @@ -597,6 +597,7 @@ impl<'data> TextBufferView<'data> { fail(self) } + /// Matches a symbol ID (`$28`), an identifier (`foo`), or a quoted symbol (`'foo'`). fn match_symbol(self) -> IonParseResult<'data, MatchedSymbol> { // TODO: identifiers alt(( @@ -606,6 +607,7 @@ impl<'data> TextBufferView<'data> { ))(self) } + /// Matches a symbol ID (`$28`). fn match_symbol_id(self) -> IonParseResult<'data, MatchedSymbol> { recognize(terminated( // Discard a `$` and parse an integer representing the symbol ID. @@ -629,6 +631,7 @@ impl<'data> TextBufferView<'data> { .parse(self) } + /// Matches an identifier (`foo`). fn match_identifier(self) -> IonParseResult<'data, MatchedSymbol> { let (remaining, identifier_text) = recognize(terminated( pair( @@ -672,6 +675,7 @@ impl<'data> TextBufferView<'data> { recognize(many0_count(Self::identifier_trailing_character))(self) } + /// Matches a quoted symbol (`'foo'`). fn match_quoted_symbol(self) -> IonParseResult<'data, MatchedSymbol> { delimited(char('\''), Self::match_quoted_symbol_body, char('\'')) .map(|(_matched, contains_escaped_chars)| MatchedSymbol::Quoted(contains_escaped_chars)) @@ -684,6 +688,8 @@ impl<'data> TextBufferView<'data> { Self::match_text_until_unescaped(self, b'\'') } + /// A helper method for matching bytes until the specified delimiter. Ignores any byte + /// (including the delimiter) that is prefaced by the escape character `\`. 
fn match_text_until_unescaped(self, delimiter: u8) -> IonParseResult<'data, (Self, bool)> { let mut is_escaped = false; let mut contains_escaped_chars = false; From 54470d2358a730098d811e060960bc876bc1a10d Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Tue, 1 Aug 2023 16:03:49 -0700 Subject: [PATCH 12/15] More doc comments --- src/lazy/text/matched.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/lazy/text/matched.rs b/src/lazy/text/matched.rs index bb619350..db9bdf0b 100644 --- a/src/lazy/text/matched.rs +++ b/src/lazy/text/matched.rs @@ -297,6 +297,8 @@ fn write_escaped<'data>( Ok(input_after_escape) } +/// Reads the next `num_digits` bytes from `input` as a `char`, then writes that `char`'s UTF8 bytes +/// to `sanitized`. fn hex_digits_code_point<'data>( num_digits: usize, input: TextBufferView<'data>, @@ -386,7 +388,8 @@ pub(crate) enum MatchedSymbol { SymbolId, /// The symbol is an unquoted identifier (e.g. `foo`) Identifier, - /// The symbol is delimited by single quotes. + /// The symbol is delimited by single quotes. Holds a `bool` indicating whether the + /// matched input contained any escaped bytes. 
Quoted(bool), // TODO: Operators in S-Expressions } From 78014e7c0cb3272fb05c4b3a0fe138783b878f24 Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Thu, 3 Aug 2023 09:26:09 -0700 Subject: [PATCH 13/15] Adds `LazyRawTextReader` support for reading lists --- src/lazy/binary/raw/sequence.rs | 18 ++--- src/lazy/encoding.rs | 35 +-------- src/lazy/text/buffer.rs | 37 ++++++++++ src/lazy/text/encoded_value.rs | 3 +- src/lazy/text/matched.rs | 17 +++-- src/lazy/text/parse_result.rs | 20 +++++ src/lazy/text/raw/mod.rs | 1 + src/lazy/text/raw/reader.rs | 17 +++++ src/lazy/text/raw/sequence.rs | 126 ++++++++++++++++++++++++++++++++ src/lazy/text/value.rs | 8 +- 10 files changed, 231 insertions(+), 51 deletions(-) create mode 100644 src/lazy/text/raw/sequence.rs diff --git a/src/lazy/binary/raw/sequence.rs b/src/lazy/binary/raw/sequence.rs index 66d26fef..a5f2487d 100644 --- a/src/lazy/binary/raw/sequence.rs +++ b/src/lazy/binary/raw/sequence.rs @@ -18,11 +18,11 @@ impl<'data> LazyRawBinarySequence<'data> { self.value.ion_type() } - pub fn iter(&self) -> RawSequenceIterator<'data> { + pub fn iter(&self) -> RawBinarySequenceIterator<'data> { // Get as much of the sequence's body as is available in the input buffer. 
// Reading a child value may fail as `Incomplete` let buffer_slice = self.value.available_body(); - RawSequenceIterator::new(buffer_slice) + RawBinarySequenceIterator::new(buffer_slice) } } @@ -33,7 +33,7 @@ impl<'data> LazyContainerPrivate<'data, BinaryEncoding> for LazyRawBinarySequenc } impl<'data> LazyRawSequence<'data, BinaryEncoding> for LazyRawBinarySequence<'data> { - type Iterator = RawSequenceIterator<'data>; + type Iterator = RawBinarySequenceIterator<'data>; fn annotations(&self) -> RawBinaryAnnotationsIterator<'data> { self.value.annotations() @@ -54,7 +54,7 @@ impl<'data> LazyRawSequence<'data, BinaryEncoding> for LazyRawBinarySequence<'da impl<'a, 'data> IntoIterator for &'a LazyRawBinarySequence<'data> { type Item = IonResult>; - type IntoIter = RawSequenceIterator<'data>; + type IntoIter = RawBinarySequenceIterator<'data>; fn into_iter(self) -> Self::IntoIter { self.iter() @@ -99,19 +99,19 @@ impl<'a> Debug for LazyRawBinarySequence<'a> { } } -pub struct RawSequenceIterator<'data> { +pub struct RawBinarySequenceIterator<'data> { source: DataSource<'data>, } -impl<'data> RawSequenceIterator<'data> { - pub(crate) fn new(input: ImmutableBuffer<'data>) -> RawSequenceIterator<'data> { - RawSequenceIterator { +impl<'data> RawBinarySequenceIterator<'data> { + pub(crate) fn new(input: ImmutableBuffer<'data>) -> RawBinarySequenceIterator<'data> { + RawBinarySequenceIterator { source: DataSource::new(input), } } } -impl<'data> Iterator for RawSequenceIterator<'data> { +impl<'data> Iterator for RawBinarySequenceIterator<'data> { type Item = IonResult>; fn next(&mut self) -> Option { diff --git a/src/lazy/encoding.rs b/src/lazy/encoding.rs index 784879ad..987bd1f6 100644 --- a/src/lazy/encoding.rs +++ b/src/lazy/encoding.rs @@ -4,11 +4,12 @@ use crate::lazy::binary::raw::reader::LazyRawBinaryReader; use crate::lazy::binary::raw::sequence::LazyRawBinarySequence; use crate::lazy::binary::raw::value::LazyRawBinaryValue; use 
crate::lazy::decoder::private::{LazyContainerPrivate, LazyRawFieldPrivate}; -use crate::lazy::decoder::{LazyDecoder, LazyRawField, LazyRawSequence, LazyRawStruct}; +use crate::lazy::decoder::{LazyDecoder, LazyRawField, LazyRawStruct}; use crate::lazy::raw_value_ref::RawValueRef; use crate::lazy::text::raw::reader::LazyRawTextReader; +use crate::lazy::text::raw::sequence::LazyRawTextSequence; use crate::lazy::text::value::LazyRawTextValue; -use crate::{IonResult, IonType, RawSymbolTokenRef}; +use crate::{IonResult, RawSymbolTokenRef}; use std::marker::PhantomData; // These types derive trait implementations in order to allow types that containing them @@ -33,34 +34,6 @@ impl<'data> LazyDecoder<'data> for BinaryEncoding { // === Placeholders === // The types below will need to be properly defined in order for the lazy text reader to be complete. // The exist to satisfy various trait definitions. -#[derive(Debug, Clone)] -pub struct ToDoTextSequence; - -impl<'data> LazyContainerPrivate<'data, TextEncoding> for ToDoTextSequence { - fn from_value(_value: LazyRawTextValue<'data>) -> Self { - todo!() - } -} - -impl<'data> LazyRawSequence<'data, TextEncoding> for ToDoTextSequence { - type Iterator = Box>>>; - - fn annotations(&self) -> ToDoTextAnnotationsIterator<'data> { - todo!() - } - - fn ion_type(&self) -> IonType { - todo!() - } - - fn iter(&self) -> Self::Iterator { - todo!() - } - - fn as_value(&self) -> &>::Value { - todo!() - } -} #[derive(Debug, Clone)] pub struct ToDoTextStruct; @@ -127,7 +100,7 @@ impl<'data> Iterator for ToDoTextAnnotationsIterator<'data> { impl<'data> LazyDecoder<'data> for TextEncoding { type Reader = LazyRawTextReader<'data>; type Value = LazyRawTextValue<'data>; - type Sequence = ToDoTextSequence; + type Sequence = LazyRawTextSequence<'data>; type Struct = ToDoTextStruct; type AnnotationsIterator = ToDoTextAnnotationsIterator<'data>; } diff --git a/src/lazy/text/buffer.rs b/src/lazy/text/buffer.rs index e1c7a5e5..22f17461 100644 --- 
a/src/lazy/text/buffer.rs +++ b/src/lazy/text/buffer.rs @@ -224,6 +224,37 @@ impl<'data> TextBufferView<'data> { ))(self) } + /// Matches a single value in a list OR the end of the list, allowing for leading whitespace + /// and comments in either case. + /// + /// If a value is found, returns `Ok(Some(value))`. If the end of the list is found, returns + /// `Ok(None)`. + pub fn match_list_value(self) -> IonParseResult<'data, Option>> { + preceded( + // Some amount of whitespace/comments... + Self::match_optional_comments_and_whitespace, + // ...followed by either the end of the list... + alt(( + value(None, tag("]")), + // ...or a value... + terminated( + Self::match_value.map(Some), + // ...followed by a comma or end-of-list + Self::match_delimiter_after_list_value, + ), + )), + )(self) + } + + /// Matches syntax that is expected to follow a value in a list: any amount of whitespace and/or + /// comments followed by either a comma (consumed) or an end-of-list `]` (not consumed). + fn match_delimiter_after_list_value(self) -> IonMatchResult<'data> { + preceded( + Self::match_optional_comments_and_whitespace, + alt((tag(","), peek(tag("]")))), + )(self) + } + /// Matches a single top-level scalar value, the beginning of a container, or an IVM. 
pub fn match_top_level(self) -> IonParseResult<'data, RawStreamItem<'data, TextEncoding>> { let (remaining, value) = match self.match_value() { @@ -285,6 +316,12 @@ impl<'data> TextBufferView<'data> { ) }, ), + map( + match_and_length(tag("[")), + |(_matched_list_start, length)| { + EncodedTextValue::new(MatchedValue::List, self.offset(), length) + }, + ), // TODO: The other Ion types )) .map(|encoded_value| LazyRawTextValue { diff --git a/src/lazy/text/encoded_value.rs b/src/lazy/text/encoded_value.rs index f0a2c096..bce5d44b 100644 --- a/src/lazy/text/encoded_value.rs +++ b/src/lazy/text/encoded_value.rs @@ -8,7 +8,7 @@ use std::ops::Range; /// Each [`LazyRawTextValue`](crate::lazy::text::value::LazyRawTextValue) contains an `EncodedValue`, /// allowing a user to re-read (that is: parse) the body of the value as many times as necessary /// without re-parsing its header information each time. -#[derive(Clone, Debug, PartialEq)] +#[derive(Copy, Clone, Debug, PartialEq)] pub(crate) struct EncodedTextValue { // Each encoded text value has up to three components, appearing in the following order: // @@ -117,6 +117,7 @@ impl EncodedTextValue { MatchedValue::Float(_) => IonType::Float, MatchedValue::String(_) => IonType::String, MatchedValue::Symbol(_) => IonType::Symbol, + MatchedValue::List => IonType::List, } } diff --git a/src/lazy/text/matched.rs b/src/lazy/text/matched.rs index db9bdf0b..53fa63b4 100644 --- a/src/lazy/text/matched.rs +++ b/src/lazy/text/matched.rs @@ -22,7 +22,6 @@ use nom::character::is_hex_digit; use std::borrow::Cow; use std::num::IntErrorKind; -use std::ops::Range; use std::str::FromStr; use num_bigint::BigInt; @@ -37,7 +36,7 @@ use crate::result::{DecodingError, IonFailure}; use crate::{Int, IonError, IonResult, IonType, RawSymbolTokenRef}; /// A partially parsed Ion value. 
-#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Copy, Debug, PartialEq)] pub(crate) enum MatchedValue { // `Null` and `Bool` are fully parsed because they only involve matching a keyword. Null(IonType), @@ -46,6 +45,7 @@ pub(crate) enum MatchedValue { Float(MatchedFloat), String(MatchedString), Symbol(MatchedSymbol), + List, // TODO: ...the other types } @@ -160,7 +160,7 @@ impl MatchedFloat { } } -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Copy, Debug, PartialEq)] pub(crate) enum MatchedString { /// The string only has one segment. (e.g. "foo") Short(MatchedShortString), @@ -170,11 +170,12 @@ pub(crate) enum MatchedString { Long(MatchedLongString), } -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Copy, Debug, PartialEq)] pub(crate) struct MatchedLongString { - // Keep a list of all the string segment ranges we found. - // If the user asks to read the string, we'll collate the segments into a single string. - slices: Vec>, + // TODO: Decide what (if anything) to store here. + // Storing any collection of bytes or ranges means that this type cannot implement Copy, + // which in turn means MatchedValue and EncodedTextValue also cannot implement Copy. + // We probably also don't want to heap allocate just to match the long string. } #[derive(Clone, Copy, Debug, PartialEq)] @@ -382,7 +383,7 @@ fn code_point_is_a_high_surrogate(value: u32) -> bool { (0xD800..=0xDFFF).contains(&value) } -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Copy, Debug, PartialEq)] pub(crate) enum MatchedSymbol { /// A numeric symbol ID (e.g. 
`$21`) SymbolId, diff --git a/src/lazy/text/parse_result.rs b/src/lazy/text/parse_result.rs index 5def24ca..4225ca6e 100644 --- a/src/lazy/text/parse_result.rs +++ b/src/lazy/text/parse_result.rs @@ -211,6 +211,26 @@ impl<'data> ParseError> for IonParseError<'data> { } } +/// `Result, _>` has a method called `transpose` that converts it into an `Option>`, +/// allowing it to be easily used in places like iterators that expect that return type. +/// This trait defines a similar extension method for `Result<(TextBufferView, Option)>`. +pub(crate) trait ToIteratorOutput<'data, T> { + fn transpose(self) -> Option>; +} + +impl<'data, T> ToIteratorOutput<'data, T> for IonResult<(TextBufferView<'data>, Option)> { + fn transpose(self) -> Option> { + match self { + Ok((_remaining, Some(value))) => Some(Ok(value)), + Ok((_remaining, None)) => None, + Err(e) => Some(Err(e)), + } + } +} + +/// Converts the output of a text Ion parser (any of `IonParseResult`, `IonParseError`, +/// or `nom::Err`) into a general-purpose `IonResult`. If the implementing type +/// does not have its own `label` and `input`, the specified values will be used. 
pub(crate) trait AddContext<'data, T> { fn with_context( self, diff --git a/src/lazy/text/raw/mod.rs b/src/lazy/text/raw/mod.rs index 1077754f..a9ad6f8d 100644 --- a/src/lazy/text/raw/mod.rs +++ b/src/lazy/text/raw/mod.rs @@ -1 +1,2 @@ pub mod reader; +pub mod sequence; diff --git a/src/lazy/text/raw/reader.rs b/src/lazy/text/raw/reader.rs index 48495d81..99ef7537 100644 --- a/src/lazy/text/raw/reader.rs +++ b/src/lazy/text/raw/reader.rs @@ -145,6 +145,16 @@ mod tests { $0 $10 $733 + + [ + // First item + 1, + // Second item + 2 /*comment before comma*/, + // Third item + 3 + ] + "#, ); @@ -258,6 +268,13 @@ mod tests { RawValueRef::Symbol(RawSymbolTokenRef::SymbolId(733)), ); + let list = reader.next()?.expect_value()?.read()?.expect_list()?; + let mut sum = 0; + for value in &list { + sum += value?.read()?.expect_i64()?; + } + assert_eq!(sum, 6); + Ok(()) } } diff --git a/src/lazy/text/raw/sequence.rs b/src/lazy/text/raw/sequence.rs new file mode 100644 index 00000000..ab1f4616 --- /dev/null +++ b/src/lazy/text/raw/sequence.rs @@ -0,0 +1,126 @@ +use crate::lazy::decoder::private::LazyContainerPrivate; +use crate::lazy::decoder::{LazyDecoder, LazyRawSequence, LazyRawValue}; +use crate::lazy::encoding::TextEncoding; +use crate::lazy::text::buffer::TextBufferView; +use crate::lazy::text::parse_result::AddContext; +use crate::lazy::text::parse_result::ToIteratorOutput; +use crate::lazy::text::value::LazyRawTextValue; +use crate::{IonResult, IonType}; +use std::fmt; +use std::fmt::{Debug, Formatter}; + +#[derive(Copy, Clone)] +pub struct LazyRawTextSequence<'data> { + pub(crate) value: LazyRawTextValue<'data>, +} + +impl<'data> LazyRawTextSequence<'data> { + pub fn ion_type(&self) -> IonType { + self.value.ion_type() + } + + pub fn iter(&self) -> RawTextSequenceIterator<'data> { + // Make an iterator over the input bytes that follow the initial `[` + RawTextSequenceIterator::new(self.value.input.slice_to_end(1)) + } +} + +impl<'data> LazyContainerPrivate<'data, 
TextEncoding> for LazyRawTextSequence<'data> { + fn from_value(value: LazyRawTextValue<'data>) -> Self { + LazyRawTextSequence { value } + } +} + +impl<'data> LazyRawSequence<'data, TextEncoding> for LazyRawTextSequence<'data> { + type Iterator = RawTextSequenceIterator<'data>; + + fn annotations(&self) -> >::AnnotationsIterator { + todo!("lazy sequence annotations") + } + + fn ion_type(&self) -> IonType { + self.value.ion_type() + } + + fn iter(&self) -> Self::Iterator { + LazyRawTextSequence::iter(self) + } + + fn as_value(&self) -> &LazyRawTextValue<'data> { + &self.value + } +} + +impl<'a, 'data> IntoIterator for &'a LazyRawTextSequence<'data> { + type Item = IonResult>; + type IntoIter = RawTextSequenceIterator<'data>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +impl<'a> Debug for LazyRawTextSequence<'a> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self.value.encoded_value.ion_type() { + IonType::SExp => { + write!(f, "(")?; + for value in self { + write!( + f, + "{:?} ", + value + .map_err(|_| fmt::Error)? + .read() + .map_err(|_| fmt::Error)? + )?; + } + write!(f, ")").unwrap(); + } + IonType::List => { + write!(f, "[")?; + for value in self { + write!( + f, + "{:?},", + value + .map_err(|_| fmt::Error)? + .read() + .map_err(|_| fmt::Error)? 
+ )?; + } + write!(f, "]").unwrap(); + } + _ => unreachable!("LazyRawSequence is only created for list and sexp"), + } + + Ok(()) + } +} + +pub struct RawTextSequenceIterator<'data> { + input: TextBufferView<'data>, +} + +impl<'data> RawTextSequenceIterator<'data> { + pub(crate) fn new(input: TextBufferView<'data>) -> RawTextSequenceIterator<'data> { + RawTextSequenceIterator { input } + } +} + +impl<'data> Iterator for RawTextSequenceIterator<'data> { + type Item = IonResult>; + + fn next(&mut self) -> Option { + match self.input.match_list_value() { + Ok((remaining, Some(value))) => { + self.input = remaining; + Some(Ok(value)) + } + Ok((_remaining, None)) => None, + Err(e) => e + .with_context("reading the next list value", self.input) + .transpose(), + } + } +} diff --git a/src/lazy/text/value.rs b/src/lazy/text/value.rs index 3df2e985..dd33b98a 100644 --- a/src/lazy/text/value.rs +++ b/src/lazy/text/value.rs @@ -5,6 +5,7 @@ use crate::lazy::raw_value_ref::RawValueRef; use crate::lazy::text::buffer::TextBufferView; use crate::lazy::text::encoded_value::EncodedTextValue; use crate::lazy::text::matched::MatchedValue; +use crate::lazy::text::raw::sequence::LazyRawTextSequence; use crate::{IonResult, IonType, RawSymbolTokenRef}; use std::fmt; use std::fmt::{Debug, Formatter}; @@ -19,7 +20,7 @@ use std::fmt::{Debug, Formatter}; /// format than in its binary format, but is still possible.) For a resolved lazy value that /// includes a text definition for these items whenever one exists, see /// [`crate::lazy::value::LazyValue`]. -#[derive(Clone)] +#[derive(Copy, Clone)] pub struct LazyRawTextValue<'data> { pub(crate) encoded_value: EncodedTextValue, pub(crate) input: TextBufferView<'data>, @@ -54,7 +55,10 @@ impl<'data> LazyRawValue<'data, TextEncoding> for LazyRawTextValue<'data> { // ...decimal, timestamp... 
MatchedValue::String(s) => RawValueRef::String(s.read(matched_input)?), MatchedValue::Symbol(s) => RawValueRef::Symbol(s.read(matched_input)?), - // ...and the rest! + MatchedValue::List => { + let lazy_sequence = LazyRawTextSequence { value: *self }; + RawValueRef::List(lazy_sequence) + } // ...and the rest! }; Ok(value_ref) } From a6a3aa8c42c801bdbf3f7cb3a51cbabf670bb017 Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Wed, 9 Aug 2023 14:24:08 -1000 Subject: [PATCH 14/15] Adds `LazyRawTextReader` support for structs --- src/lazy/decoder.rs | 8 ++ src/lazy/encoding.rs | 59 +-------- src/lazy/raw_value_ref.rs | 2 +- src/lazy/text/buffer.rs | 202 +++++++++++++++++++++++++++- src/lazy/text/encoded_value.rs | 45 ++++++- src/lazy/text/matched.rs | 86 ++++++------ src/lazy/text/raw/mod.rs | 1 + src/lazy/text/raw/reader.rs | 30 ++++- src/lazy/text/raw/sequence.rs | 103 +++++++++++++-- src/lazy/text/raw/struct.rs | 232 +++++++++++++++++++++++++++++++++ src/lazy/text/value.rs | 15 ++- 11 files changed, 656 insertions(+), 127 deletions(-) create mode 100644 src/lazy/text/raw/struct.rs diff --git a/src/lazy/decoder.rs b/src/lazy/decoder.rs index e53ad2d2..c522a073 100644 --- a/src/lazy/decoder.rs +++ b/src/lazy/decoder.rs @@ -1,5 +1,6 @@ use crate::lazy::raw_stream_item::RawStreamItem; use crate::lazy::raw_value_ref::RawValueRef; +use crate::result::IonFailure; use crate::{IonResult, IonType, RawSymbolTokenRef}; use std::fmt::Debug; @@ -86,6 +87,13 @@ pub trait LazyRawStruct<'data, D: LazyDecoder<'data>>: fn annotations(&self) -> D::AnnotationsIterator; fn find(&self, name: &str) -> IonResult>; fn get(&self, name: &str) -> IonResult>>; + fn get_expected(&self, name: &str) -> IonResult> { + if let Some(value) = self.get(name)? 
{ + Ok(value) + } else { + IonResult::decoding_error(format!("did not find expected struct field '{}'", name)) + } + } fn iter(&self) -> Self::Iterator; } diff --git a/src/lazy/encoding.rs b/src/lazy/encoding.rs index 987bd1f6..3c6fc0f2 100644 --- a/src/lazy/encoding.rs +++ b/src/lazy/encoding.rs @@ -1,16 +1,16 @@ +use std::marker::PhantomData; + use crate::lazy::binary::raw::annotations_iterator::RawBinaryAnnotationsIterator; use crate::lazy::binary::raw::r#struct::LazyRawBinaryStruct; use crate::lazy::binary::raw::reader::LazyRawBinaryReader; use crate::lazy::binary::raw::sequence::LazyRawBinarySequence; use crate::lazy::binary::raw::value::LazyRawBinaryValue; -use crate::lazy::decoder::private::{LazyContainerPrivate, LazyRawFieldPrivate}; -use crate::lazy::decoder::{LazyDecoder, LazyRawField, LazyRawStruct}; -use crate::lazy::raw_value_ref::RawValueRef; +use crate::lazy::decoder::LazyDecoder; +use crate::lazy::text::raw::r#struct::LazyRawTextStruct; use crate::lazy::text::raw::reader::LazyRawTextReader; use crate::lazy::text::raw::sequence::LazyRawTextSequence; use crate::lazy::text::value::LazyRawTextValue; use crate::{IonResult, RawSymbolTokenRef}; -use std::marker::PhantomData; // These types derive trait implementations in order to allow types that containing them // to also derive trait implementations. @@ -35,55 +35,6 @@ impl<'data> LazyDecoder<'data> for BinaryEncoding { // The types below will need to be properly defined in order for the lazy text reader to be complete. // The exist to satisfy various trait definitions. 
-#[derive(Debug, Clone)] -pub struct ToDoTextStruct; - -#[derive(Debug, Clone)] -pub struct ToDoTextField; - -impl<'data> LazyRawFieldPrivate<'data, TextEncoding> for ToDoTextField { - fn into_value(self) -> LazyRawTextValue<'data> { - todo!() - } -} - -impl<'data> LazyRawField<'data, TextEncoding> for ToDoTextField { - fn name(&self) -> RawSymbolTokenRef<'data> { - todo!() - } - - fn value(&self) -> &LazyRawTextValue<'data> { - todo!() - } -} - -impl<'data> LazyContainerPrivate<'data, TextEncoding> for ToDoTextStruct { - fn from_value(_value: ::Value) -> Self { - todo!() - } -} - -impl<'data> LazyRawStruct<'data, TextEncoding> for ToDoTextStruct { - type Field = ToDoTextField; - type Iterator = Box>>; - - fn annotations(&self) -> ToDoTextAnnotationsIterator<'data> { - todo!() - } - - fn find(&self, _name: &str) -> IonResult>> { - todo!() - } - - fn get(&self, _name: &str) -> IonResult>> { - todo!() - } - - fn iter(&self) -> Self::Iterator { - todo!() - } -} - #[derive(Debug, Clone)] pub struct ToDoTextAnnotationsIterator<'data> { spooky: &'data PhantomData<()>, @@ -101,6 +52,6 @@ impl<'data> LazyDecoder<'data> for TextEncoding { type Reader = LazyRawTextReader<'data>; type Value = LazyRawTextValue<'data>; type Sequence = LazyRawTextSequence<'data>; - type Struct = ToDoTextStruct; + type Struct = LazyRawTextStruct<'data>; type AnnotationsIterator = ToDoTextAnnotationsIterator<'data>; } diff --git a/src/lazy/raw_value_ref.rs b/src/lazy/raw_value_ref.rs index 5e76db66..d4c8a614 100644 --- a/src/lazy/raw_value_ref.rs +++ b/src/lazy/raw_value_ref.rs @@ -176,7 +176,7 @@ impl<'data, D: LazyDecoder<'data>> RawValueRef<'data, D> { if let RawValueRef::Struct(s) = self { Ok(s) } else { - IonResult::decoding_error("expected a struct") + IonResult::decoding_error(format!("expected a struct, found: {:?}", self)) } } } diff --git a/src/lazy/text/buffer.rs b/src/lazy/text/buffer.rs index 22f17461..d9788f8e 100644 --- a/src/lazy/text/buffer.rs +++ b/src/lazy/text/buffer.rs @@ -1,6 
+1,6 @@ use std::fmt::{Debug, Formatter}; use std::iter::{Copied, Enumerate}; -use std::ops::{RangeFrom, RangeTo}; +use std::ops::{Range, RangeFrom, RangeTo}; use std::slice::Iter; use nom::branch::alt; @@ -16,10 +16,12 @@ use crate::lazy::encoding::TextEncoding; use crate::lazy::raw_stream_item::RawStreamItem; use crate::lazy::text::encoded_value::EncodedTextValue; use crate::lazy::text::matched::{ - MatchedFloat, MatchedInt, MatchedShortString, MatchedString, MatchedSymbol, MatchedValue, + MatchedFloat, MatchedInt, MatchedString, MatchedSymbol, MatchedValue, }; use crate::lazy::text::parse_result::{InvalidInputError, IonParseError}; use crate::lazy::text::parse_result::{IonMatchResult, IonParseResult}; +use crate::lazy::text::raw::r#struct::{LazyRawTextField, RawTextStructIterator}; +use crate::lazy::text::raw::sequence::RawTextSequenceIterator; use crate::lazy::text::value::LazyRawTextValue; use crate::result::DecodingError; use crate::{IonError, IonResult, IonType}; @@ -246,6 +248,78 @@ impl<'data> TextBufferView<'data> { )(self) } + /// Matches a struct field name/value pair. + /// + /// If a pair is found, returns `Some(field)` and consumes the following comma if present. + /// If no pair is found (that is: the end of the struct is next), returns `None`. + pub fn match_struct_field(self) -> IonParseResult<'data, Option>> { + // A struct field can have leading whitespace, but we want the buffer slice that we match + // to begin with the field name. Here we skip any whitespace so we have another named + // slice (`input_including_field_name`) with that property. + let (input_including_field_name, _ws) = self.match_optional_comments_and_whitespace()?; + alt(( + // If the next thing in the input is a `}`, return `None`. + value(None, Self::match_struct_end), + // Otherwise, match a name/value pair and turn it into a `LazyRawTextField`. 
+ Self::match_struct_field_name_and_value.map( + move |((name_syntax, name_span), mut value)| { + // Add the field name offsets to the `EncodedTextValue` + value.encoded_value = value.encoded_value.with_field_name( + name_syntax, + name_span.start, + name_span.len(), + ); + // Replace the value's buffer slice (which starts with the value itself) with the + // buffer slice we created that begins with the field name. + value.input = input_including_field_name; + Some(LazyRawTextField { value }) + }, + ), + ))(input_including_field_name) + } + + /// Matches any amount of whitespace followed by a closing `}`. + fn match_struct_end(self) -> IonMatchResult<'data> { + whitespace_and_then(peek(tag("}"))).parse(self) + } + + /// Matches a field name/value pair. Returns the syntax used for the field name, the range of + /// input bytes where the field name is found, and the value. + pub fn match_struct_field_name_and_value( + self, + ) -> IonParseResult<'data, ((MatchedSymbol, Range), LazyRawTextValue<'data>)> { + terminated( + separated_pair( + whitespace_and_then(match_and_span(Self::match_struct_field_name)), + whitespace_and_then(tag(":")), + whitespace_and_then(Self::match_value), + ), + whitespace_and_then(alt((tag(","), peek(tag("}"))))), + )(self) + } + + /// Matches a struct field name. That is: + /// * A quoted symbol + /// * An identifier + /// * A symbol ID + /// * A short-form string + pub fn match_struct_field_name(self) -> IonParseResult<'data, MatchedSymbol> { + alt(( + Self::match_symbol, + Self::match_short_string.map(|s| { + // NOTE: We're "casting" the matched short string to a matched symbol here. + // This relies on the fact that the MatchedSymbol logic ignores + // the first and last matched byte, which are usually single + // quotes but in this case are double quotes. 
+ match s { + MatchedString::ShortWithoutEscapes => MatchedSymbol::QuotedWithoutEscapes, + MatchedString::ShortWithEscapes => MatchedSymbol::QuotedWithEscapes, + _ => unreachable!("field name parser matched long string"), + } + }), + ))(self) + } + /// Matches syntax that is expected to follow a value in a list: any amount of whitespace and/or /// comments followed by either a comma (consumed) or an end-of-list `]` (not consumed). fn match_delimiter_after_list_value(self) -> IonMatchResult<'data> { @@ -317,9 +391,15 @@ impl<'data> TextBufferView<'data> { }, ), map( - match_and_length(tag("[")), - |(_matched_list_start, length)| { - EncodedTextValue::new(MatchedValue::List, self.offset(), length) + match_and_length(Self::match_list), + |(matched_list, length)| { + EncodedTextValue::new(MatchedValue::List, matched_list.offset(), length) + }, + ), + map( + match_and_length(Self::match_struct), + |(matched_struct, length)| { + EncodedTextValue::new(MatchedValue::Struct, matched_struct.offset(), length) }, ), // TODO: The other Ion types @@ -331,6 +411,74 @@ impl<'data> TextBufferView<'data> { .parse(self) } + /// Matches a list. + /// + /// If the input does not contain the entire list, returns `IonError::Incomplete(_)`. + pub fn match_list(self) -> IonMatchResult<'data> { + // If it doesn't start with [, it isn't a list. + if self.bytes().first() != Some(&b'[') { + let error = InvalidInputError::new(self); + return Err(nom::Err::Error(IonParseError::Invalid(error))); + } + // Scan ahead to find the end of this list. + let list_body = self.slice_to_end(1); + let sequence_iter = RawTextSequenceIterator::new(b']', list_body); + let span = match sequence_iter.find_span() { + Ok(span) => span, + // If the complete container isn't available, return an incomplete. + Err(IonError::Incomplete(_)) => return Err(nom::Err::Incomplete(Needed::Unknown)), + // If invalid syntax was encountered, return a failure to prevent nom from trying + // other parser kinds. 
+ Err(e) => { + return { + let error = InvalidInputError::new(self) + .with_label("matching a list") + .with_description(format!("{}", e)); + Err(nom::Err::Failure(IonParseError::Invalid(error))) + } + } + }; + + // For the matched span, we use `self` again to include the opening `[` + let matched = self.slice(0, span.len()); + let remaining = self.slice_to_end(span.len()); + Ok((remaining, matched)) + } + + /// Matches a struct. + /// + /// If the input does not contain the entire struct, returns `IonError::Incomplete(_)`. + pub fn match_struct(self) -> IonMatchResult<'data> { + // If it doesn't start with {, it isn't a struct. + if self.bytes().first() != Some(&b'{') { + let error = InvalidInputError::new(self); + return Err(nom::Err::Error(IonParseError::Invalid(error))); + } + // Scan ahead to find the end of this struct. + let struct_body = self.slice_to_end(1); + let struct_iter = RawTextStructIterator::new(struct_body); + let span = match struct_iter.find_span() { + Ok(span) => span, + // If the complete container isn't available, return an incomplete. + Err(IonError::Incomplete(_)) => return Err(nom::Err::Incomplete(Needed::Unknown)), + // If invalid syntax was encountered, return a failure to prevent nom from trying + // other parser kinds. + Err(e) => { + return { + let error = InvalidInputError::new(self) + .with_label("matching a struct") + .with_description(format!("{}", e)); + Err(nom::Err::Failure(IonParseError::Invalid(error))) + } + } + }; + + // For the matched span, we use `self` again to include the opening `{` + let matched = self.slice(0, span.len()); + let remaining = self.slice_to_end(span.len()); + Ok((remaining, matched)) + } + /// Matches a boolean value. 
pub fn match_bool(self) -> IonMatchResult<'data> { recognize(Self::read_bool)(self) @@ -617,7 +765,11 @@ impl<'data> TextBufferView<'data> { fn match_short_string(self) -> IonParseResult<'data, MatchedString> { delimited(char('"'), Self::match_short_string_body, char('"')) .map(|(_matched, contains_escaped_chars)| { - MatchedString::Short(MatchedShortString::new(contains_escaped_chars)) + if contains_escaped_chars { + MatchedString::ShortWithEscapes + } else { + MatchedString::ShortWithoutEscapes + } }) .parse(self) } @@ -715,7 +867,13 @@ impl<'data> TextBufferView<'data> { /// Matches a quoted symbol (`'foo'`). fn match_quoted_symbol(self) -> IonParseResult<'data, MatchedSymbol> { delimited(char('\''), Self::match_quoted_symbol_body, char('\'')) - .map(|(_matched, contains_escaped_chars)| MatchedSymbol::Quoted(contains_escaped_chars)) + .map(|(_matched, contains_escaped_chars)| { + if contains_escaped_chars { + MatchedSymbol::QuotedWithEscapes + } else { + MatchedSymbol::QuotedWithoutEscapes + } + }) .parse(self) } @@ -906,6 +1064,18 @@ impl<'data> nom::InputTakeAtPosition for TextBufferView<'data> { // === end of `nom` trait implementations +fn whitespace_and_then<'data, P, O>( + parser: P, +) -> impl Parser, O, IonParseError<'data>> +where + P: Parser, O, IonParseError<'data>>, +{ + preceded( + TextBufferView::match_optional_comments_and_whitespace, + parser, + ) +} + /// Augments a given parser such that it returns the matched value and the number of input bytes /// that it matched. 
fn match_and_length<'data, P, O>( @@ -926,6 +1096,24 @@ where } } +fn match_and_span<'data, P, O>( + mut parser: P, +) -> impl Parser, (O, Range), IonParseError<'data>> +where + P: Parser, O, IonParseError<'data>>, +{ + move |input: TextBufferView<'data>| { + let offset_before = input.offset(); + let (remaining, matched) = match parser.parse(input) { + Ok((remaining, matched)) => (remaining, matched), + Err(e) => return Err(e), + }; + let offset_after = remaining.offset(); + let span = offset_before..offset_after; + Ok((remaining, (matched, span))) + } +} + /// Returns the number of bytes that the provided parser matched. fn match_length<'data, P, O>( parser: P, diff --git a/src/lazy/text/encoded_value.rs b/src/lazy/text/encoded_value.rs index bce5d44b..6a1dbece 100644 --- a/src/lazy/text/encoded_value.rs +++ b/src/lazy/text/encoded_value.rs @@ -1,5 +1,7 @@ -use crate::lazy::text::matched::MatchedValue; -use crate::IonType; +use crate::lazy::text::buffer::TextBufferView; +use crate::lazy::text::matched::{MatchedSymbol, MatchedValue}; +use crate::result::IonFailure; +use crate::{IonResult, IonType}; use std::ops::Range; /// Represents the type, offset, and length metadata of the various components of an encoded value @@ -51,7 +53,7 @@ pub(crate) struct EncodedTextValue { // If there is whitespace before the field name, this will not include it. field_name_length: u32, // The number of bytes used to encode the annotations sequence preceding the data, if any. - // If there is no annotations sequence, this will be zero. // If there is whitespace before the + // If there is no annotations sequence, this will be zero. If there is whitespace before the // annotations sequence, this will not include it. annotations_length: u32, @@ -60,6 +62,8 @@ pub(crate) struct EncodedTextValue { // value is stored. For others (e.g. a timestamp), the various components of the value are // recognized during matching and partial information like subfield offsets can be stored here. 
matched_value: MatchedValue, + + field_name_syntax: Option, } impl EncodedTextValue { @@ -76,6 +80,7 @@ impl EncodedTextValue { annotations_offset: 0, annotations_length: 0, matched_value, + field_name_syntax: None, } } @@ -86,7 +91,13 @@ impl EncodedTextValue { // 'foo' // "foo" // $10 - pub(crate) fn with_field_name(mut self, offset: usize, length: usize) -> EncodedTextValue { + pub(crate) fn with_field_name( + mut self, + field_name_syntax: MatchedSymbol, + offset: usize, + length: usize, + ) -> EncodedTextValue { + self.field_name_syntax = Some(field_name_syntax); self.field_name_offset = (self.data_offset - offset) as u32; self.field_name_length = length as u32; self @@ -118,6 +129,7 @@ impl EncodedTextValue { MatchedValue::String(_) => IonType::String, MatchedValue::Symbol(_) => IonType::Symbol, MatchedValue::List => IonType::List, + MatchedValue::Struct => IonType::Struct, } } @@ -125,6 +137,10 @@ impl EncodedTextValue { matches!(self.matched_value, MatchedValue::Null(_)) } + pub fn data_offset(&self) -> usize { + self.data_offset + } + pub fn data_length(&self) -> usize { self.data_length } @@ -133,6 +149,17 @@ impl EncodedTextValue { self.data_offset..(self.data_offset + self.data_length) } + pub fn field_name<'data>(&self, input: TextBufferView<'data>) -> IonResult<&'data str> { + if self.field_name_offset == 0 { + return IonResult::illegal_operation( + "requested field name, but value was not in a struct field", + ); + } + let relative_start = self.data_offset - input.offset() - (self.field_name_offset as usize); + let field_name_bytes = input.slice(relative_start, self.field_name_length as usize); + field_name_bytes.as_text() + } + pub fn field_name_range(&self) -> Option> { if self.field_name_offset == 0 { return None; @@ -169,6 +196,10 @@ impl EncodedTextValue { pub fn matched(&self) -> &MatchedValue { &self.matched_value } + + pub fn field_name_syntax(&self) -> Option { + self.field_name_syntax + } } #[cfg(test)] @@ -184,7 +215,7 @@ mod tests { 
#[test] fn total_length_data_with_field_name() { let value = EncodedTextValue::new(MatchedValue::Null(IonType::Null), 100, 12) - .with_field_name(90, 4); + .with_field_name(MatchedSymbol::Identifier, 90, 4); assert_eq!(value.total_length(), 22); } @@ -198,13 +229,13 @@ mod tests { #[test] fn total_length_data_with_field_name_and_annotations() { let value = EncodedTextValue::new(MatchedValue::Null(IonType::Null), 100, 12) - .with_field_name(90, 4) + .with_field_name(MatchedSymbol::Identifier, 90, 4) .with_annotations_sequence(94, 6); assert_eq!(value.total_length(), 22); // Same test but with extra whitespace between the components let value = EncodedTextValue::new(MatchedValue::Null(IonType::Null), 100, 12) - .with_field_name(80, 4) + .with_field_name(MatchedSymbol::Identifier, 80, 4) .with_annotations_sequence(91, 6); assert_eq!(value.total_length(), 32, "{:?}", value); } diff --git a/src/lazy/text/matched.rs b/src/lazy/text/matched.rs index 53fa63b4..e6daf3dc 100644 --- a/src/lazy/text/matched.rs +++ b/src/lazy/text/matched.rs @@ -46,6 +46,7 @@ pub(crate) enum MatchedValue { String(MatchedString), Symbol(MatchedSymbol), List, + Struct, // TODO: ...the other types } @@ -53,6 +54,7 @@ pub(crate) enum MatchedValue { #[derive(Copy, Clone, Debug, PartialEq)] pub(crate) struct MatchedInt { radix: u32, + // Offset of the digits from the beginning of the value digits_offset: usize, is_negative: bool, } @@ -163,7 +165,8 @@ impl MatchedFloat { #[derive(Clone, Copy, Debug, PartialEq)] pub(crate) enum MatchedString { /// The string only has one segment. (e.g. "foo") - Short(MatchedShortString), + ShortWithoutEscapes, + ShortWithEscapes, /// The string is in multiple segments: /// """hello,""" /// """ world!""" @@ -178,50 +181,41 @@ pub(crate) struct MatchedLongString { // We probably also don't want to heap allocate just to match the long string. 
} -#[derive(Clone, Copy, Debug, PartialEq)] -pub(crate) struct MatchedShortString { - contains_escaped_chars: bool, -} - -impl MatchedShortString { - pub fn new(contains_escaped_chars: bool) -> Self { - Self { - contains_escaped_chars, - } - } - pub fn contains_escaped_chars(&self) -> bool { - self.contains_escaped_chars - } -} - impl MatchedString { // Strings longer than 64 bytes will allocate a larger space on the heap. const STACK_ALLOC_BUFFER_CAPACITY: usize = 64; pub fn read<'data>(&self, matched_input: TextBufferView<'data>) -> IonResult> { match self { - MatchedString::Short(short) => self.read_short_string(*short, matched_input), + MatchedString::ShortWithoutEscapes => { + self.read_short_string_without_escapes(matched_input) + } + MatchedString::ShortWithEscapes => self.read_short_string_with_escapes(matched_input), MatchedString::Long(_) => todo!("long-form strings"), } } - fn read_short_string<'data>( + fn read_short_string_without_escapes<'data>( + &self, + matched_input: TextBufferView<'data>, + ) -> IonResult> { + // Take a slice of the input that ignores the first and last bytes, which are quotes. + let body = matched_input.slice(1, matched_input.len() - 2); + // There are no escaped characters, so we can just validate the string in-place. + let text = body.as_text()?; + let str_ref = StrRef::from(text); + Ok(str_ref) + } + + fn read_short_string_with_escapes<'data>( &self, - short: MatchedShortString, matched_input: TextBufferView<'data>, ) -> IonResult> { // Take a slice of the input that ignores the first and last bytes, which are quotes. let body = matched_input.slice(1, matched_input.len() - 2); - if !short.contains_escaped_chars() { - // There are no escaped characters, so we can just validate the string in-place. - let text = body.as_text()?; - let str_ref = StrRef::from(text); - return Ok(str_ref); - } // Otherwise, there are escaped characters. 
We need to build a new version of our string // that replaces the escaped characters with their corresponding bytes. let mut sanitized = Vec::with_capacity(matched_input.len()); - escape_text(body, &mut sanitized)?; let text = String::from_utf8(sanitized).unwrap(); Ok(StrRef::from(text.to_string())) @@ -389,9 +383,10 @@ pub(crate) enum MatchedSymbol { SymbolId, /// The symbol is an unquoted identifier (e.g. `foo`) Identifier, - /// The symbol is delimited by single quotes. Holds a `bool` indicating whether the - /// matched input contained any escaped bytes. - Quoted(bool), + /// The symbol is delimited by single quotes but contains no escape sequences. + QuotedWithoutEscapes, + /// The symbol is delimited by single quotes and has at least one escape sequence. + QuotedWithEscapes, // TODO: Operators in S-Expressions } @@ -403,27 +398,31 @@ impl MatchedSymbol { match self { MatchedSymbol::SymbolId => self.read_symbol_id(matched_input), MatchedSymbol::Identifier => self.read_identifier(matched_input), - MatchedSymbol::Quoted(contains_escaped_chars) => { - self.read_quoted(matched_input, *contains_escaped_chars) - } + MatchedSymbol::QuotedWithEscapes => self.read_quoted_with_escapes(matched_input), + MatchedSymbol::QuotedWithoutEscapes => self.read_quoted_without_escapes(matched_input), } } - fn read_quoted<'data>( + pub(crate) fn read_quoted_without_escapes<'data>( &self, matched_input: TextBufferView<'data>, - contains_escaped_chars: bool, ) -> IonResult> { // Take a slice of the input that ignores the first and last bytes, which are quotes. let body = matched_input.slice(1, matched_input.len() - 2); - if !contains_escaped_chars { - // There are no escaped characters, so we can just validate the string in-place. - let text = body.as_text()?; - let str_ref = RawSymbolTokenRef::Text(text.into()); - return Ok(str_ref); - } + // There are no escaped characters, so we can just validate the string in-place. 
+ let text = body.as_text()?; + let str_ref = RawSymbolTokenRef::Text(text.into()); + Ok(str_ref) + } - // Otherwise, there are escaped characters. We need to build a new version of our symbol + pub(crate) fn read_quoted_with_escapes<'data>( + &self, + matched_input: TextBufferView<'data>, + ) -> IonResult> { + // Take a slice of the input that ignores the first and last bytes, which are quotes. + let body = matched_input.slice(1, matched_input.len() - 2); + + // There are escaped characters. We need to build a new version of our symbol // that replaces the escaped characters with their corresponding bytes. let mut sanitized = Vec::with_capacity(matched_input.len()); @@ -431,7 +430,8 @@ impl MatchedSymbol { let text = String::from_utf8(sanitized).unwrap(); Ok(RawSymbolTokenRef::Text(text.into())) } - fn read_identifier<'data>( + + pub(crate) fn read_identifier<'data>( &self, matched_input: TextBufferView<'data>, ) -> IonResult> { diff --git a/src/lazy/text/raw/mod.rs b/src/lazy/text/raw/mod.rs index a9ad6f8d..43f7a659 100644 --- a/src/lazy/text/raw/mod.rs +++ b/src/lazy/text/raw/mod.rs @@ -1,2 +1,3 @@ pub mod reader; pub mod sequence; +pub mod r#struct; diff --git a/src/lazy/text/raw/reader.rs b/src/lazy/text/raw/reader.rs index 99ef7537..9ff7a5c0 100644 --- a/src/lazy/text/raw/reader.rs +++ b/src/lazy/text/raw/reader.rs @@ -39,7 +39,7 @@ impl<'data> LazyRawTextReader<'data> { { let buffer = self.buffer; if buffer.is_empty() { - return IonResult::incomplete("reading a top-level value", buffer.offset()); + return Ok(RawStreamItem::EndOfStream); } let (buffer_after_whitespace, _whitespace) = @@ -55,7 +55,8 @@ impl<'data> LazyRawTextReader<'data> { let (remaining, matched) = buffer_after_whitespace .match_top_level() .with_context("reading a top-level value", buffer_after_whitespace)?; - // If we successfully moved to the next value, store the remaining buffer view + // Since we successfully matched the next value, we'll update the buffer + // so a future call to 
`next()` will resume parsing the remaining input. self.buffer = remaining; Ok(matched) } @@ -73,11 +74,12 @@ impl<'data> LazyRawReader<'data, TextEncoding> for LazyRawTextReader<'data> { #[cfg(test)] mod tests { - use super::*; - use crate::lazy::decoder::LazyRawValue; + use crate::lazy::decoder::{LazyRawStruct, LazyRawValue}; use crate::lazy::raw_value_ref::RawValueRef; use crate::{IonType, RawSymbolTokenRef}; + use super::*; + #[test] fn test_top_level() -> IonResult<()> { let mut data = String::new(); @@ -155,6 +157,15 @@ mod tests { 3 ] + { + // Identifier + foo: 100, + // Quoted symbol + 'bar': 200, + // Short-form string + "baz": 300 + } + "#, ); @@ -268,6 +279,7 @@ mod tests { RawValueRef::Symbol(RawSymbolTokenRef::SymbolId(733)), ); + // [1, 2, 3] let list = reader.next()?.expect_value()?.read()?.expect_list()?; let mut sum = 0; for value in &list { @@ -275,6 +287,16 @@ mod tests { } assert_eq!(sum, 6); + // {foo: 100, bar: 200, baz: 300} + let item = reader.next()?; + let value = item.expect_value()?.read()?; + let strukt = value.expect_struct()?; + let mut sum = 0; + sum += strukt.get_expected("foo")?.expect_i64()?; + sum += strukt.get_expected("bar")?.expect_i64()?; + sum += strukt.get_expected("baz")?.expect_i64()?; + assert_eq!(sum, 600); + Ok(()) } } diff --git a/src/lazy/text/raw/sequence.rs b/src/lazy/text/raw/sequence.rs index ab1f4616..2e4e7e1c 100644 --- a/src/lazy/text/raw/sequence.rs +++ b/src/lazy/text/raw/sequence.rs @@ -1,3 +1,9 @@ +use std::fmt; +use std::fmt::{Debug, Formatter}; +use std::ops::Range; + +use nom::character::streaming::satisfy; + use crate::lazy::decoder::private::LazyContainerPrivate; use crate::lazy::decoder::{LazyDecoder, LazyRawSequence, LazyRawValue}; use crate::lazy::encoding::TextEncoding; @@ -6,8 +12,6 @@ use crate::lazy::text::parse_result::AddContext; use crate::lazy::text::parse_result::ToIteratorOutput; use crate::lazy::text::value::LazyRawTextValue; use crate::{IonResult, IonType}; -use std::fmt; -use 
std::fmt::{Debug, Formatter}; #[derive(Copy, Clone)] pub struct LazyRawTextSequence<'data> { @@ -21,7 +25,7 @@ impl<'data> LazyRawTextSequence<'data> { pub fn iter(&self) -> RawTextSequenceIterator<'data> { // Make an iterator over the input bytes that follow the initial `[` - RawTextSequenceIterator::new(self.value.input.slice_to_end(1)) + RawTextSequenceIterator::new(b']', self.value.input.slice_to_end(1)) } } @@ -98,13 +102,50 @@ impl<'a> Debug for LazyRawTextSequence<'a> { } } +#[derive(Copy, Clone, Debug)] pub struct RawTextSequenceIterator<'data> { + end_delimiter: u8, input: TextBufferView<'data>, + // If this iterator has returned an error, it should return `None` forever afterwards + has_returned_error: bool, +} + +impl<'data> RawTextSequenceIterator<'data> { + pub(crate) fn new( + end_delimiter: u8, + input: TextBufferView<'data>, + ) -> RawTextSequenceIterator<'data> { + RawTextSequenceIterator { + end_delimiter, + input, + has_returned_error: false, + } + } } impl<'data> RawTextSequenceIterator<'data> { - pub(crate) fn new(input: TextBufferView<'data>) -> RawTextSequenceIterator<'data> { - RawTextSequenceIterator { input } + pub(crate) fn find_span(&self) -> IonResult> { + // The input has already skipped past the opening delimiter. + let start = self.input.offset() - 1; + // We need to find the input slice containing the closing delimiter. It's either... + let input_after_last = if let Some(value_result) = self.last() { + let value = value_result?; + // ...the input slice that follows the last sequence value... + value.input.slice_to_end(value.encoded_value.total_length()) + } else { + // ...or there aren't values, so it's just the input after the opening delimiter. 
+ self.input + }; + let (input_after_ws, _ws) = input_after_last + .match_optional_comments_and_whitespace() + .with_context("seeking the end of a sequence", input_after_last)?; + let (input_after_end, _end_delimiter) = + satisfy(|c| c == self.end_delimiter as char)(input_after_ws).with_context( + "seeking the closing delimiter of a sequence", + input_after_ws, + )?; + let end = input_after_end.offset(); + Ok(start..end) } } @@ -112,15 +153,61 @@ impl<'data> Iterator for RawTextSequenceIterator<'data> { type Item = IonResult>; fn next(&mut self) -> Option { + if self.has_returned_error { + return None; + } match self.input.match_list_value() { Ok((remaining, Some(value))) => { self.input = remaining; Some(Ok(value)) } Ok((_remaining, None)) => None, - Err(e) => e - .with_context("reading the next list value", self.input) - .transpose(), + Err(e) => { + self.has_returned_error = true; + e.with_context("reading the next list value", self.input) + .transpose() + } + } + } +} + +#[cfg(test)] +mod tests { + use std::ops::Range; + + use crate::lazy::text::raw::reader::LazyRawTextReader; + use crate::IonResult; + + fn expect_sequence_range(ion_data: &str, expected: Range) -> IonResult<()> { + let reader = &mut LazyRawTextReader::new(ion_data.as_bytes()); + let value = reader.next()?.expect_value()?; + let actual_range = value.encoded_value.data_range(); + assert_eq!( + actual_range, expected, + "Sequence range ({:?}) did not match expected range ({:?})", + actual_range, expected + ); + Ok(()) + } + + #[test] + fn list_range() -> IonResult<()> { + // For each pair below, we'll confirm that the top-level list is found to + // occupy the specified input span. 
+ let tests = &[ + // (Ion input, expected range of the sequence) + ("[]", 0..2), + (" [] ", 2..4), + ("[1, 2]", 0..6), + ("[1, /* comment ]]] */ 2]", 0..24), + // Nested + ("[1, 2, [3, 4, 5], 6]", 0..20), + // Doubly nested + ("[1, 2, [3, [a, b, c], 5], 6]", 0..28), + ]; + for test in tests { + expect_sequence_range(test.0, test.1.clone())?; } + Ok(()) } } diff --git a/src/lazy/text/raw/struct.rs b/src/lazy/text/raw/struct.rs new file mode 100644 index 00000000..f9f3742d --- /dev/null +++ b/src/lazy/text/raw/struct.rs @@ -0,0 +1,232 @@ +use crate::lazy::decoder::private::{LazyContainerPrivate, LazyRawFieldPrivate}; +use crate::lazy::decoder::{LazyRawField, LazyRawStruct, LazyRawValue}; +use crate::lazy::encoding::{TextEncoding, ToDoTextAnnotationsIterator}; +use crate::lazy::raw_value_ref::RawValueRef; +use crate::lazy::text::buffer::TextBufferView; +use crate::lazy::text::parse_result::{AddContext, ToIteratorOutput}; +use crate::lazy::text::value::LazyRawTextValue; +use crate::raw_symbol_token_ref::AsRawSymbolTokenRef; +use crate::{IonResult, RawSymbolTokenRef}; +use nom::character::streaming::satisfy; +use std::ops::Range; + +#[derive(Clone, Copy, Debug)] +pub struct RawTextStructIterator<'data> { + input: TextBufferView<'data>, + has_returned_error: bool, +} + +impl<'data> RawTextStructIterator<'data> { + pub(crate) fn new(input: TextBufferView<'data>) -> Self { + RawTextStructIterator { + input, + has_returned_error: false, + } + } + + pub(crate) fn find_span(&self) -> IonResult> { + // The input has already skipped past the opening delimiter. + let start = self.input.offset() - 1; + // We need to find the input slice containing the closing delimiter. It's either... + let input_after_last = if let Some(field_result) = self.last() { + let field = field_result?; + // ...the input slice that follows the last field... 
+ field + .value + .input + .slice_to_end(field.value.encoded_value.total_length()) + } else { + // ...or there aren't fields, so it's just the input after the opening delimiter. + self.input + }; + let (input_after_ws, _ws) = input_after_last + .match_optional_comments_and_whitespace() + .with_context("seeking the end of a struct", input_after_last)?; + let (input_after_end, _end_delimiter) = satisfy(|c| c == b'}' as char)(input_after_ws) + .with_context("seeking the closing delimiter of a struct", input_after_ws)?; + let end = input_after_end.offset(); + Ok(start..end) + } +} + +impl<'data> Iterator for RawTextStructIterator<'data> { + type Item = IonResult>; + + fn next(&mut self) -> Option { + if self.has_returned_error { + return None; + } + match self.input.match_struct_field() { + Ok((remaining_input, Some(field))) => { + self.input = remaining_input; + Some(Ok(field)) + } + Ok((_, None)) => None, + Err(e) => { + self.has_returned_error = true; + e.with_context("reading the next struct field", self.input) + .transpose() + } + } + } +} + +#[derive(Clone, Copy, Debug)] +pub struct LazyRawTextField<'data> { + pub(crate) value: LazyRawTextValue<'data>, +} + +impl<'data> LazyRawTextField<'data> { + pub(crate) fn new(value: LazyRawTextValue<'data>) -> Self { + LazyRawTextField { value } + } + + pub fn name(&self) -> RawSymbolTokenRef<'data> { + // We're in a struct field, the field name _must_ be populated. + // If it's not (or the field name is not a valid SID or UTF-8 string), + // that's a bug. We can safely unwrap/expect here. 
+ let matched_symbol = self + .value + .encoded_value + .field_name_syntax() + .expect("field name syntax not available"); + let name_length = self + .value + .encoded_value + .field_name_range() + .expect("field name length not available") + .len(); + matched_symbol + .read(self.value.input.slice(0, name_length)) + .expect("invalid struct field name") + } + + pub fn value(&self) -> &LazyRawTextValue<'data> { + &self.value + } + + pub(crate) fn into_value(self) -> LazyRawTextValue<'data> { + self.value + } +} + +impl<'data> LazyRawFieldPrivate<'data, TextEncoding> for LazyRawTextField<'data> { + fn into_value(self) -> LazyRawTextValue<'data> { + self.value + } +} + +impl<'data> LazyRawField<'data, TextEncoding> for LazyRawTextField<'data> { + fn name(&self) -> RawSymbolTokenRef<'data> { + LazyRawTextField::name(self) + } + + fn value(&self) -> &LazyRawTextValue<'data> { + LazyRawTextField::value(self) + } +} + +#[derive(Clone, Copy, Debug)] +pub struct LazyRawTextStruct<'data> { + pub(crate) value: LazyRawTextValue<'data>, +} + +impl<'data> LazyRawTextStruct<'data> { + fn find(&self, name: &str) -> IonResult>> { + let name: RawSymbolTokenRef = name.as_raw_symbol_token_ref(); + for field_result in *self { + let field = field_result?; + let field_name = field.name(); + if field_name == name { + let value = field.value; + return Ok(Some(value)); + } + } + Ok(None) + } + + fn get(&self, name: &str) -> IonResult>> { + self.find(name)?.map(|f| f.read()).transpose() + } +} + +impl<'data> LazyContainerPrivate<'data, TextEncoding> for LazyRawTextStruct<'data> { + fn from_value(value: LazyRawTextValue<'data>) -> Self { + LazyRawTextStruct { value } + } +} + +impl<'data> LazyRawStruct<'data, TextEncoding> for LazyRawTextStruct<'data> { + type Field = LazyRawTextField<'data>; + type Iterator = RawTextStructIterator<'data>; + + fn annotations(&self) -> ToDoTextAnnotationsIterator<'data> { + todo!() + } + + fn find(&self, name: &str) -> IonResult>> { + self.find(name) + } + + fn 
get(&self, name: &str) -> IonResult>> { + self.get(name) + } + + fn iter(&self) -> Self::Iterator { + // Slice the input to skip the opening `{` + RawTextStructIterator::new(self.value.input.slice_to_end(1)) + } +} + +impl<'data> IntoIterator for LazyRawTextStruct<'data> { + type Item = IonResult>; + type IntoIter = RawTextStructIterator<'data>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +#[cfg(test)] +mod tests { + use std::ops::Range; + + use crate::lazy::text::raw::reader::LazyRawTextReader; + use crate::IonResult; + + fn expect_struct_range(ion_data: &str, expected: Range) -> IonResult<()> { + let reader = &mut LazyRawTextReader::new(ion_data.as_bytes()); + let value = reader.next()?.expect_value()?; + let actual_range = value.encoded_value.data_range(); + assert_eq!( + actual_range, expected, + "Struct range ({:?}) did not match expected range ({:?})", + actual_range, expected + ); + println!("input ok: {}", ion_data); + Ok(()) + } + + #[test] + fn struct_range() -> IonResult<()> { + // For each pair below, we'll confirm that the top-level struct is found to + // occupy the specified input span. 
+ let tests = &[ + // (Ion input, expected range of the struct) + ("{}", 0..2), + (" {} ", 2..4), + ("{a:1}", 0..5), + ("{a: 1}", 0..6), + ("{a: 1, b: 2}", 0..12), + ("{a: 1, /* comment }}} */ b: 2}", 0..30), + // Nested + ("{a: 1, b: 2, c: {d: 3, e: 4, f: 5}, g: 6}", 0..41), + // Doubly nested + ("{a: 1, b: 2, c: {d: 3, e: {foo: bar}, f: 5}, g: 6}", 0..50), + ]; + for test in tests { + expect_struct_range(test.0, test.1.clone())?; + } + Ok(()) + } +} diff --git a/src/lazy/text/value.rs b/src/lazy/text/value.rs index dd33b98a..0059dd8b 100644 --- a/src/lazy/text/value.rs +++ b/src/lazy/text/value.rs @@ -1,3 +1,6 @@ +use std::fmt; +use std::fmt::{Debug, Formatter}; + use crate::lazy::decoder::private::LazyRawValuePrivate; use crate::lazy::decoder::{LazyDecoder, LazyRawValue}; use crate::lazy::encoding::TextEncoding; @@ -5,10 +8,9 @@ use crate::lazy::raw_value_ref::RawValueRef; use crate::lazy::text::buffer::TextBufferView; use crate::lazy::text::encoded_value::EncodedTextValue; use crate::lazy::text::matched::MatchedValue; +use crate::lazy::text::raw::r#struct::LazyRawTextStruct; use crate::lazy::text::raw::sequence::LazyRawTextSequence; use crate::{IonResult, IonType, RawSymbolTokenRef}; -use std::fmt; -use std::fmt::{Debug, Formatter}; /// A value that has been identified in the text input stream but whose data has not yet been read. 
/// @@ -46,7 +48,10 @@ impl<'data> LazyRawValue<'data, TextEncoding> for LazyRawTextValue<'data> { } fn read(&self) -> IonResult> { - let matched_input = self.input.slice(0, self.encoded_value.data_length()); + let matched_input = self.input.slice( + self.encoded_value.data_offset() - self.input.offset(), + self.encoded_value.data_length(), + ); let value_ref = match self.encoded_value.matched() { MatchedValue::Null(ion_type) => RawValueRef::Null(*ion_type), MatchedValue::Bool(b) => RawValueRef::Bool(*b), @@ -58,6 +63,10 @@ impl<'data> LazyRawValue<'data, TextEncoding> for LazyRawTextValue<'data> { MatchedValue::List => { let lazy_sequence = LazyRawTextSequence { value: *self }; RawValueRef::List(lazy_sequence) + } + MatchedValue::Struct => { + let lazy_struct = LazyRawTextStruct { value: *self }; + RawValueRef::Struct(lazy_struct) } // ...and the rest! }; Ok(value_ref) From 4fc9078592d567843cea4e00124a9722e15f11fc Mon Sep 17 00:00:00 2001 From: Zack Slayton Date: Thu, 10 Aug 2023 11:57:35 -1000 Subject: [PATCH 15/15] More doc comments --- src/lazy/text/buffer.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/lazy/text/buffer.rs b/src/lazy/text/buffer.rs index d9788f8e..b78f8584 100644 --- a/src/lazy/text/buffer.rs +++ b/src/lazy/text/buffer.rs @@ -1064,6 +1064,8 @@ impl<'data> nom::InputTakeAtPosition for TextBufferView<'data> { // === end of `nom` trait implementations +/// Takes a given parser and returns a new one that accepts any amount of leading whitespace before +/// calling the original parser. fn whitespace_and_then<'data, P, O>( parser: P, ) -> impl Parser, O, IonParseError<'data>> @@ -1096,6 +1098,8 @@ where } } +/// Augments a given parser such that it returns the matched value and the range of input bytes +/// that it matched. fn match_and_span<'data, P, O>( mut parser: P, ) -> impl Parser, (O, Range), IonParseError<'data>>