Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updates symbol logic in the Element APIs #426

Merged
merged 4 commits into from
Sep 22, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 5 additions & 6 deletions src/binary/binary_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,11 +88,10 @@ impl<W: Write> BinaryWriter<W> {
.set_field_name(system_symbol_ids::SYMBOLS);
self.symbol_table_writer.step_in(IonType::List)?;
for symbol in pending_symbols {
if let Some(text) = symbol {
self.symbol_table_writer.write_string(text)?;
} else {
self.symbol_table_writer.write_null(IonType::Null)?;
}
match symbol.text() {
Some(text) => self.symbol_table_writer.write_string(text),
None => self.symbol_table_writer.write_null(IonType::Null),
}?;
}
self.symbol_table_writer.step_out()?; // End symbols list

Expand Down Expand Up @@ -227,7 +226,7 @@ mod tests {
reader.step_in()?;
assert_eq!(Value(IonType::Symbol), reader.next()?);
assert_eq!("foo", reader.field_name()?);
assert_eq!("bar", reader.read_symbol()?.as_ref());
assert_eq!("bar", reader.read_symbol()?.text_or_error()?);

Ok(())
}
Expand Down
2 changes: 1 addition & 1 deletion src/binary/non_blocking/raw_binary_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1273,7 +1273,7 @@ impl<'a, A: AsRef<[u8]>> TxReader<'a, A> {
/// * Out of tx_buffer bytes
fn is_eof(&self) -> bool {
// We're at the top level
self.parent == None
self.parent.is_none()
&& self.encoded_value.annotations_header_length == 0
&& self.tx_buffer.is_empty()
}
Expand Down
5 changes: 4 additions & 1 deletion src/raw_symbol_token_ref.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,10 @@ impl AsRawSymbolTokenRef for &str {

impl AsRawSymbolTokenRef for Symbol {
fn as_raw_symbol_token_ref(&self) -> RawSymbolTokenRef {
RawSymbolTokenRef::Text(self.as_ref())
match self.text() {
Some(text) => RawSymbolTokenRef::Text(text),
None => RawSymbolTokenRef::SymbolId(0),
}
}
}

Expand Down
152 changes: 93 additions & 59 deletions src/symbol.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
use crate::result::decoding_error;
use crate::IonResult;
use std::borrow::Borrow;
use std::cmp::Ordering;
use std::fmt::{Display, Formatter};
use std::hash::{Hash, Hasher};
use std::ops::Deref;
use std::rc::Rc;

/// Stores or points to the text of a given [Symbol].
Expand All @@ -12,28 +13,28 @@ enum SymbolText {
Shared(Rc<str>),
// This Symbol owns its own text
Owned(String),
// This Symbol is equivalent to SID zero (`$0`)
Unknown,
}

impl AsRef<str> for SymbolText {
fn as_ref(&self) -> &str {
match self {
SymbolText::Owned(ref text) => text.as_str(),
SymbolText::Shared(ref rc) => rc.as_ref(),
}
}
}

impl<A: AsRef<str>> PartialEq<A> for SymbolText {
fn eq(&self, other: &A) -> bool {
// Compare the Symbols' text, not their ownership models
self.as_ref() == other.as_ref()
impl SymbolText {
fn text(&self) -> Option<&str> {
let text = match self {
SymbolText::Shared(s) => s.as_ref(),
SymbolText::Owned(s) => s.as_str(),
SymbolText::Unknown => return None,
};
Some(text)
}
}

impl Hash for SymbolText {
fn hash<H: Hasher>(&self, state: &mut H) {
// Hash the Symbol's text, ignore where/how it's stored.
self.as_ref().hash(state)
match self {
SymbolText::Shared(text) => text.hash(state),
SymbolText::Owned(text) => text.hash(state),
SymbolText::Unknown => 0.hash(state),
}
}
}

Expand All @@ -42,21 +43,48 @@ impl Clone for SymbolText {
match self {
SymbolText::Owned(text) => SymbolText::Owned(text.to_owned()),
SymbolText::Shared(text) => SymbolText::Shared(Rc::clone(text)),
SymbolText::Unknown => SymbolText::Unknown,
}
}
}

/// The text of a fully resolved field name, annotation, or symbol value. The text stored in this
/// Symbol may be either a `String` or a shared reference to text in a symbol table.
#[derive(Debug, Hash, Clone, Eq)]
impl PartialEq<Self> for SymbolText {
fn eq(&self, other: &Self) -> bool {
self.cmp(other) == Ordering::Equal
}
}

impl PartialOrd<Self> for SymbolText {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}

impl Ord for SymbolText {
fn cmp(&self, other: &Self) -> Ordering {
match (self.text(), other.text()) {
// If both Symbols have known text, delegate the comparison to their text.
(Some(s1), Some(s2)) => s1.cmp(s2),
// Otherwise, $0 (unknown text) is treated as 'less than' known text
(Some(_), None) => Ordering::Greater,
(None, Some(_)) => Ordering::Less,
(None, None) => Ordering::Equal,
}
}
}

/// The text of a fully resolved field name, annotation, or symbol value. If the symbol has known
/// text (that is: the symbol is not `$0`), it will be stored as either a `String` or a shared
/// reference to text in a symbol table.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone)]
pub struct Symbol {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Question) How does this handle Symbol with undefined text?

Undefined text - text in a SymbolToken structure which is undefined, e.g. null. SymbolTokens with undefined text do not necessarily refer to symbols with unknown text because the text may be resolvable through the SymbolToken’s importLocation

Undefined text reference from https://amzn.github.io/ion-docs/guides/symbols-guide.html

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A Symbol represents a resolved symbol, which is to say that this part:

SymbolTokens with undefined text do not necessarily refer to symbols with unknown text because the text may be resolvable through the SymbolToken’s importLocation

has already been attempted. If it failed, then an error would have been returned by the reader and we wouldn't be constructing a Symbol. If we have a Symbol, then the resolution step has already succeeded.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If, after resolution, the Symbol is found to explicitly have undefined text ($0 is the main example), then the Symbol's inner SymbolText enum will be set to Unknown.

text: SymbolText,
}

impl Symbol {
pub fn owned(text: String) -> Symbol {
pub fn owned<I: Into<String>>(text: I) -> Symbol {
Symbol {
text: SymbolText::Owned(text),
text: SymbolText::Owned(text.into()),
}
}

Expand All @@ -65,62 +93,68 @@ impl Symbol {
text: SymbolText::Shared(text),
}
}

pub fn unknown_text() -> Symbol {
Symbol {
text: SymbolText::Unknown,
}
}

pub fn text(&self) -> Option<&str> {
self.text.text()
}

pub fn text_or_error(&self) -> IonResult<&str> {
match self.text() {
Some(text) => Ok(text),
None => decoding_error("symbol has unknown text"),
}
}
}

impl From<&str> for Symbol {
fn from(text: &str) -> Self {
Symbol::owned(text)
}
}

impl Display for Symbol {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.text.as_ref())
match self.text() {
None => write!(f, "$0"),
Some(text) => write!(f, "'{}'", text),
}
}
}

impl<A: AsRef<str>> PartialEq<A> for Symbol {
fn eq(&self, other: &A) -> bool {
self.text.as_ref() == other.as_ref()
self.text()
// If the symbol has known text, compare it to the provide text
.map(|t| t == other.as_ref())
// If there's no text, it's definitely not equivalent to the provided text
.unwrap_or(false)
}
}

impl PartialEq<Symbol> for String {
fn eq(&self, other: &Symbol) -> bool {
self.as_str() == other.as_ref()
other.text().map(|t| t == self.as_str()).unwrap_or(false)
}
}

impl PartialEq<Symbol> for &str {
fn eq(&self, other: &Symbol) -> bool {
self == &other.as_ref()
}
}

impl AsRef<str> for Symbol {
fn as_ref(&self) -> &str {
self.text.as_ref()
other.text().map(|t| &t == self).unwrap_or(false)
}
}

impl Deref for Symbol {
type Target = str;

fn deref(&self) -> &Self::Target {
self.text.as_ref()
}
}

// Allows a HashMap<Symbol, _> to do lookups with a &str instead of a &Symbol
// Note that this method panics if the Symbol has unknown text! This is unfortunate but is required
// in order to allow a HashMap<Symbol, _> to do lookups with a &str instead of a &Symbol
impl Borrow<str> for Symbol {
fn borrow(&self) -> &str {
self.as_ref()
}
}

impl<A: AsRef<str>> PartialOrd<A> for Symbol {
fn partial_cmp(&self, other: &A) -> Option<Ordering> {
self.text.as_ref().partial_cmp(other.as_ref())
}
}

impl Ord for Symbol {
fn cmp(&self, other: &Self) -> Ordering {
self.text.as_ref().cmp(other.as_ref())
self.text()
.expect("cannot borrow a &str from a Symbol with unknown text")
}
}

Expand All @@ -131,21 +165,21 @@ mod symbol_tests {
#[test]
fn ordering_and_eq() {
let mut symbols = vec![
Symbol::owned("foo".to_owned()),
Symbol::owned("foo"),
Symbol::shared(Rc::from("bar")),
Symbol::shared(Rc::from("baz")),
Symbol::owned("quux".to_owned()),
Symbol::owned("quux"),
];
// Sort the list to demonstrate that our Ord implementation works.
symbols.as_mut_slice().sort();
// Equality testing doesn't depend on what kind of Symbol it is, just the text.
// We can compare the sorted version of the vec above to this one and it will
// be considered equal.
let expected = vec![
Symbol::owned("bar".to_owned()),
Symbol::owned("baz".to_owned()),
Symbol::owned("foo".to_owned()),
Symbol::owned("quux".to_owned()),
Symbol::owned("bar"),
Symbol::owned("baz"),
Symbol::owned("foo"),
Symbol::owned("quux"),
];
assert_eq!(symbols, expected)
}
Expand Down
25 changes: 13 additions & 12 deletions src/symbol_table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use crate::types::SymbolId;
// SymbolTable instances always have at least system symbols; they are never empty.
#[allow(clippy::len_without_is_empty)]
pub struct SymbolTable {
symbols_by_id: Vec<Option<Symbol>>,
symbols_by_id: Vec<Symbol>,
ids_by_text: HashMap<Symbol, SymbolId>,
}

Expand Down Expand Up @@ -56,7 +56,7 @@ impl SymbolTable {
let id = self.symbols_by_id.len();
let rc: Rc<str> = Rc::from(text);
let symbol = Symbol::shared(rc);
self.symbols_by_id.push(Some(symbol.clone()));
self.symbols_by_id.push(symbol.clone());
self.ids_by_text.insert(symbol, id);
id
}
Expand All @@ -65,7 +65,7 @@ impl SymbolTable {
/// encounters null or non-string values in a stream's symbol table.
pub fn add_placeholder(&mut self) -> SymbolId {
let sid = self.symbols_by_id.len();
self.symbols_by_id.push(None);
self.symbols_by_id.push(Symbol::unknown_text());
sid
}

Expand All @@ -86,14 +86,15 @@ impl SymbolTable {
/// If defined, returns the text associated with the provided Symbol ID.
pub fn text_for(&self, sid: SymbolId) -> Option<&str> {
self.symbols_by_id
// If the SID is out of bounds, returns None
.get(sid)?
.as_ref()
.map(|symbol| symbol.as_ref())
// If the text is unknown, returns None
.text()
}

/// If defined, returns the Symbol associated with the provided Symbol ID.
pub fn symbol_for(&self, sid: SymbolId) -> Option<&Symbol> {
self.symbols_by_id.get(sid)?.as_ref()
self.symbols_by_id.get(sid)
}

/// Returns true if the provided symbol ID maps to an entry in the symbol table (i.e. it is in
Expand All @@ -111,19 +112,19 @@ impl SymbolTable {

/// Returns a slice of references to the symbol text stored in the table.
///
/// The symbol table can contain symbols with unknown text; if the text for a given symbol is
/// unknown, the corresponding entry in the slice will be [None].
pub fn symbols(&self) -> &[Option<Symbol>] {
/// The symbol table can contain symbols with unknown text; see the documentation for
/// [Symbol] for more information.
pub fn symbols(&self) -> &[Symbol] {
&self.symbols_by_id
}

/// Returns a slice of references to the symbol text stored in the table starting at the given
/// symbol ID. If a symbol table append occurs during reading, this function can be used to
/// easily view the new symbols that has been added to the table.
///
/// The symbol table can contain symbols with unknown text; if the text for a given symbol is
/// unknown, the corresponding entry in the slice will be [None].
pub fn symbols_tail(&self, start: usize) -> &[Option<Symbol>] {
/// The symbol table can contain symbols with unknown text; see the documentation for
/// [Symbol] for more information.
pub fn symbols_tail(&self, start: usize) -> &[Symbol] {
&self.symbols_by_id[start..]
}

Expand Down
2 changes: 1 addition & 1 deletion src/text/raw_text_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -714,7 +714,7 @@ impl<W: Write> IonWriter for RawTextWriter<W> {
if popped_encoding_level.child_count > 0 {
// If this isn't an empty container, put the closing delimiter on the next line
// with proper indentation.
if self.space_between_values.contains(&['\n', '\r']) {
if self.space_between_values.contains(['\n', '\r']) {
writeln!(&mut self.output)?;
for _ in 0..self.depth() {
write!(&mut self.output, "{}", self.indentation)?;
Expand Down
Loading