Skip to content

Commit

Permalink
[break] use null strings when parsing YAML-null values
Browse files Browse the repository at this point in the history
Merge pull request #85 from biojppm/issue84
Breaking change: null values in YAML are now parsed to null strings instead of YAML null token "~":

auto tree = parse("{foo: , bar: ''}");

// previous:
assert(tree["foo"].val() == "~");
assert(tree["bar"].val() == "");
    
// now:
assert(tree["foo"].val() == nullptr); // notice that this is now null
assert(tree["bar"].val() == "");

Fixes #84.
  • Loading branch information
biojppm committed Sep 26, 2020
2 parents 6037350 + 2c37274 commit 5390bb9
Show file tree
Hide file tree
Showing 14 changed files with 254 additions and 143 deletions.
33 changes: 31 additions & 2 deletions CHANGELOG.md
@@ -1,12 +1,41 @@
# Changelog

## Changes in master
Currently there are no ryml releases. However, the master branch is always
stable; it is validated by requiring all tests to succeed before merging to it.
So for now, only major changes to master are listed.

* 2020/September
* [***Breaking change***] [MR#85](https://github.com/biojppm/rapidyaml/pull/85)
null values in YAML are now parsed to null
strings instead of YAML null token "~":
```c++
auto tree = parse("{foo: , bar: ''}");
// previous:
assert(tree["foo"].val() == "~");
assert(tree["bar"].val() == "");
// now:
assert(tree["foo"].val() == nullptr); // notice that this is now null
assert(tree["bar"].val() == "");
```
* [MR#85](https://github.com/biojppm/rapidyaml/pull/85) Commas after tags are now allowed:
```yaml
{foo: !!str, bar: ''} # now the comma does not cause an error
```
* [MR#81](https://github.com/biojppm/rapidyaml/pull/81): Always compile
with extra pedantic warnings.

* 2020/May
* ***Breaking change***: the error callback now receives a source location object:
* [***Breaking change***] the error callback now receives a source location object:
```c++
// previous
using pfn_error = void (*)(const char* msg, size_t msg_len, void *user_data);
// now:
using pfn_error = void (*)(const char* msg, size_t msg_len, Location location, void *user_data);
```
* Parser fixes to improve test suite success:
[MR#73](https://github.com/biojppm/rapidyaml/pull/73),
[MR#71](https://github.com/biojppm/rapidyaml/pull/71),
[MR#68](https://github.com/biojppm/rapidyaml/pull/68),
[MR#67](https://github.com/biojppm/rapidyaml/pull/67),
[MR#66](https://github.com/biojppm/rapidyaml/pull/66)
* Fix compilation as DLL on windows [MR#69](https://github.com/biojppm/rapidyaml/pull/69)
29 changes: 19 additions & 10 deletions src/c4/yml/emit.def.hpp
Expand Up @@ -322,26 +322,35 @@ void Emitter<Writer>::_write_scalar_block(csubstr s, size_t ilevel, bool as_key)
template<class Writer>
void Emitter<Writer>::_write_scalar(csubstr s)
{
// this block of code needed to be moved to before the needs_quotes
// assignment to workaround a g++ optimizer bug where (s.str != nullptr)
// was evaluated as true even if s.str was actually a nullptr (!!!)
if(s.len == 0)
{
if(s.str != nullptr)
{
this->Writer::_do_write("''");
}
else
{
this->Writer::_do_write('~');
}
return;
}

const bool needs_quotes = (
!s.is_number() // is not a number
&&
(
(s != s.trim(" \t\n\r")) // has leading or trailing whitespace
||
s.first_of("#:-?,\n{}[]'\"") != npos // has special chars
)
);
)
);

if(!needs_quotes)
{
if( ! s.empty())
{
this->Writer::_do_write(s);
}
else
{
this->Writer::_do_write("''");
}
this->Writer::_do_write(s);
}
else
{
Expand Down
65 changes: 45 additions & 20 deletions src/c4/yml/parse.cpp
Expand Up @@ -647,6 +647,20 @@ bool Parser::_handle_seq_expl()
{
return true;
}
else if(rem.begins_with(", "))
{
_c4dbgp("found ',' -- the value was null");
_append_val_null();
_line_progressed(2);
return true;
}
else if(rem.begins_with(','))
{
_c4dbgp("found ',' -- the value was null");
_append_val_null();
_line_progressed(1);
return true;
}
else
{
_c4err("parse error");
Expand Down Expand Up @@ -897,7 +911,7 @@ bool Parser::_rval_dash_start_or_continue_seq()
{
_c4dbgp("prev val was empty");
addrem_flags(RNXT, RVAL);
_append_val("~");
_append_val_null();
return false;
}
_c4dbgp("val is a nested seq, indented");
Expand Down Expand Up @@ -938,7 +952,7 @@ bool Parser::_handle_map_expl()
if(has_all(SSCL))
{
_c4dbgp("the last val was null");
_append_key_val("~");
_append_key_val_null();
rem_flags(RVAL);
}
_pop_level();
Expand Down Expand Up @@ -1032,7 +1046,7 @@ bool Parser::_handle_map_expl()
else if(rem.begins_with(','))
{
_c4dbgp("prev scalar was a key with null value");
_append_key_val("~");
_append_key_val_null();
_line_progressed(1);
return true;
}
Expand All @@ -1041,7 +1055,7 @@ bool Parser::_handle_map_expl()
_c4dbgp("map terminates after a key...");
RYML_ASSERT(has_all(SSCL));
_c4dbgp("the last val was null");
_append_key_val("~");
_append_key_val_null();
rem_flags(RVAL);
if(has_all(RSEQIMAP))
{
Expand All @@ -1067,7 +1081,8 @@ bool Parser::_handle_map_expl()
}
else if(rem.begins_with('}'))
{
_append_key_val("~");
_c4dbgp("the last val was null");
_append_key_val_null();
_line_progressed(1);
return true;
}
Expand Down Expand Up @@ -1152,7 +1167,7 @@ bool Parser::_handle_map_expl()
else if(rem.begins_with(','))
{
_c4dbgp("appending empty val");
_append_key_val("~");
_append_key_val_null();
addrem_flags(RKEY, RVAL);
_line_progressed(1);
if(has_any(RSEQIMAP))
Expand All @@ -1168,7 +1183,7 @@ bool Parser::_handle_map_expl()
_c4dbgp("stopping implicitly nested 1x map");
if(has_any(SSCL))
{
_append_key_val("~");
_append_key_val_null();
}
_stop_seqimap();
_pop_level();
Expand Down Expand Up @@ -1230,7 +1245,7 @@ bool Parser::_handle_map_impl()
if(has_all(CPLX|RSET))
{
_c4dbgp("it's a complex key, so use null value '~'");
_append_key_val("~");
_append_key_val_null();
}
rem = m_state->line_contents.rem;

Expand Down Expand Up @@ -1265,7 +1280,7 @@ bool Parser::_handle_map_impl()
_line_progressed(2);
if(has_all(SSCL))
{
_append_key_val("~");
_append_key_val_null();
}
return true;
}
Expand Down Expand Up @@ -1600,7 +1615,7 @@ bool Parser::_handle_types()
if(rem.begins_with("!!"))
{
_c4dbgp("begins with '!!'");
t = rem.left_of(rem.first_of(' '));
t = rem.left_of(rem.first_of(" ,"));
RYML_ASSERT(t.len >= 2);
//t = t.sub(2);
if(t == "!!set")
Expand Down Expand Up @@ -1656,8 +1671,8 @@ bool Parser::_handle_types()
if(rem == ':' || rem.begins_with(": "))
{
_c4dbgp("the last val was null, and this is a tag from a null key");
_append_key_val("~");
_store_scalar("~");
_append_key_val_null();
_store_scalar_null();
// do not change the flag to key, it is ~
RYML_ASSERT(rem.begin() > m_state->line_contents.rem.begin());
size_t token_len = rem == ':' ? 1 : 2;
Expand Down Expand Up @@ -2588,8 +2603,8 @@ void Parser::_end_stream()
}
else if(m_tree->is_map(m_state->node_id))
{
_c4dbgp("append key val...");
added = _append_key_val("~");
_c4dbgp("append null key val...");
added = _append_key_val_null();
if(has_any(RSEQIMAP))
{
_stop_seqimap();
Expand All @@ -2615,7 +2630,7 @@ void Parser::_end_stream()
}
else if(has_all(RSEQ|RVAL) && has_none(EXPL))
{
added = _append_val("~");
added = _append_val_null();
}

if(added)
Expand Down Expand Up @@ -2851,12 +2866,17 @@ void Parser::_stop_seqimap()
}

//-----------------------------------------------------------------------------
NodeData* Parser::_append_val(csubstr const& val)
NodeData* Parser::_append_val(csubstr val)
{
RYML_ASSERT( ! has_all(SSCL));
RYML_ASSERT(node(m_state) != nullptr);
RYML_ASSERT(node(m_state)->is_seq());
_c4dbgpf("append val: '%.*s' to parent id=%zd (level=%zd)", _c4prsp(val), m_state->node_id, m_state->level);
if(val == '~')
{
_c4dbgp("val was '~', so use null");
val = {};
}
size_t nid = m_tree->append_child(m_state->node_id);
m_tree->to_val(nid, val);
_c4dbgpf("append val: id=%zd key='%.*s' val='%.*s'", nid, _c4prsp(m_tree->get(nid)->m_key.scalar), _c4prsp(m_tree->get(nid)->m_val.scalar));
Expand All @@ -2870,11 +2890,16 @@ NodeData* Parser::_append_val(csubstr const& val)
return m_tree->get(nid);
}

NodeData* Parser::_append_key_val(csubstr const& val)
NodeData* Parser::_append_key_val(csubstr val)
{
RYML_ASSERT(node(m_state)->is_map());
csubstr key = _consume_scalar();
_c4dbgpf("append keyval: '%.*s' '%.*s' to parent id=%zd (level=%zd)", _c4prsp(key), _c4prsp(val), m_state->node_id, m_state->level);
if(val == '~')
{
_c4dbgp("val was '~', so use null");
val = {};
}
size_t nid = m_tree->append_child(m_state->node_id);
m_tree->to_keyval(nid, key, val);
_c4dbgpf("append keyval: id=%zd key='%.*s' val='%.*s'", nid, _c4prsp(m_tree->get(nid)->key()), _c4prsp(m_tree->get(nid)->val()));
Expand Down Expand Up @@ -2953,7 +2978,7 @@ bool Parser::_handle_indentation()
{
if(has_all(RMAP))
{
_append_key_val("~");
_append_key_val_null();
addrem_flags(RKEY, RVAL);
}
else if(has_all(RSEQ))
Expand Down Expand Up @@ -2991,12 +3016,12 @@ bool Parser::_handle_indentation()
if(has_all(RMAP))
{
RYML_ASSERT(has_all(SSCL));
_append_key_val("~");
_append_key_val_null();
}
else if(has_all(RSEQ))
{
RYML_ASSERT(has_none(SSCL));
_append_val("~");
_append_val_null();
}
}
// search the stack frame to jump to based on its indentation
Expand Down
7 changes: 5 additions & 2 deletions src/c4/yml/parse.hpp
Expand Up @@ -154,11 +154,14 @@ class RYML_EXPORT Parser
void _start_new_doc(csubstr rem);
void _end_stream();

NodeData* _append_val(csubstr const& val);
NodeData* _append_key_val(csubstr const& val);
NodeData* _append_val(csubstr val);
NodeData* _append_key_val(csubstr val);
inline NodeData* _append_val_null() { return _append_val({}/*"~"*/); }
inline NodeData* _append_key_val_null() { return _append_key_val({}/*"~"*/); }
bool _rval_dash_start_or_continue_seq();

void _store_scalar(csubstr const& s);
void _store_scalar_null() { _store_scalar({}/*"~"*/); }
csubstr _consume_scalar();
void _move_scalar_from_top();

Expand Down
4 changes: 2 additions & 2 deletions src/c4/yml/tree.cpp
Expand Up @@ -1628,11 +1628,11 @@ size_t Tree::_next_node(lookup_result * r, bool modify, _lookup_path_token *pare
{
if(is_map(r->closest))
{
to_keyval(node, "~", "~");
to_keyval(node, /*"~"*/{}, /*"~"*/{});
}
else if(is_seq(r->closest))
{
to_val(node, "~");
to_val(node, /*"~"*/{});
}
}
}
Expand Down
52 changes: 52 additions & 0 deletions test/test_case.cpp
Expand Up @@ -238,6 +238,58 @@ TEST(CaseNode, setting_up)

//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
NodeType_e CaseNode::_guess() const
{
NodeType t;
C4_ASSERT(!val.empty() != !children.empty() || (val.empty() && children.empty()));
if(children.empty())
{
C4_ASSERT(parent);
if(key.empty())
{
t = VAL;
}
else
{
t = KEYVAL;
}
}
else
{
NodeType_e has_key = key.empty() ? NOTYPE : KEY;
auto const& ch = children.front();
if(ch.key.empty())
{
t = (has_key|SEQ);
}
else
{
t = (has_key|MAP);
}
}
if( ! key_tag.empty())
{
C4_ASSERT( ! key.empty());
t.add(KEYTAG);
}
if( ! val_tag.empty())
{
C4_ASSERT( ! val.empty() || ! children.empty());
t.add(VALTAG);
}
if( ! key_anchor.str.empty())
{
t.add(key_anchor.type);
}
if( ! val_anchor.str.empty())
{
t.add(val_anchor.type);
}
return t;
}


//-----------------------------------------------------------------------------
void CaseNode::compare_child(yml::NodeRef const& n, size_t pos) const
{
Expand Down

0 comments on commit 5390bb9

Please sign in to comment.