Skip to content

Commit

Permalink
re #52: fixes fail on json preprocessing
Browse files Browse the repository at this point in the history
  • Loading branch information
biojppm committed May 12, 2020
1 parent 829f724 commit 5666f52
Show file tree
Hide file tree
Showing 3 changed files with 112 additions and 43 deletions.
1 change: 1 addition & 0 deletions src/c4/yml/detail/parser_dbg.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#ifndef _C4_YML_COMMON_HPP_
#include "../common.hpp"
#endif
#include <cstdio>

//-----------------------------------------------------------------------------
// some debugging scaffolds
Expand Down
83 changes: 51 additions & 32 deletions src/c4/yml/preprocess.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#include "c4/yml/preprocess.hpp"
#include "c4/yml/detail/parser_dbg.hpp"
#include <cstdio>

/** @file preprocess.hpp Functions for preprocessing YAML prior to parsing. */

Expand Down Expand Up @@ -31,6 +30,7 @@ struct _SubstrWriter
}
++pos;
}
size_t slack() const { return pos <= buf.len ? buf.len - pos : 0; }
size_t excess() const { return pos > buf.len ? pos - buf.len : 0; }
//! get the part written so far
csubstr curr() const { return pos <= buf.len ? buf.first(pos) : buf; }
Expand All @@ -39,21 +39,6 @@ struct _SubstrWriter

size_t advance(size_t more) { pos += more; return pos; }
};

/** Returns true when @p c is an ASCII letter, a decimal digit, or one
 * of the punctuation characters `_`, `-`, `~`, `$`; false otherwise.
 * (Going by the name, these are presumably the characters accepted in
 * an identifier - confirm against the callers.) */
bool _is_idchar(char c)
{
    const bool is_lower = (c >= 'a' && c <= 'z');
    const bool is_upper = (c >= 'A' && c <= 'Z');
    const bool is_digit = (c >= '0' && c <= '9');
    if(is_lower || is_upper || is_digit)
    {
        return true;
    }
    // the remaining accepted characters are a fixed punctuation set
    switch(c)
    {
    case '_':
    case '-':
    case '~':
    case '$':
        return true;
    default:
        return false;
    }
}

/** The three states cycled through by _next(), in increasing order. */
typedef enum { kReadPending = 0, kKeyPending = 1, kValPending = 2 } _ppstate;

/** Returns the state whose value follows @p s, wrapping around to the
 * first state (value 0, kReadPending) once kValPending is passed. */
_ppstate _next(_ppstate s)
{
    const int following = static_cast<int>(s) + 1;
    if(following > static_cast<int>(kValPending))
    {
        return static_cast<_ppstate>(0); // wrap around
    }
    return static_cast<_ppstate>(following);
}
} // empty namespace


Expand All @@ -67,48 +52,63 @@ size_t preprocess_json(csubstr s, substr buf)
size_t last = 0; // the index of the last character in s that was copied to buf

// append everything that was not written yet
#define _apfromlast() { csubstr _s_ = (s.range(last, i)); _append(_s_); last += _s_.len; i += _s_.len; }
#define _adv(n) { size_t _n_ = (n); _append.advance(_n_); last += _n_; i += _n_; }
#define _apelm(c) { _append(c); ++last; ++i; } // append element from the buffer
#define _apfromlast() { csubstr _s_ = s.range(last, i); _append(_s_); last += _s_.len; }
// append element from the buffer
#define _apelm(c) { _append(c); ++last; }
#define _adv(nsrc, ndst) { _append.advance(ndst); i += nsrc; last += nsrc; }

for(size_t i = 0; i < s.len; ++i)
{
const char curr = s[i];
const char next = i+1 < s.len ? s[i+1] : '\0';
if(curr == ':') // if it was missing, add a space after the colon
if(curr == ':') // if a space is missing after a colon, add it
{
if(next == '"' || next == '\'' || next == '{' || next == '['
|| (next >= '0' && next <= '9'))
bool insert = false;
if(next == '"' || next == '\'' || next == '{' || next == '[' || (next >= '0' && next <= '9'))
{
insert = true;
}
else if(i+1 < s.len)
{
csubstr rem = s.sub(i+1);
if(rem.begins_with("true") || rem.begins_with("false"))
{
insert = true;
}
}
if(insert)
{
_apfromlast();
_apelm(':');
_apelm(curr);
_append(' ');
}
}
else if((curr == '{' || curr == '[') && next != '\0') // recurse into substructures
{
const char close = static_cast<char>(curr + 2); // in ascii: {=123,}=125 and [=91,]=93. So add 2!
// get the close-character matching the open-character.
// In ASCII: {=123,}=125 and [=91,]=93. So just add 2!
const char close = static_cast<char>(curr + 2);
// get the contents inside the brackets
csubstr ss = s.sub(i).pair_range_nested(curr, close);
C4_ASSERT(ss.size() >= 2);
C4_ASSERT(ss.ends_with(close));
RYML_ASSERT(ss.size() >= 2);
RYML_ASSERT(ss.ends_with(close));
ss = ss.offs(1, 1); // skip the open-close bracket characters
_apfromlast();
_apelm(curr);
if(!ss.empty()) // recurse into the substring
{
size_t ret = preprocess_json(ss, _append.rem());
_adv(ret);
_adv(ss.len, ret);
}
_apelm(close);
}
else if(curr == '\'' || curr == '"') // consume quoted strings at once
{
csubstr ss = s.sub(i).pair_range_esc(curr, '\\');
C4_ASSERT(ss.end() >= (s.str + i));
i += ss.end() - (s.str + i);
C4_ASSERT(i > 0);
i -= 1; //
RYML_ASSERT(ss.begins_with(curr) && ss.ends_with(curr));
i += ss.len;
_apfromlast();
--i;
}
}

Expand All @@ -118,7 +118,6 @@ size_t preprocess_json(csubstr s, substr buf)
}

#undef _apfromlast
#undef _apchar
#undef _apelm
#undef _adv

Expand All @@ -130,6 +129,26 @@ size_t preprocess_json(csubstr s, substr buf)
//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------

namespace {
/** Returns true when @p c is an ASCII letter, a decimal digit, or one
 * of the punctuation characters `_`, `-`, `~`, `$`; false otherwise.
 * (Going by the name, these are presumably the characters accepted in
 * an identifier - confirm against the callers.) */
bool _is_idchar(char c)
{
    const bool is_lower = (c >= 'a' && c <= 'z');
    const bool is_upper = (c >= 'A' && c <= 'Z');
    const bool is_digit = (c >= '0' && c <= '9');
    if(is_lower || is_upper || is_digit)
    {
        return true;
    }
    // the remaining accepted characters are a fixed punctuation set
    switch(c)
    {
    case '_':
    case '-':
    case '~':
    case '$':
        return true;
    default:
        return false;
    }
}

/** The three states cycled through by _next(), in increasing order. */
typedef enum { kReadPending = 0, kKeyPending = 1, kValPending = 2 } _ppstate;

/** Returns the state whose value follows @p s, wrapping around to the
 * first state (value 0, kReadPending) once kValPending is passed. */
_ppstate _next(_ppstate s)
{
    const int following = static_cast<int>(s) + 1;
    if(following > static_cast<int>(kValPending))
    {
        return static_cast<_ppstate>(0); // wrap around
    }
    return static_cast<_ppstate>(following);
}
} // empty namespace


//-----------------------------------------------------------------------------

size_t preprocess_rxmap(csubstr s, substr buf)
{
_SubstrWriter _append(buf);
Expand Down
71 changes: 60 additions & 11 deletions test/preprocess.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,24 +6,73 @@
namespace c4 {
namespace yml {

#define _test(val, expected) \
EXPECT_EQ(preprocess_json<std::string>(val), expected)

TEST(preprocess, json_basic)
{
#define _test(val, expected) \
EXPECT_EQ(preprocess_json<std::string>(val), expected)

TEST(preprocess_json, basic)
{
_test("", "");
_test("{}", "{}");
_test("\"a\":\"b\"", "\"a\": \"b\"");
_test("'a':'b'", "'a': 'b'");
_test("{'a':'b'}", "{'a': 'b'}");
_test("{\"a\":\"b\"}", "{\"a\": \"b\"}");
_test(R"("a":"b")",
R"("a": "b")");
_test(R"('a':'b')",
R"('a': 'b')");
_test(R"({'a':'b'})",
R"({'a': 'b'})");
_test(R"({"a":"b"})",
R"({"a": "b"})");

_test("{\"a\":{\"a\":\"b\"}}", "{\"a\": {\"a\": \"b\"}}");
_test("{'a':{'a':'b'}}", "{'a': {'a': 'b'}}");
#undef _test
_test(R"({"a":{"a":"b"}})",
R"({"a": {"a": "b"}})");
_test(R"({'a':{'a':'b'}})",
R"({'a': {'a': 'b'}})");
}

/** Regression cases for issue #52: key/value pairs that follow one another
 * in the same map, with values of different kinds (string, number, map,
 * boolean). Each kind is checked with both short and long keys/values. */
TEST(preprocess_json, github52)
{
    // input that already has a space after every colon must come out unchanged
    _test(R"({"a": "b","c": 42,"d": "e"})",
          R"({"a": "b","c": 42,"d": "e"})");
    _test(R"({"aaaa": "bbbb","cccc": 424242,"dddddd": "eeeeeee"})",
          R"({"aaaa": "bbbb","cccc": 424242,"dddddd": "eeeeeee"})");

    // number value in the middle
    _test(R"({"a":"b","c":42,"d":"e"})",
          R"({"a": "b","c": 42,"d": "e"})");
    _test(R"({"aaaaa":"bbbbb","ccccc":424242,"ddddd":"eeeee"})",
          R"({"aaaaa": "bbbbb","ccccc": 424242,"ddddd": "eeeee"})");
    // empty / blank map value in the middle
    _test(R"({"a":"b","c":{},"d":"e"})",
          R"({"a": "b","c": {},"d": "e"})");
    _test(R"({"aaaaa":"bbbbb","ccccc":{ },"ddddd":"eeeee"})",
          R"({"aaaaa": "bbbbb","ccccc": { },"ddddd": "eeeee"})");
    // boolean false in the middle
    _test(R"({"a":"b","c":false,"d":"e"})",
          R"({"a": "b","c": false,"d": "e"})");
    _test(R"({"aaaaa":"bbbbb","ccccc":false,"ddddd":"eeeee"})",
          R"({"aaaaa": "bbbbb","ccccc": false,"ddddd": "eeeee"})");
    // boolean true in the middle (the short form used to be a verbatim
    // duplicate of the short `false` case, leaving short `true` untested)
    _test(R"({"a":"b","c":true,"d":"e"})",
          R"({"a": "b","c": true,"d": "e"})");
    _test(R"({"aaaaa":"bbbbb","ccccc":true,"ddddd":"eeeee"})",
          R"({"aaaaa": "bbbbb","ccccc": true,"ddddd": "eeeee"})");
}

// Checks preprocess_json on containers nested inside a map: every colon
// lacking a following space must get one, at every nesting depth.
TEST(preprocess_json, nested)
{
// maps nested 2, 3 and 4 levels deep, with an empty map at the innermost level
_test(R"({"a":"b","c":{"a":"b","c":{},"d":"e"},"d":"e"})",
R"({"a": "b","c": {"a": "b","c": {},"d": "e"},"d": "e"})");
_test(R"({"a":"b","c":{"a":"b","c":{"a":"b","c":{},"d":"e"},"d":"e"},"d":"e"})",
R"({"a": "b","c": {"a": "b","c": {"a": "b","c": {},"d": "e"},"d": "e"},"d": "e"})");
_test(R"({"a":"b","c":{"a":"b","c":{"a":"b","c":{"a":"b","c":{},"d":"e"},"d":"e"},"d":"e"},"d":"e"})",
R"({"a": "b","c": {"a": "b","c": {"a": "b","c": {"a": "b","c": {},"d": "e"},"d": "e"},"d": "e"},"d": "e"})");

// a sequence nested in a map: the sequence holds no colons, so its
// contents are expected to come out unchanged
_test(R"({"a":"b","c":["a","c","d","e"],"d":"e"})",
R"({"a": "b","c": ["a","c","d","e"],"d": "e"})");
}

#undef _test


//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------

TEST(preprocess, rxmap_basic)
{
Expand Down

0 comments on commit 5666f52

Please sign in to comment.