Skip to content

Commit

Permalink
#6031: More block tokeniser unit tests. Fix parsing when encountering control characters within a quoted string within a block
Browse files Browse the repository at this point in the history
  • Loading branch information
codereader committed Aug 12, 2022
1 parent b0595bd commit e530be6
Show file tree
Hide file tree
Showing 2 changed files with 92 additions and 25 deletions.
67 changes: 49 additions & 18 deletions libs/parser/DefBlockSyntaxParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@ struct DefSyntaxToken
{
Nothing,
Whitespace,
OpeningBrace,
ClosingBrace,
BracedBlock, // starting with { and *maybe* ending with }
Token,
EolComment,
BlockComment,
Expand Down Expand Up @@ -129,11 +128,13 @@ class DefBlockSyntaxTokeniserFunc
// States of the tokeniser's per-character state machine.
// The member _state is declared together with this enum and holds the current state.
// NOTE(review): this diff hunk shows both the previous and the re-aligned enumerator
// lines; the second group (which adds BracedBlock and QuotedStringWithinBlock)
// appears to be the current one - confirm against the actual header.
enum class State
{
Searching, // haven't found anything yet
Whitespace, // on whitespace
Token, // non-whitespace, non-control character
BlockComment, // within a /* block comment */
EolComment, // on an EOL comment starting with //
Searching, // haven't found anything yet
Whitespace, // on whitespace
Token, // non-whitespace, non-control character
BracedBlock, // within a braced block
QuotedStringWithinBlock, // within a quoted string within a block
BlockComment, // within a /* block comment */
EolComment, // on an EOL comment starting with //
} _state;

constexpr static const char* const Delims = " \t\n\v\r";
Expand Down Expand Up @@ -171,7 +172,7 @@ class DefBlockSyntaxTokeniserFunc
// Clear out the token, no guarantee that it is empty
tok.clear();

std::size_t blockLevel = 0;
std::size_t openedBlocks = 0;

while (next != end)
{
Expand All @@ -191,18 +192,12 @@ class DefBlockSyntaxTokeniserFunc

if (ch == OpeningBrace)
{
tok.type = DefSyntaxToken::Type::OpeningBrace;
_state = State::BracedBlock;
tok.type = DefSyntaxToken::Type::BracedBlock;
tok.value += ch;
openedBlocks = 1;
++next;
return true;
}

if (ch == ClosingBrace)
{
tok.type = DefSyntaxToken::Type::ClosingBrace;
tok.value += ch;
++next;
return true;
continue;
}

if (ch == '/')
Expand Down Expand Up @@ -240,6 +235,42 @@ class DefBlockSyntaxTokeniserFunc
++next;
continue;

case State::BracedBlock:

// Add the character and advance in any case
tok.value += ch;
++next;

// Check for another opening brace
if (ch == OpeningBrace)
{
// another block within this block, ignore this
++openedBlocks;
}
else if (ch == ClosingBrace && --openedBlocks == 0)
{
// End of block content, we're done here
return true;
}
else if (ch == '"')
{
// An opening quote within the braced block, switch to a special block
// ignoring any control characters within that string
_state = State::QuotedStringWithinBlock;
}
continue;

case State::QuotedStringWithinBlock:
// Add the character and advance over anything
tok.value += ch;
++next;

if (ch == '"')
{
_state = State::BracedBlock;
}
continue;

case State::Whitespace:
if (IsWhitespace(ch))
{
Expand Down
50 changes: 43 additions & 7 deletions test/DefBlockSyntaxParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,28 +59,64 @@ TEST(DefBlockSyntaxTokeniser, SingleTokens)
expectSingleToken("test", parser::DefSyntaxToken::Type::Token, "test");
expectSingleToken("textures/common", parser::DefSyntaxToken::Type::Token, "textures/common");
expectSingleToken("m/is/leading*/token/", parser::DefSyntaxToken::Type::Token, "m/is/leading*/token/");

expectSingleToken("// EOL comment ", parser::DefSyntaxToken::Type::EolComment, "// EOL comment ");
expectSingleToken("//EOLcomment", parser::DefSyntaxToken::Type::EolComment, "//EOLcomment");
expectSingleToken("//", parser::DefSyntaxToken::Type::EolComment, "//");
expectSingleToken("//EOLcomment", parser::DefSyntaxToken::Type::EolComment, "//EOLcomment");
expectSingleToken("/* block comment */", parser::DefSyntaxToken::Type::BlockComment, "/* block comment */");
expectSingleToken("/* bl/ock * * * comment */", parser::DefSyntaxToken::Type::BlockComment, "/* bl/ock * * * comment */");
expectSingleToken("/* blk \n test test\n\ncomment */", parser::DefSyntaxToken::Type::BlockComment, "/* blk \n test test\n\ncomment */");
expectSingleToken("/* this should not crash *", parser::DefSyntaxToken::Type::BlockComment, "/* this should not crash *");
}

TEST(DefBlockSyntaxTokeniser, TokenSequences)
// Test helper: tokenises the given source string with DefBlockSyntaxTokeniserFunc
// and checks, in order, that each produced token matches the expected
// (type, value) pair listed in the given sequence.
// NOTE(review): this diff hunk interleaves lines of the previous hard-coded test body
// (the local "source" string and the four explicit expectToken calls) with the new
// helper - confirm against the actual file which lines are still present.
void expectTokenSequence(const std::string& source, const std::vector<std::pair<parser::DefSyntaxToken::Type, std::string>>& sequence)
{
std::string source = " test{}";
// Set up a tokeniser over the source text using the block syntax tokeniser function
string::Tokeniser<parser::DefBlockSyntaxTokeniserFunc, std::string::const_iterator, parser::DefSyntaxToken> tokeniser(
source, parser::DefBlockSyntaxTokeniserFunc()
);

auto it = tokeniser.getIterator();

expectToken(*it++, parser::DefSyntaxToken::Type::Whitespace, " ");
expectToken(*it++, parser::DefSyntaxToken::Type::Token, "test");
expectToken(*it++, parser::DefSyntaxToken::Type::OpeningBrace, "{");
expectToken(*it++, parser::DefSyntaxToken::Type::ClosingBrace, "}");
// Compare one token per expected entry, advancing the iterator after each check
for (const auto& [type, value] : sequence)
{
expectToken(*it++, type, value);
}
}

TEST(DefBlockSyntaxTokeniser, TokenSequences)
{
    // Shorthand for the token type enumeration used in every expectation below
    using Type = parser::DefSyntaxToken::Type;

    // Leading whitespace, a name and an empty block
    expectTokenSequence(" test{}",
    {
        { Type::Whitespace, " " },
        { Type::Token, "test" },
        { Type::BracedBlock, "{}" },
    });

    // An EOL comment after the name, followed by a block containing a nested block
    expectTokenSequence(" test//comment\n{\n{\r\n TESt \n}\n}",
    {
        { Type::Whitespace, " " },
        { Type::Token, "test" },
        { Type::EolComment, "//comment" },
        { Type::Whitespace, "\n" },
        { Type::BracedBlock, "{\n{\r\n TESt \n}\n}" },
    });

    // Block comments around the name, then a block that is never closed
    expectTokenSequence("/*comment*/\ntest/* comment */{{//",
    {
        { Type::BlockComment, "/*comment*/" },
        { Type::Whitespace, "\n" },
        { Type::Token, "test" },
        { Type::BlockComment, "/* comment */" },
        { Type::BracedBlock, "{{//" },
    });

    // Quoted strings inside the block contain braces and comment markers,
    // the whole block is still expected to come back as a single token
    expectTokenSequence("test\n{\n \"some to//kens {{\" \"containing /* control characters */\" // test\n}\r\n\r\n",
    {
        { Type::Token, "test" },
        { Type::Whitespace, "\n" },
        { Type::BracedBlock, "{\n \"some to//kens {{\" \"containing /* control characters */\" // test\n}" },
        { Type::Whitespace, "\r\n\r\n" },
    });
}

TEST(DefBlockSyntaxParser, EmptyText)
Expand Down

0 comments on commit e530be6

Please sign in to comment.