From 6e9b0e9394f1b1c1e64dabd7a7f55910c95ee70f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tade=C3=A1=C5=A1=20Ku=C4=8Dera?= Date: Wed, 25 Mar 2020 18:15:54 +0100 Subject: [PATCH 1/2] Fix conjunction placement when it is preceded by newline and the newline is behind one line comment --- src/types/token_stream.cpp | 43 ++++++--- tests/cpp/parser_tests.cpp | 174 +++++++++++++++++++++++++++++++++++++ 2 files changed, 206 insertions(+), 11 deletions(-) diff --git a/src/types/token_stream.cpp b/src/types/token_stream.cpp index 046d2a3d..fabda9d6 100644 --- a/src/types/token_stream.cpp +++ b/src/types/token_stream.cpp @@ -281,7 +281,7 @@ bool TokenStream::determineNewlineSectors() std::size_t lineCounter = 0; std::size_t doubleLineCounter = 0; bool wasLine = false; - for (auto it = begin(); it != end(); ++it) + for (auto it = begin(); it != end();) { auto current = it->getType(); if (current == LP || current == LP_ENUMERATION || current == HEX_JUMP_LEFT_BRACKET || current == REGEXP_START_SLASH || current == HEX_START_BRACKET || current == LP_WITH_SPACE_AFTER || current == LP_WITH_SPACES) @@ -305,6 +305,35 @@ bool TokenStream::determineNewlineSectors() } else wasLine = false; + + // Here we move some tokens preceding some and/or conjunction after these conjunctions. + // We only move comments and newlines. + // We only move around conjunctions that were on the beginning of a line: + if ((current == AND || current == OR) && std::prev(it)->getType() == NEW_LINE) + { + auto pre_it = std::prev(it); + ++it; + auto insert_before = it; + TokenType lastMoved = insert_before->getType(); + // We invalidate iterators but only those pointing to COMMENTs or NEW_LINEs, which are not referenced from outside of TokenStream. + while (pre_it->getType() == NEW_LINE || pre_it->getType() == COMMENT || pre_it->getType() == ONELINE_COMMENT) + { + if (lastMoved == NEW_LINE && pre_it->getType() == NEW_LINE) + { + erase(pre_it); + --pre_it; + } + else + { + insert_before = _tokens.emplace(insert_before, std::move(*pre_it)); + erase(pre_it); + lastMoved = insert_before->getType(); + --pre_it; + } + } + } + else + ++it; } // when more than half of the newlines is "doubled", then all "doubled" newlines are made simple newline. return 3 * doubleLineCounter > lineCounter; @@ -406,14 +435,6 @@ void TokenStream::addMissingNewLines() next = nextIt->getType(); } } - if (current == NEW_LINE) - { - if (next == OR || next == AND) - while (it->getType() == NEW_LINE) - it = std::prev(erase(it)); - else - ++lineCounter; - } } } @@ -522,6 +543,8 @@ std::size_t TokenStream::PrintHelper::printComment(std::stringstream* ss, TokenS std::string TokenStream::getText(bool withIncludes, bool alignComments) { + if (!_formatted) + autoformat(); PrintHelper helper; if (alignComments) getTextProcedure(helper, nullptr, withIncludes, alignComments); // First call determines alignment of comments @@ -545,8 +568,6 @@ std::string TokenStream::getText(bool withIncludes, bool alignComments) */ void TokenStream::getTextProcedure(PrintHelper& helper, std::stringstream* os, bool withIncludes, bool alignComments) { - if (!_formatted) - autoformat(); BracketStack brackets; size_t current_line_tabs = 0; bool inside_rule = false; diff --git a/tests/cpp/parser_tests.cpp b/tests/cpp/parser_tests.cpp index 9407a232..017d53bb 100644 --- a/tests/cpp/parser_tests.cpp +++ b/tests/cpp/parser_tests.cpp @@ -3186,6 +3186,180 @@ rule rule_2 EXPECT_EQ(expected, driver.getParsedFile().getTextFormatted()); } +TEST_F(ParserTests, +RemoveLineBeforeAndWithCommentsWorks) { + prepareInput( +R"(rule rule_1 { + strings: + $1 = "plain string" wide + $2 = { ab cd ef } + $3 = /ab*c/ + condition: + any of them + // cuckoo + or ( + true + // gvma + and false) +} + +rule rule_2 +{ + condition: + true + /* cuckoo */ + + or false +} +)"); + + EXPECT_TRUE(driver.parse(input)); + ASSERT_EQ(2u, driver.getParsedFile().getRules().size()); + + EXPECT_EQ( +R"(rule rule_1 { + strings: + $1 = "plain string" wide + $2 = { AB CD EF } + $3 = /ab*c/ + condition: + any of them or (true and false) +} + +rule rule_2 { + condition: + true or false +})", driver.getParsedFile().getText()); + + std::string expected = R"(rule rule_1 +{ + strings: + $1 = "plain string" wide + $2 = { ab cd ef } + $3 = /ab*c/ + condition: + any of them or + // cuckoo + ( + true and + // gvma + false + ) +} + +rule rule_2 +{ + condition: + true or + /* cuckoo */ + false +} +)"; + + EXPECT_EQ(expected, driver.getParsedFile().getTextFormatted()); +} + +TEST_F(ParserTests, +RemoveLineBeforeAndWithComments2Works) { + prepareInput( +R"( +import "cuckoo" + +rule rule_1 { + strings: + $1 = "plain string" wide + $2 = { ab cd ef } + $3 = /ab*c/ + condition: + any of them // cuckoo + or ( + true // gvma + + and false) +} + +rule rule_2 +{ + condition: + true /* cuckoo */ or false +} + +rule rule_3 +{ + condition: + //cuckoo + cuckoo.sync.mutex(/a/) + or cuckoo.sync.mutex(/b/) + //cuckoo 64-bit + or cuckoo.sync.mutex(/c/) + or cuckoo.sync.mutex(/d/) +} +)"); + + EXPECT_TRUE(driver.parse(input)); + ASSERT_EQ(3u, driver.getParsedFile().getRules().size()); + + EXPECT_EQ( +R"(import "cuckoo" + +rule rule_1 { + strings: + $1 = "plain string" wide + $2 = { AB CD EF } + $3 = /ab*c/ + condition: + any of them or (true and false) +} + +rule rule_2 { + condition: + true or false +} + +rule rule_3 { + condition: + cuckoo.sync.mutex(/a/) or cuckoo.sync.mutex(/b/) or cuckoo.sync.mutex(/c/) or cuckoo.sync.mutex(/d/) +})", driver.getParsedFile().getText()); + + std::string expected = R"( +import "cuckoo" + +rule rule_1 +{ + strings: + $1 = "plain string" wide + $2 = { ab cd ef } + $3 = /ab*c/ + condition: + any of them or // cuckoo + ( + true and // gvma + false + ) +} + +rule rule_2 +{ + condition: + true /* cuckoo */ or + false +} + +rule rule_3 +{ + condition: + //cuckoo + cuckoo.sync.mutex(/a/) or + cuckoo.sync.mutex(/b/) or + //cuckoo 64-bit + cuckoo.sync.mutex(/c/) or + cuckoo.sync.mutex(/d/) +} +)"; + + EXPECT_EQ(expected, driver.getParsedFile().getTextFormatted()); +} + TEST_F(ParserTests, MultipleRulesWorks2) { prepareInput( From b09a1fbec9794d8dc7c6bce98704aff094a1cf4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tade=C3=A1=C5=A1=20Ku=C4=8Dera?= Date: Wed, 25 Mar 2020 18:53:38 +0100 Subject: [PATCH 2/2] Avoid using iterator after erasing its content from container. Use erase return value instead. --- src/types/token_stream.cpp | 6 ++---- tests/cpp/parser_tests.cpp | 14 +++++++++++--- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/src/types/token_stream.cpp b/src/types/token_stream.cpp index fabda9d6..34e8fccc 100644 --- a/src/types/token_stream.cpp +++ b/src/types/token_stream.cpp @@ -320,15 +320,13 @@ bool TokenStream::determineNewlineSectors() { if (lastMoved == NEW_LINE && pre_it->getType() == NEW_LINE) { - erase(pre_it); - --pre_it; + pre_it = std::prev(erase(pre_it)); } else { insert_before = _tokens.emplace(insert_before, std::move(*pre_it)); - erase(pre_it); + pre_it = std::prev(erase(pre_it)); lastMoved = insert_before->getType(); - --pre_it; } } } diff --git a/tests/cpp/parser_tests.cpp b/tests/cpp/parser_tests.cpp index 017d53bb..dc604116 100644 --- a/tests/cpp/parser_tests.cpp +++ b/tests/cpp/parser_tests.cpp @@ -3289,9 +3289,17 @@ rule rule_3 condition: //cuckoo cuckoo.sync.mutex(/a/) + or cuckoo.sync.mutex(/b/) + //cuckoo 64-bit - or cuckoo.sync.mutex(/c/) + + + and cuckoo.sync.mutex(/c/) + + + + or cuckoo.sync.mutex(/d/) } )"); @@ -3318,7 +3326,7 @@ rule rule_2 { rule rule_3 { condition: - cuckoo.sync.mutex(/a/) or cuckoo.sync.mutex(/b/) or cuckoo.sync.mutex(/c/) or cuckoo.sync.mutex(/d/) + cuckoo.sync.mutex(/a/) or cuckoo.sync.mutex(/b/) and cuckoo.sync.mutex(/c/) or cuckoo.sync.mutex(/d/) })", driver.getParsedFile().getText()); std::string expected = R"( @@ -3350,7 +3358,7 @@ rule rule_3 condition: //cuckoo cuckoo.sync.mutex(/a/) or - cuckoo.sync.mutex(/b/) or + cuckoo.sync.mutex(/b/) and //cuckoo 64-bit cuckoo.sync.mutex(/c/) or cuckoo.sync.mutex(/d/)