Fix conjunction placement (#89)

* Fix conjunction placement when it is preceded by newline and the newline is behind one line comment * Avoid using iterator after erasing its content from container. Use erase return value instead. Co-authored-by: Tadeáš Kučera <tadeas.kucera@avast.com>
avast · Mar 25, 2020 · 60b2123 · 60b2123
1 parent 88237bf
commit 60b2123
Show file tree

Hide file tree

Showing 2 changed files with 212 additions and 11 deletions.
diff --git a/src/types/token_stream.cpp b/src/types/token_stream.cpp
@@ -281,7 +281,7 @@ bool TokenStream::determineNewlineSectors()
 	std::size_t lineCounter = 0;
 	std::size_t doubleLineCounter = 0;
 	bool wasLine = false;
-	for (auto it = begin(); it != end(); ++it)
+	for (auto it = begin(); it != end();)
 	{
 		auto current = it->getType();
 		if (current == LP || current == LP_ENUMERATION || current == HEX_JUMP_LEFT_BRACKET || current == REGEXP_START_SLASH || current == HEX_START_BRACKET || current == LP_WITH_SPACE_AFTER || current == LP_WITH_SPACES)
@@ -305,6 +305,33 @@ bool TokenStream::determineNewlineSectors()
 		}
 		else
 			wasLine = false;
+
+		// Here we move some tokens preceding some and/or conjunction after these conjunctions.
+		// We only move comments and newlines.
+		// We only move around conjunctions that were on the beginning of a line:
+		if ((current == AND || current == OR) && std::prev(it)->getType() == NEW_LINE)
+		{
+			auto pre_it = std::prev(it);
+			++it;
+			auto insert_before = it;
+			TokenType lastMoved = insert_before->getType();
+			// We invalidate iterators but only those pointing to COMMENTs or NEW_LINEs, which are not referenced from outside of TokenStream.
+			while (pre_it->getType() == NEW_LINE || pre_it->getType() == COMMENT || pre_it->getType() == ONELINE_COMMENT)
+			{
+				if (lastMoved == NEW_LINE && pre_it->getType() == NEW_LINE)
+				{
+					pre_it = std::prev(erase(pre_it));
+				}
+				else
+				{
+					insert_before = _tokens.emplace(insert_before, std::move(*pre_it));
+					pre_it = std::prev(erase(pre_it));
+					lastMoved = insert_before->getType();
+				}
+			}
+		}
+		else
+			++it;
 	}
 	// when more than half of the newlines is "doubled", then all "doubled" newlines are made simple newline.
 	return 3 * doubleLineCounter > lineCounter;
@@ -406,14 +433,6 @@ void TokenStream::addMissingNewLines()
 				next = nextIt->getType();
 			}
 		}
-		if (current == NEW_LINE)
-		{
-			if (next == OR || next == AND)
-				while (it->getType() == NEW_LINE)
-					it = std::prev(erase(it));
-			else
-				++lineCounter;
-		}
 	}
 }
 
@@ -522,6 +541,8 @@ std::size_t TokenStream::PrintHelper::printComment(std::stringstream* ss, TokenS
 
 std::string TokenStream::getText(bool withIncludes, bool alignComments)
 {
+	if (!_formatted)
+		autoformat();
 	PrintHelper helper;
 	if (alignComments)
 		getTextProcedure(helper, nullptr, withIncludes, alignComments); // First call determines alignment of comments
@@ -545,8 +566,6 @@ std::string TokenStream::getText(bool withIncludes, bool alignComments)
  */
 void TokenStream::getTextProcedure(PrintHelper& helper, std::stringstream* os, bool withIncludes, bool alignComments)
 {
-	if (!_formatted)
-		autoformat();
 	BracketStack brackets;
 	size_t current_line_tabs = 0;
 	bool inside_rule = false;

diff --git a/tests/cpp/parser_tests.cpp b/tests/cpp/parser_tests.cpp
@@ -3186,6 +3186,188 @@ rule rule_2
 	EXPECT_EQ(expected, driver.getParsedFile().getTextFormatted());
 }
 
+TEST_F(ParserTests,
+RemoveLineBeforeAndWithCommentsWorks) {
+	prepareInput(
+R"(rule rule_1 {
+	strings:
+		$1 = "plain string" wide
+		$2 = { ab cd ef }
+		$3 = /ab*c/
+	condition:
+		any of them
+		// cuckoo
+		or (
+		true
+		// gvma
+		and false)
+}
+
+rule rule_2
+{
+	condition:
+		true
+		/* cuckoo */
+		
+		or false
+}
+)");
+
+	EXPECT_TRUE(driver.parse(input));
+	ASSERT_EQ(2u, driver.getParsedFile().getRules().size());
+
+	EXPECT_EQ(
+R"(rule rule_1 {
+	strings:
+		$1 = "plain string" wide
+		$2 = { AB CD EF }
+		$3 = /ab*c/
+	condition:
+		any of them or (true and false)
+}
+
+rule rule_2 {
+	condition:
+		true or false
+})", driver.getParsedFile().getText());
+
+	std::string expected = R"(rule rule_1
+{
+	strings:
+		$1 = "plain string" wide
+		$2 = { ab cd ef }
+		$3 = /ab*c/
+	condition:
+		any of them or
+		// cuckoo
+		(
+			true and
+			// gvma
+			false
+		)
+}
+
+rule rule_2
+{
+	condition:
+		true or
+		/* cuckoo */
+		false
+}
+)";
+
+	EXPECT_EQ(expected, driver.getParsedFile().getTextFormatted());
+}
+
+TEST_F(ParserTests,
+RemoveLineBeforeAndWithComments2Works) {
+	prepareInput(
+R"(
+import "cuckoo"
+
+rule rule_1 {
+	strings:
+		$1 = "plain string" wide
+		$2 = { ab cd ef }
+		$3 = /ab*c/
+	condition:
+		any of them // cuckoo
+		or (
+		true // gvma
+
+		and false)
+}
+
+rule rule_2
+{
+	condition:
+		true /* cuckoo */ or false
+}
+
+rule rule_3
+{
+	condition:
+		//cuckoo
+		cuckoo.sync.mutex(/a/)
+		
+		or cuckoo.sync.mutex(/b/)
+		
+		//cuckoo 64-bit
+
+	
+		and cuckoo.sync.mutex(/c/)
+
+
+
+
+		or cuckoo.sync.mutex(/d/)
+}
+)");
+
+	EXPECT_TRUE(driver.parse(input));
+	ASSERT_EQ(3u, driver.getParsedFile().getRules().size());
+
+	EXPECT_EQ(
+R"(import "cuckoo"
+
+rule rule_1 {
+	strings:
+		$1 = "plain string" wide
+		$2 = { AB CD EF }
+		$3 = /ab*c/
+	condition:
+		any of them or (true and false)
+}
+
+rule rule_2 {
+	condition:
+		true or false
+}
+
+rule rule_3 {
+	condition:
+		cuckoo.sync.mutex(/a/) or cuckoo.sync.mutex(/b/) and cuckoo.sync.mutex(/c/) or cuckoo.sync.mutex(/d/)
+})", driver.getParsedFile().getText());
+
+	std::string expected = R"(
+import "cuckoo"
+
+rule rule_1
+{
+	strings:
+		$1 = "plain string" wide
+		$2 = { ab cd ef }
+		$3 = /ab*c/
+	condition:
+		any of them or // cuckoo
+		(
+			true and // gvma
+			false
+		)
+}
+
+rule rule_2
+{
+	condition:
+		true /* cuckoo */ or
+		false
+}
+
+rule rule_3
+{
+	condition:
+		//cuckoo
+		cuckoo.sync.mutex(/a/) or
+		cuckoo.sync.mutex(/b/) and
+		//cuckoo 64-bit
+		cuckoo.sync.mutex(/c/) or
+		cuckoo.sync.mutex(/d/)
+}
+)";
+
+	EXPECT_EQ(expected, driver.getParsedFile().getTextFormatted());
+}
+
 TEST_F(ParserTests,
 MultipleRulesWorks2) {
 	prepareInput(