Skip to content

Commit

Permalink
Regexp classes now end at the first occurrence of an unescaped ] (#70)
Browse files Browse the repository at this point in the history
* Regexp classes now end at the first occurrence of an unescaped ], just like in YARA.

* Fix typo: a tab was used instead of a space
  • Loading branch information
TadeasKucera committed Feb 26, 2020
1 parent 704f99d commit 60e8b07
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 38 deletions.
1 change: 0 additions & 1 deletion include/yaramod/parser/parser_driver.h
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,6 @@ class ParserDriver
std::string _indent; ///< Variable storing current indentation
std::string _comment; ///< For incremental construction of parsed comments
std::string _regexpClass; ///< Currently processed regular expression class.
std::size_t _regexpClassDepth = 0; ///< The rectangular brackets depth of nesting
pog::Parser<Value> _parser; ///< used pog parser

bool _sectionStrings = false; ///< flag used to determine if we parse section after 'strings:'
Expand Down
44 changes: 9 additions & 35 deletions src/parser/parser_driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -364,38 +364,15 @@ void ParserDriver::defineTokens()
_parser.token(R"(\\.)").states("$regexp").symbol("REGEXP_CHAR").description("regexp .").action([](std::string_view str) -> Value {
return std::string{str};
});
_parser.token(R"(\[\^\])").states("$regexp").enter_state("$regexp_class").action([&](std::string_view) -> Value {
_regexpClassDepth = 1;
_regexpClass = "^]";
return {};
});
_parser.token(R"(\[\])").states("$regexp").enter_state("$regexp_class").action([&](std::string_view) -> Value {
_regexpClassDepth = 1;
_regexpClass = "]";
return {};
});
_parser.token(R"(\[\^)").states("$regexp").enter_state("$regexp_class").action([&](std::string_view) -> Value {
_regexpClassDepth = 1;
_regexpClass = "^";
return {};
} );
_parser.token(R"(\[\^\])").states("$regexp").enter_state("$regexp_class").action([&](std::string_view) -> Value { _regexpClass = "^]"; return {}; });
_parser.token(R"(\[\])").states("$regexp").enter_state("$regexp_class").action([&](std::string_view) -> Value { _regexpClass = "]"; return {}; });
_parser.token(R"(\[\^)").states("$regexp").enter_state("$regexp_class").action([&](std::string_view) -> Value { _regexpClass = "^"; return {}; });
_parser.token(R"(\[)").states("$regexp").enter_state("$regexp_class").action([&](std::string_view) -> Value {
_regexpClassDepth = 1;
_regexpClass.clear();
return {};
} );
_parser.token(R"(\])").states("$regexp_class").symbol("REGEXP_CLASS").description("regexp class").action([&](std::string_view) -> Value {
--_regexpClassDepth;
if(_regexpClassDepth == 0)
{
enter_state("$regexp");
return std::make_pair(true, _regexpClass);
}
else
{
_regexpClass += "]";
return std::make_pair(false, std::string{});
}
_parser.token(R"(\])").states("$regexp_class").symbol("REGEXP_CLASS").description("regexp class").enter_state("$regexp").action([&](std::string_view) -> Value {
return std::make_pair(true, _regexpClass);
});
_parser.token(R"(\\w)").states("$regexp_class").action([&](std::string_view) -> Value { _regexpClass += "\\w"; return {};});
_parser.token(R"(\\W)").states("$regexp_class").action([&](std::string_view) -> Value { _regexpClass += "\\W"; return {};});
Expand All @@ -405,13 +382,10 @@ void ParserDriver::defineTokens()
_parser.token(R"(\\D)").states("$regexp_class").action([&](std::string_view) -> Value { _regexpClass += "\\D"; return {};});
_parser.token(R"(\\b)").states("$regexp_class").action([&](std::string_view) -> Value { _regexpClass += "\\b"; return {};});
_parser.token(R"(\\B)").states("$regexp_class").action([&](std::string_view) -> Value { _regexpClass += "\\B"; return {};});
_parser.token(R"(\\\])").states("$regexp_class").action([&](std::string_view) -> Value { _regexpClass += "\\]"; --_regexpClassDepth; return {};});
_parser.token(R"(\\\[)").states("$regexp_class").action([&](std::string_view) -> Value { _regexpClass += "\\["; ++_regexpClassDepth; return {};});
_parser.token(R"(\[)").states("$regexp_class").action([&](std::string_view) -> Value {
++_regexpClassDepth;
_regexpClass += "[";
return {};
});
_parser.token(R"(\\\\)").states("$regexp_class").action([&](std::string_view) -> Value { _regexpClass += "\\\\"; return {};});
_parser.token(R"(\\\])").states("$regexp_class").action([&](std::string_view) -> Value { _regexpClass += "\\]"; return {};});
_parser.token(R"(\\\[)").states("$regexp_class").action([&](std::string_view) -> Value { _regexpClass += "\\["; return {};});
_parser.token(R"(\[)").states("$regexp_class").action([&](std::string_view) -> Value { _regexpClass += "["; return {}; });
_parser.token(R"([^\]\[])").states("$regexp_class").action([&](std::string_view str) -> Value { _regexpClass += std::string{str}[0]; return {}; });
// $regexp end

Expand Down
36 changes: 34 additions & 2 deletions tests/cpp/parser_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1148,7 +1148,7 @@ R"(
rule regexp_with_unescaped_square_brackets_inside_class
{
strings:
$1 = /[[d][]***[abc]**][[]**]/
$1 = /[ [\]{}*+,\/]{2,6}OUTSIDE[ [\]{}*+?@|_]OUTSIDE/
$2 = /[ !#()[\]{}*][ !#[\]+_]/
$3 = /[[\]*+]/
$4 = /[\[\]*+]/
Expand All @@ -1170,7 +1170,7 @@ rule regexp_with_unescaped_square_brackets_inside_class
auto regexp1 = strings[0];
EXPECT_TRUE(regexp1->isRegexp());
EXPECT_EQ("$1", regexp1->getIdentifier());
EXPECT_EQ(R"(/[[d][]***[abc]**][[]**]/)", regexp1->getText());
EXPECT_EQ(R"(/[ [\]{}*+,\/]{2,6}OUTSIDE[ [\]{}*+?@|_]OUTSIDE/)", regexp1->getText());

auto regexp2 = strings[1];
EXPECT_TRUE(regexp2->isRegexp());
Expand All @@ -1190,6 +1190,38 @@ rule regexp_with_unescaped_square_brackets_inside_class
EXPECT_EQ(input_text, driver.getParsedFile().getTextFormatted());
}


// Parses a single rule whose condition contains three regexps and checks that
// the parsed condition text and the formatted file match the input.
// The regexps exercise character classes with backslash escapes:
//   - `[^\\]`  : a negated class holding an escaped backslash, directly followed by `]`
//   - `[\]}]`  : a class holding an escaped `]` followed by `}`
TEST_F(ParserTests,
ComplicatedRegexpClassWorks) {
prepareInput(
R"(
import "cuckoo"
import "pe"
rule rule_with_complicated_regexp_class
{
condition:
cuckoo.process.executed_command(/[^\\]+/)
and
cuckoo.filesystem.file_write(/\.bribe$/)
and
cuckoo.filesystem.file_write(/[\]}]\.(b[0-2]+|VC[0-9]*|DAQ)$/)
}
)");

// Parsing must succeed and yield exactly one rule; ASSERT stops the test
// before the index-0 access below if no rule was produced.
EXPECT_TRUE(driver.parse(input));
ASSERT_EQ(1u, driver.getParsedFile().getRules().size());

const auto& rule = driver.getParsedFile().getRules()[0];
EXPECT_EQ("rule_with_complicated_regexp_class", rule->getName());
EXPECT_EQ(Rule::Modifier::None, rule->getModifier());

// The expected string is a regular (non-raw) literal, so every backslash of the
// regexps above is doubled here; the three sub-expressions are joined by " and ".
EXPECT_EQ("cuckoo.process.executed_command(/[^\\\\]+/) and cuckoo.filesystem.file_write(/\\.bribe$/) and cuckoo.filesystem.file_write(/[\\]}]\\.(b[0-2]+|VC[0-9]*|DAQ)$/)", rule->getCondition()->getText());

// Round-trip check: the formatted output must equal input_text
// (presumably populated by prepareInput() -- confirm in the test fixture).
EXPECT_EQ(input_text, driver.getParsedFile().getTextFormatted());
}


TEST_F(ParserTests,
RegexpWithIterationWorks) {
prepareInput(
Expand Down

0 comments on commit 60e8b07

Please sign in to comment.