Skip to content

Commit

Permalink
Merge pull request #252 from vojone/of_at_expression
Browse files Browse the repository at this point in the history
Of ... at expression
  • Loading branch information
metthal committed Jul 31, 2023
2 parents dfb16f2 + 401ceac commit 563335e
Show file tree
Hide file tree
Showing 12 changed files with 285 additions and 24 deletions.
5 changes: 4 additions & 1 deletion docs/rtd/creating_rulesets.rst
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,8 @@ basic expressions and find the most suitable one.
* ``for_loop(spec, var1, var2, set, body)`` - represents ``for`` loop over dictionary (``for_loop(any(), "k", "v", id("pe").access("version_info"), True)``)
* ``for_loop(spec, set, body)`` - represents ``for`` loop over set of string references (``for_loop(any(), set({string_ref("$*")}), match_at("$", int_val(100)))``)
* ``of(spec, set)`` - represents ``<spec> of <set>`` (``of(all(), them())``)
* ``of_in_range(spec, set, range)`` - represents ``<spec> of <set> in <range>`` (``of_in_range(all(), them(), range(int_val(100), int_val(200)))``)
* ``of_at(spec, set, offset)`` - represents ``<spec> of <set> at <offset>`` (``of_at(all(), them(), int_val(200))``)
* ``of(spec, iterable)`` - represents ``<spec> of <iterable>`` (``of(any(), iterable([bool_val(False), bool_val(True)]))``)
* ``paren(expr, [newline])`` - represents parentheses around expressions and ``newline`` indicator for putting enclosed expression on its own line (``paren(int_val(10))``)
* ``conjunction(terms, [newline])`` - represents conjunction of ``terms`` and optionally puts them on each separate line if ``newline`` is set (``conjunction({id("rule1"), id("rule2")})``)
Expand Down Expand Up @@ -341,7 +343,8 @@ basic expressions and find the most suitable one.
* ``forLoop(spec, var1, var2, set, body)`` - represents ``for`` loop over dictionary (``forLoop(any(), "k", "v", id("pe").access("version_info"), true)``)
* ``forLoop(spec, set, body)`` - represents ``for`` loop over set of string references (``forLoop(any(), set({stringRef("$*")}), matchAt("$", intVal(100)))``)
* ``of(spec, set)`` - represents ``<spec> of <set>`` (``of(all(), them())``)
* ``of(spec, set, range)`` - represents ``<spec> of <set> in <range>`` (``of(all(), them(), range(intVal(100), intVal(200)))``)
* ``ofInRange(spec, set, range)`` - represents ``<spec> of <set> in <range>`` (``ofInRange(all(), them(), range(intVal(100), intVal(200)))``)
* ``ofAt(spec, set, offset)`` - represents ``<spec> of <set> at <offset>`` (``ofAt(all(), them(), intVal(200))``)
* ``paren(expr, [newline])`` - represents parentheses around expressions and ``newline`` indicator for putting enclosed expression on its own line (``paren(intVal(10))``)
* ``conjunction(terms, [newline])`` - represents conjunction of ``terms`` and optionally puts them on each separate line if ``newline`` is set (``conjunction({id("rule1"), id("rule2")})``). The ``terms`` parameter can be an array containing other expressions to be put together in the conjunction. But also ``terms`` can be an array of pairs, where each pair contains a term to be put in the conjunction and a comment, which will be associated with the term and printed on the same line
* ``disjunction(terms, [newline])`` - represents disjunction of ``terms`` and optionally puts them on each separate line if ``newline`` is set (``disjunction({id("rule1"), id("rule2")})``). The ``terms`` parameter can be an array containing other expressions to be put together in the disjunction. But also ``terms`` can be an array of pairs, where each pair contains a term to be put in the disjunction and a comment, which will be associated with the term and printed on the same line
Expand Down
2 changes: 1 addition & 1 deletion docs/rtd/parsing_rulesets.rst
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,7 @@ All of these provide method ``getVariable()`` (``variable`` in Python) to return
* ``ForDictExpression`` - refers to ``for`` which operates on dictionary (``for all k, v in some_dict : ( ... )``)
* ``ForArrayExpression`` - refers to ``for`` which operates on array or set of integers (``for all section in pe.sections : ( ... )``)
* ``ForStringExpression`` - refers to ``for`` which operates on set of string identifiers (``for all of ($str1, $str2) : ( ... )``)
* ``OfExpression`` - refers to ``of`` (``all of ($str1, $str2)`` or ``all of ($str1, $str2) in (filesize-500..filesize)``)
* ``OfExpression`` - refers to ``of`` (``all of ($str1, $str2)`` or ``all of ($str1, $str2) in (filesize-500..filesize)`` or ``any of ($str1, $str2) at 0``)

**Identificator expressions**

Expand Down
3 changes: 2 additions & 1 deletion include/yaramod/builder/yara_expression_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,8 @@ YaraExpressionBuilder forLoop(const YaraExpressionBuilder& forExpr, const std::s
YaraExpressionBuilder forLoop(const YaraExpressionBuilder& forExpr, const std::string& id, const YaraExpressionBuilder& iterable, const YaraExpressionBuilder& expr);
YaraExpressionBuilder forLoop(const YaraExpressionBuilder& forExpr, const YaraExpressionBuilder& set, const YaraExpressionBuilder& expr);
YaraExpressionBuilder of(const YaraExpressionBuilder& ofExpr, const YaraExpressionBuilder& set);
YaraExpressionBuilder of(const YaraExpressionBuilder& quantifier, const YaraExpressionBuilder& set, const YaraExpressionBuilder& range);
YaraExpressionBuilder ofInRange(const YaraExpressionBuilder& quantifier, const YaraExpressionBuilder& set, const YaraExpressionBuilder& range);
YaraExpressionBuilder ofAt(const YaraExpressionBuilder& quantifier, const YaraExpressionBuilder& set, const YaraExpressionBuilder& offset);

YaraExpressionBuilder iterable(const std::vector<YaraExpressionBuilder>& elements);

Expand Down
49 changes: 36 additions & 13 deletions include/yaramod/types/expressions.h
Original file line number Diff line number Diff line change
Expand Up @@ -1330,6 +1330,16 @@ class ForStringExpression : public ForExpression
* @code
* all of ($str1, $str2)
* @endcode
*
* It can also be followed by "in" with a range:
* @code
* any of ($str1, $str2) in (0..10)
* @endcode
*
* Or "at" with offset (new in YARA 4.3):
* @code
* any of ($str1, $str2) at 100
* @endcode
*/
class OfExpression : public ForExpression
{
Expand All @@ -1340,18 +1350,18 @@ class OfExpression : public ForExpression
template <typename ExpPtr1, typename ExpPtr2>
OfExpression(ExpPtr1&& forExpr, TokenIt of, ExpPtr2&& set)
: ForExpression(std::forward<ExpPtr1>(forExpr), of, std::forward<ExpPtr2>(set))
, _in_symbol(std::nullopt)
, _range(nullptr)
, _location_symbol(std::nullopt)
, _location(nullptr)
{
}
/**
* Constructor
*/
template <typename ExpPtr1, typename ExpPtr2, typename ExpPtr3>
OfExpression(ExpPtr1&& forExpr, TokenIt of, ExpPtr2&& set, TokenIt in_symbol, ExpPtr3&& range)
OfExpression(ExpPtr1&& forExpr, TokenIt of, ExpPtr2&& set, TokenIt location_symbol, ExpPtr3&& location)
: ForExpression(std::forward<ExpPtr1>(forExpr), of, std::forward<ExpPtr2>(set))
, _in_symbol(in_symbol)
, _range(std::forward<ExpPtr3>(range))
, _location_symbol(location_symbol)
, _location(std::forward<ExpPtr3>(location))
{
}

Expand All @@ -1363,21 +1373,34 @@ class OfExpression : public ForExpression
virtual std::string getText(const std::string& indent = std::string{}) const override
{
std::string output = _forExpr->getText(indent) + " " + _of_in->getString() + " " + _iterable->getText(indent);
if (_range && _in_symbol.has_value())
output += " " + _in_symbol.value()->getString() + " " + _range->getText(indent);
if (_location && _location_symbol.has_value())
output += " " + _location_symbol.value()->getString() + " " + _location->getText(indent);
return output;
}

const Expression::Ptr& getRangeExpression() const { return _range; }
void setRangeExpression(const Expression::Ptr& range) { _range = range; }
void setRangeExpression(Expression::Ptr&& range) { _range = std::move(range); }
/**
* Getter for location expression
* @return Location expression: the range after "in" or the offset after "at"; null when the expression has neither
*/
const Expression::Ptr& getLocationExpression() const { return _location; }
void setLocationExpression(const Expression::Ptr& location) { _location = location; }
void setLocationExpression(Expression::Ptr&& location) { _location = std::move(location); }

/**
* Same as OfExpression::getLocationExpression
* @note It is named as getRangeExpression to preserve backward compatibility
*/
const Expression::Ptr& getRangeExpression() const { return getLocationExpression(); }
void setRangeExpression(const Expression::Ptr& range) { setLocationExpression(range); }
void setRangeExpression(Expression::Ptr&& range) { setLocationExpression(std::move(range)); }

virtual TokenIt getFirstTokenIt() const override { return _forExpr->getFirstTokenIt(); }
virtual TokenIt getLastTokenIt() const override { return _range ? _range->getLastTokenIt() : _iterable->getLastTokenIt(); }
virtual TokenIt getLastTokenIt() const override { return _location ? _location->getLastTokenIt() : _iterable->getLastTokenIt(); }

private:
std::optional<TokenIt> _in_symbol; ///< Token holding "in"
Expression::Ptr _range; ///< Range expression
// Range and offset expressions are stored in the same member _location; there cannot be an offset and a range at the same time
std::optional<TokenIt> _location_symbol; ///< Token holding "in" or "at"
Expression::Ptr _location; ///< Range expression ("in" <range>) or offset expression ("at" <offset>)
};

/**
Expand Down
25 changes: 24 additions & 1 deletion src/builder/yara_expression_builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1224,7 +1224,7 @@ YaraExpressionBuilder of(const YaraExpressionBuilder& ofExpr, const YaraExpressi
*
* @return Builder.
*/
YaraExpressionBuilder of(const YaraExpressionBuilder& quantifier, const YaraExpressionBuilder& set, const YaraExpressionBuilder& range)
YaraExpressionBuilder ofInRange(const YaraExpressionBuilder& quantifier, const YaraExpressionBuilder& set, const YaraExpressionBuilder& range)
{
auto ts = std::make_shared<TokenStream>();
ts->moveAppend(quantifier.getTokenStream());
Expand All @@ -1238,6 +1238,29 @@ YaraExpressionBuilder of(const YaraExpressionBuilder& quantifier, const YaraExpr
return YaraExpressionBuilder(std::move(ts), std::move(expression), Expression::Type::Bool);
}

/**
 * Builds an `of` expression bound to a single offset, i.e.
 * `<quantifier> of <set> at <offset>`.
 *
 * A fresh token stream is assembled from the operands' token streams with
 * the `of` and `at` keyword tokens placed between them, and the resulting
 * OfExpression keeps iterators to both keyword tokens.
 *
 * @param quantifier All / Any / None expression.
 * @param set Set expression.
 * @param offset Offset expression.
 *
 * @return Builder of boolean type.
 */
YaraExpressionBuilder ofAt(const YaraExpressionBuilder& quantifier, const YaraExpressionBuilder& set, const YaraExpressionBuilder& offset)
{
	auto tokens = std::make_shared<TokenStream>();

	// Append in textual order: quantifier, "of", set, "at", offset.
	tokens->moveAppend(quantifier.getTokenStream());
	const auto ofKeyword = tokens->emplace_back(TokenType::OF, "of");
	tokens->moveAppend(set.getTokenStream());
	const auto atKeyword = tokens->emplace_back(TokenType::OP_AT, "at");
	tokens->moveAppend(offset.getTokenStream());

	auto ofExpr = std::make_shared<OfExpression>(
		quantifier.get(),
		ofKeyword,
		set.get(),
		atKeyword,
		offset.get()
	);

	return YaraExpressionBuilder(std::move(tokens), std::move(ofExpr), Expression::Type::Bool);
}

/**
* Creates an iterable array of elements.
*
Expand Down
19 changes: 19 additions & 0 deletions src/parser/parser_driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1346,6 +1346,25 @@ void ParserDriver::defineGrammar()
output->setUid(_uidGen.next());
return output;
})
.production("for_expression", "OF", "string_set", "AT", "primary_expression", [&](auto&& args) -> Value {
auto for_expr = std::move(args[0].getExpression());
TokenIt of = args[1].getTokenIt();
auto set = std::move(args[2].getExpression());
TokenIt at = args[3].getTokenIt();
Expression::Ptr offset = args[4].getExpression();

if (!offset->isInt()) {
std::stringstream ss;
ss << "Operator 'at' expects integer on the right-hand side of the expression. Got " << offset->getText() << ".";
error_handle(args[4].getTokenIt()->getLocation(), ss.str());
}

auto output = std::make_shared<OfExpression>(std::move(for_expr), of, std::move(set), at, offset);
output->setType(Expression::Type::Bool);
output->setTokenStream(currentTokenStream());
output->setUid(_uidGen.next());
return output;
})
.production("primary_expression", "PERCENT", "OF", "string_set", [&](auto&& args) -> Value {
auto for_expr = std::move(args[0].getExpression());
TokenIt percent = args[1].getTokenIt();
Expand Down
6 changes: 3 additions & 3 deletions src/python/typings/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -641,6 +641,7 @@ class ObservingVisitor(Visitor):

class OfExpression(ForExpression):
range: Expression
location: Expression
def __init__(self, *args, **kwargs) -> None: ...

class OrExpression(BinaryOpExpression):
Expand Down Expand Up @@ -1597,10 +1598,9 @@ def not_(arg0: YaraExpressionBuilder) -> YaraExpressionBuilder: ...
def not_byte(arg0: int) -> YaraHexStringBuilder: ...
def not_wildcard_high(arg0: int) -> YaraHexStringBuilder: ...
def not_wildcard_low(arg0: int) -> YaraHexStringBuilder: ...
@overload
def of(arg0: YaraExpressionBuilder, arg1: YaraExpressionBuilder) -> YaraExpressionBuilder: ...
@overload
def of(arg0: YaraExpressionBuilder, arg1: YaraExpressionBuilder, arg2: YaraExpressionBuilder) -> YaraExpressionBuilder: ...
def of_in_range(arg0: YaraExpressionBuilder, arg1: YaraExpressionBuilder, arg2: YaraExpressionBuilder) -> YaraExpressionBuilder: ...
def of_at(arg0: YaraExpressionBuilder, arg1: YaraExpressionBuilder, arg2: YaraExpressionBuilder) -> YaraExpressionBuilder: ...
def paren(enclosed_expr: YaraExpressionBuilder, linebreaks: bool = ...) -> YaraExpressionBuilder: ...
def range(arg0: YaraExpressionBuilder, arg1: YaraExpressionBuilder) -> YaraExpressionBuilder: ...
def regexp(arg0: str, arg1: str) -> YaraExpressionBuilder: ...
Expand Down
8 changes: 6 additions & 2 deletions src/python/yaramod_python.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -705,7 +705,10 @@ void addExpressionClasses(py::module& module)
exprClass<OfExpression, ForExpression>(module, "OfExpression")
.def_property("range",
&OfExpression::getRangeExpression,
py::overload_cast<const Expression::Ptr&>(&OfExpression::setRangeExpression));
py::overload_cast<const Expression::Ptr&>(&OfExpression::setRangeExpression))
.def_property("location",
&OfExpression::getLocationExpression,
py::overload_cast<const Expression::Ptr&>(&OfExpression::setLocationExpression));

exprClass<IterableExpression>(module, "IterableExpression")
.def_property("elements",
Expand Down Expand Up @@ -949,7 +952,8 @@ void addBuilderClasses(py::module& module)
const YaraExpressionBuilder&
>(&forLoop));
module.def("of", py::overload_cast<const YaraExpressionBuilder&, const YaraExpressionBuilder&>(&of));
module.def("of", py::overload_cast<const YaraExpressionBuilder&, const YaraExpressionBuilder&, const YaraExpressionBuilder&>(&of));
module.def("of_in_range", py::overload_cast<const YaraExpressionBuilder&, const YaraExpressionBuilder&, const YaraExpressionBuilder&>(&ofInRange));
module.def("of_at", py::overload_cast<const YaraExpressionBuilder&, const YaraExpressionBuilder&, const YaraExpressionBuilder&>(&ofAt));

module.def("iterable", &iterable);

Expand Down
41 changes: 40 additions & 1 deletion tests/cpp/builder_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -841,6 +841,45 @@ RuleWithRangeWorks) {
)", yaraFile->getTextFormatted());
}

// Builds the condition "any of them at 10" with ofAt() and verifies the first
// and last tokens of the expression as well as the plain and formatted text of
// a file containing a rule with that condition.
TEST_F(BuilderTests,
RuleWithOfAtWorks) {
auto cond = ofAt(any(), them(), intVal(10)).get();
// The expression starts at the quantifier and ends at the offset operand.
EXPECT_EQ("any", cond->getFirstTokenIt()->getPureText());
EXPECT_EQ("10", cond->getLastTokenIt()->getPureText());

// Rule with two plain strings and the "of ... at" condition built above.
YaraRuleBuilder newRule;
auto rule = newRule
.withName("rule_with_of_at")
.withPlainString("$1", "Hello World!")
.withPlainString("$2", "Ahoj Svet!")
.withCondition(cond)
.get();

YaraFileBuilder newFile;
auto yaraFile = newFile
.withRule(std::move(rule))
.get(true);

ASSERT_NE(nullptr, yaraFile);
// Plain text output.
EXPECT_EQ(R"(rule rule_with_of_at {
strings:
$1 = "Hello World!"
$2 = "Ahoj Svet!"
condition:
any of them at 10
})", yaraFile->getText());

// Formatted text output.
EXPECT_EQ(R"(rule rule_with_of_at
{
strings:
$1 = "Hello World!"
$2 = "Ahoj Svet!"
condition:
any of them at 10
}
)", yaraFile->getTextFormatted());
}

TEST_F(BuilderTests,
RuleWithStructureWorks) {
auto cond = (id("pe").access("number_of_sections") > intVal(1)).get();
Expand Down Expand Up @@ -1725,7 +1764,7 @@ RuleWithNoneOfThemExpression) {

TEST_F(BuilderTests,
RuleWithNoneOfThemInRangeWorks) {
auto cond = of(none(), them(), range(intVal(0), filesize())).get();
auto cond = ofInRange(none(), them(), range(intVal(0), filesize())).get();
EXPECT_EQ("none", cond->getFirstTokenIt()->getPureText());
EXPECT_EQ(")", cond->getLastTokenIt()->getPureText());

Expand Down
75 changes: 75 additions & 0 deletions tests/cpp/parser_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3270,6 +3270,81 @@ rule of_condition
EXPECT_EQ(input_text, driver.getParsedFile().getTextFormatted());
}

// Parses a rule whose condition is "any of them at 0" and checks the condition
// text, its first and last tokens, and that the formatted output matches
// input_text.
TEST_F(ParserTests,
OfAtConditionWorks) {
prepareInput(
R"(
rule of_condition
{
strings:
$a = "dummy1"
$b = /dummy\d/
condition:
any of them at 0
}
)");

EXPECT_TRUE(driver.parse(input));
ASSERT_EQ(1u, driver.getParsedFile().getRules().size());

const auto& rule = driver.getParsedFile().getRules()[0];
EXPECT_EQ("any of them at 0", rule->getCondition()->getText());
// The condition spans from the quantifier to the offset literal.
EXPECT_EQ("any", rule->getCondition()->getFirstTokenIt()->getPureText());
EXPECT_EQ("0", rule->getCondition()->getLastTokenIt()->getPureText());

// Round trip: formatted output is compared with input_text
// (presumably the text given to prepareInput; the fixture is not shown here).
EXPECT_EQ(input_text, driver.getParsedFile().getTextFormatted());
}

// Parses "1 of ($a, $b) at 3": a numeric quantifier, an explicit string set
// and an integer offset. Checks the condition text, its first and last tokens,
// and that the formatted output matches input_text.
TEST_F(ParserTests,
OfAtWithStringSetConditionWorks) {
prepareInput(
R"(
rule of_condition
{
strings:
$a = "dummy1"
$b = /dummy\d/
condition:
1 of ($a, $b) at 3
}
)");

EXPECT_TRUE(driver.parse(input));
ASSERT_EQ(1u, driver.getParsedFile().getRules().size());

const auto& rule = driver.getParsedFile().getRules()[0];
EXPECT_EQ("1 of ($a, $b) at 3", rule->getCondition()->getText());
// The condition spans from the quantifier to the offset literal.
EXPECT_EQ("1", rule->getCondition()->getFirstTokenIt()->getPureText());
EXPECT_EQ("3", rule->getCondition()->getLastTokenIt()->getPureText());

// Round trip: formatted output is compared with input_text
// (presumably the text given to prepareInput; the fixture is not shown here).
EXPECT_EQ(input_text, driver.getParsedFile().getTextFormatted());
}

// Parses "any of ($a, $b) at filesize - 10", where the offset is an arithmetic
// expression rather than a literal. Checks the condition text, its first and
// last tokens, and that the formatted output matches input_text.
TEST_F(ParserTests,
OfWithStringSetAndAtWithExpressionConditionWorks) {
prepareInput(
R"(
rule of_condition
{
strings:
$a = "dummy1"
$b = /dummy\d/
condition:
any of ($a, $b) at filesize - 10
}
)");

EXPECT_TRUE(driver.parse(input));
ASSERT_EQ(1u, driver.getParsedFile().getRules().size());

const auto& rule = driver.getParsedFile().getRules()[0];
EXPECT_EQ("any of ($a, $b) at filesize - 10", rule->getCondition()->getText());
EXPECT_EQ("any", rule->getCondition()->getFirstTokenIt()->getPureText());
// The last token of the whole condition is the last token of the offset expression.
EXPECT_EQ("10", rule->getCondition()->getLastTokenIt()->getPureText());

// Round trip: formatted output is compared with input_text
// (presumably the text given to prepareInput; the fixture is not shown here).
EXPECT_EQ(input_text, driver.getParsedFile().getTextFormatted());
}

TEST_F(ParserTests,
EmptyStringMetaValue) {
prepareInput(
Expand Down

0 comments on commit 563335e

Please sign in to comment.