Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

YARA-1806: Fix removing of parts of rules in tokenstream #206

Merged
merged 11 commits into from
Mar 22, 2022
2 changes: 2 additions & 0 deletions include/yaramod/types/token_stream.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ class TokenStream
const Token& front() const { return _tokens.front(); }
const Token& back() const { return _tokens.back(); }
const std::list<Token>& getTokens() const { return _tokens; }
std::pair<TokenIt, TokenIt> findBounds(TokenIt embedded, TokenType leftType, TokenType rightType) const;
/// @}

/// @name Iterators
Expand Down Expand Up @@ -142,6 +143,7 @@ class TokenStream
friend std::ostream& operator<<(std::ostream& os, TokenStream& ts) { return os << ts.getText(false); }
std::string getText(bool withIncludes = false, bool alignComments = true);
std::vector<std::string> getTokensAsText() const;
std::string getTokensAsString() const;
/// @}

/// @name New Line Characters
Expand Down
18 changes: 14 additions & 4 deletions include/yaramod/types/yara_file.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,13 @@ class YaraFile
{
auto itr = std::stable_partition(_imports.begin(), _imports.end(), [&](const auto& i) { return !fn(i); });
for (auto rem_itr = itr; rem_itr != _imports.end(); ++rem_itr)
_importTable.erase(_importTable.find((*rem_itr)->getName()));
{
auto rem_import = _importTable.find((*rem_itr)->getName());
auto import_token = rem_import->second.first;
auto bounds = _tokenStream->findBounds(import_token, TokenType::IMPORT_KEYWORD, TokenType::NEW_LINE);
_tokenStream->erase(bounds.first, std::next(bounds.second));
_importTable.erase(rem_import);
}
_imports.erase(itr, _imports.end());
}

Expand All @@ -73,8 +79,12 @@ class YaraFile
for (auto rem_itr = itr; rem_itr != _rules.end(); ++rem_itr)
{
_ruleTable.erase(_ruleTable.find((*rem_itr)->getName()));
auto behind = _tokenStream->erase((*rem_itr)->getFirstTokenIt(), std::next((*rem_itr)->getLastTokenIt()));
while (behind != _tokenStream->end() && behind->getType() == TokenType::NEW_LINE)
auto from = (*rem_itr)->getFirstTokenIt();
if (from != _tokenStream->begin() && from->getType() == TokenType::NEW_LINE)
from = std::prev(from);
auto to = std::next((*rem_itr)->getLastTokenIt());
auto behind = _tokenStream->erase(from, to);
while (behind != _tokenStream->end() && behind != _tokenStream->begin() && behind->getType() == TokenType::NEW_LINE)
behind = _tokenStream->erase(behind);
}
_rules.erase(itr, _rules.end());
Expand All @@ -100,7 +110,7 @@ class YaraFile
std::vector<std::shared_ptr<Module>> _imports; ///< Imported modules
std::vector<std::shared_ptr<Rule>> _rules; ///< Rules

std::unordered_map<std::string, Module*> _importTable;
std::unordered_map<std::string, std::pair<TokenIt, Module*>> _importTable;
std::unordered_map<std::string, Rule*> _ruleTable;

Features _Features; ///< Determines which symbols are needed
Expand Down
24 changes: 24 additions & 0 deletions src/types/rule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,30 @@ const Meta* Rule::getMetaWithName(const std::string& key) const
*/
TokenIt Rule::getFirstTokenIt() const
{
if (isPrivate() && isGlobal())
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This feels like a workaround for the problem. What if a new modifier is added? Are we going to list out all the possible combinations? Why not store the modifiers in some sort of container? Then, whenever the first token is requested, the first token in the container is simply returned.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for pointing this out. I have changed the way we work with modifiers so it should be much easier now to add more rule modifiers. Please see the last commit.

{
assert(_mod_private.has_value());
assert(_mod_global.has_value());
for (auto it = _mod_private.value(); it != _tokenStream->end(); ++it)
{
std::cout << it->getText() << std::endl;
if (it->getType() == TokenType::RULE_BEGIN)
return _mod_global.value();
if (it->getType() == TokenType::GLOBAL)
return _mod_private.value();
}
return _mod_global.value();
}
if (isPrivate())
{
assert(_mod_private.has_value());
return _mod_private.value();
}
if (isGlobal())
{
assert(_mod_global.has_value());
return _mod_global.value();
}
return _tokenStream->findBackwards(TokenType::RULE, _name);
}

Expand Down
20 changes: 20 additions & 0 deletions src/types/token_stream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,18 @@ namespace yaramod {

constexpr unsigned tabulator_length = 8;


/**
 * Finds the tokens that enclose @c embedded by scanning outwards from it.
 *
 * The left bound is searched backwards starting at @c embedded itself, the
 * right bound is searched forwards starting at @c embedded itself.
 *
 * @param embedded Iterator into this token stream from which both scans start.
 * @param leftType Type of the token that terminates the backward scan.
 * @param rightType Type of the token that terminates the forward scan.
 *
 * @return Pair `(left, right)`.
 *   - `left` is the nearest token at or before @c embedded with type
 *     @c leftType. If there is none, `left` is `begin()`; callers cannot
 *     distinguish this from a match on the very first token without checking
 *     its type.
 *   - `right` is the nearest token at or after @c embedded with type
 *     @c rightType. If there is none, `right` is `end()`; callers must not
 *     dereference or advance it in that case.
 */
std::pair<TokenIt, TokenIt> TokenStream::findBounds(TokenIt embedded, TokenType leftType, TokenType rightType) const
{
	auto left = embedded;
	// `embedded` may be the past-the-end iterator; never dereference it, just step back from it.
	while (left != begin() && (left == end() || left->getType() != leftType))
		--left;

	auto right = embedded;
	while (right != end() && right->getType() != rightType)
		++right;

	return std::make_pair(left, right);
}

TokenIt TokenStream::emplace_back(TokenType type, char value)
{
_tokens.emplace_back(type, Literal(std::string(1, value)));
Expand Down Expand Up @@ -339,6 +351,14 @@ std::vector<std::string> TokenStream::getTokensAsText() const
return output;
}

/**
 * Renders the texts of all tokens as a single string. Each token text is
 * wrapped in single quotes and followed by a comma and a space (including
 * the last one).
 *
 * @return Concatenation of `'<text>', ` for every entry of getTokensAsText().
 */
std::string TokenStream::getTokensAsString() const
{
	const auto texts = getTokensAsText();

	std::string joined;
	for (const auto& text : texts)
	{
		joined += "'";
		joined += text;
		joined += "', ";
	}
	return joined;
}

void TokenStream::clear()
{
_tokens.clear();
Expand Down
4 changes: 2 additions & 2 deletions src/types/yara_file.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ bool YaraFile::addImport(TokenIt import, ModulePool& modules)
return true;

_imports.push_back(std::move(module));
_importTable.emplace(_imports.back()->getName(), _imports.back().get());
_importTable.emplace(_imports.back()->getName(), std::make_pair(import, _imports.back().get()));
return true;
}

Expand Down Expand Up @@ -389,7 +389,7 @@ std::shared_ptr<Symbol> YaraFile::findSymbol(const std::string& name) const
return itr->second->getSymbol();

if (auto itr = _importTable.find(name); itr != _importTable.end())
return itr->second->getStructure();
return itr->second.second->getStructure();

for (const auto& vtSymbol : _vtSymbols)
{
Expand Down
68 changes: 68 additions & 0 deletions tests/cpp/visitor_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1332,6 +1332,74 @@ rule rule_5
EXPECT_EQ(expected, yara_file.getTextFormatted());
}

// Checks that rules carrying `global` / `private global` modifiers are removed
// from the token stream together with their modifiers when RuleDeleter is run
// over the parsed file, and that the remaining text is still well-formed.
TEST_F(VisitorTests,
RuleModifierWorksWhenDeletingRules) {
prepareInput(
R"(
global rule rule_4 {
condition:
false
}

global rule delete_rule_3 {
condition:
false
}

private global rule delete_rule_7 {
condition:
delete_rule_3
}

private global rule rule_5 {
condition:
not delete_rule_3
}

private global rule delete_rule_6 {
condition:
false
}
)");

// All five rules from the input must be parsed before the visitor runs.
EXPECT_TRUE(driver.parse(input));
auto yara_file = driver.getParsedFile();
ASSERT_EQ(5u, yara_file.getRules().size());

// NOTE(review): RuleDeleter is defined elsewhere in this test file; presumably it
// removes the rules named `delete_rule_*` and rewrites references to them - confirm.
RuleDeleter visitor;
visitor.process(yara_file);

// Only rule_4 and rule_5 are expected to survive.
ASSERT_EQ(2u, yara_file.getRules().size());

// Unformatted text: no leftover modifier tokens or newlines from the deleted rules,
// and rule_5's condition now reads `false` where the input had `not delete_rule_3`.
EXPECT_EQ(
R"(global rule rule_4 {
condition:
false
}

private global rule rule_5 {
condition:
false
})", yara_file.getText());

// Formatted text must contain the same two rules, modifiers included.
std::string expected = R"(
global rule rule_4
{
condition:
false
}

private global rule rule_5
{
condition:
false
}

)";

EXPECT_EQ(expected, yara_file.getTextFormatted());
}

TEST_F(VisitorTests,
DeletingVisitor2) {
prepareInput(
Expand Down
81 changes: 81 additions & 0 deletions tests/python/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -481,6 +481,48 @@ def test_private_rule(self):
self.assertFalse(rule.is_global)
self.assertFalse(rule.is_private)

# Parses two rules that carry both modifiers, once written as `private global`
# and once as `global private`, and checks the formatted text, the reported
# modifier flags and the type of each rule's first token.
def test_global_private_rule(self):
yara_file = yaramod.Yaramod().parse_string('''
private global rule pg_rule {
condition:
true
}

global private rule gp_rule {
condition:
true
}''')

self.assertEqual(len(yara_file.rules), 2)

# The formatted output is expected to keep the modifiers in the order they were written.
expected = r'''
private global rule pg_rule
{
condition:
true
}

global private rule gp_rule
{
condition:
true
}
'''
self.assertEqual(expected, yara_file.text_formatted)

# `private global`: modifier is PrivateGlobal and the first token is the `private` keyword.
pg_rule = yara_file.rules[0]
self.assertEqual(pg_rule.name, 'pg_rule')
self.assertEqual(pg_rule.modifier, yaramod.RuleModifier.PrivateGlobal)
self.assertTrue(pg_rule.is_global)
self.assertTrue(pg_rule.is_private)
self.assertEqual(pg_rule.token_first.type, yaramod.TokenType.Private)
# `global private`: same PrivateGlobal modifier, but the first token is the `global` keyword.
gp_rule = yara_file.rules[1]
self.assertEqual(gp_rule.name, 'gp_rule')
self.assertEqual(gp_rule.modifier, yaramod.RuleModifier.PrivateGlobal)
self.assertTrue(gp_rule.is_global)
self.assertTrue(gp_rule.is_private)
self.assertEqual(gp_rule.token_first.type, yaramod.TokenType.Global)

def test_import(self):
yara_file = yaramod.Yaramod().parse_string('''
import "pe"
Expand Down Expand Up @@ -1995,6 +2037,45 @@ def test_include_file_and_import_in_regular_mode(self):

import "cuckoo"

rule rule1
{
condition:
RULE and
true
}
'''
self.assertEqual(expected, yara_file.text_formatted)

def test_remove_import(self):
yara_file = yaramod.Yaramod().parse_file('./tests/python/testing_rules/testing_file_with_import.yar')
rule = yara_file.rules[0]

self.assertEqual('''import "cuckoo"

rule RULE {
condition:
true
}

rule rule1 {
condition:
RULE and true
}''', yara_file.text)

yara_file.remove_imports(lambda i: True)

self.assertEqual('''rule RULE {
condition:
true
}

rule rule1 {
condition:
RULE and true
}''', yara_file.text)

expected = r'''include "testing_include.yar"

rule rule1
{
condition:
Expand Down