Skip to content

Commit

Permalink
Improve links between individual constructs in internal representatio…
Browse files Browse the repository at this point in the history
…n (issue #73) (#96)

* Add TokenType and ExpressionType and put in separate files to allow import of symbol.h from literal.h

* Solve #73 for rule names - renaming rule also renames all references to it

* Each string reference used to be a plain string; it is now linked to the string definition, so renaming is easier.

* Remove ParserMode::Incomplete to be part of another PR

* Cleanup: Remove some asserts and printouts

Co-authored-by: Tadeáš Kučera <tadeas.kucera@avast.com>
  • Loading branch information
TadeasKucera and TadeasKucera committed Apr 28, 2020
1 parent ec37e9d commit c98c014
Show file tree
Hide file tree
Showing 21 changed files with 596 additions and 238 deletions.
1 change: 1 addition & 0 deletions include/yaramod/parser/parser_driver.h
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ class ParserDriver
/// @name Methods for handling strings
/// @{
bool stringExists(const std::string& id) const;
const Literal* findStringDefinition(const std::string& id) const;
void setCurrentStrings(const std::shared_ptr<Rule::StringsTrie>& currentStrings);
bool sectionStrings() const { return _sectionStrings; };
void sectionStrings(bool new_value) { _sectionStrings = new_value; };
Expand Down
12 changes: 2 additions & 10 deletions include/yaramod/types/expression.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

#include "yaramod/utils/visitor_result.h"
#include "yaramod/types/token_stream.h"
#include "yaramod/types/expression_type.h"

namespace yaramod {

Expand All @@ -28,16 +29,7 @@ class Expression
using Ptr = std::shared_ptr<Expression>;

///< Type of the expression.
enum class Type
{
Undefined,
Bool,
Int,
String,
Regexp,
Object,
Float
};
using Type = ExpressionType;

/// @name Constructors
/// @{
Expand Down
23 changes: 23 additions & 0 deletions include/yaramod/types/expression_type.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/**
* @file src/types/expression_type.h
* @brief Declaration of enum class ExpressionType.
* @copyright (c) 2017 Avast Software, licensed under the MIT license
*/

#pragma once

namespace yaramod {

/// Type of an expression.
/// Declared in its own header so it can be used without including expression.h.
enum class ExpressionType
{
Undefined,
Bool,
Int,
String,
Regexp,
Object,
Float
};

}
27 changes: 18 additions & 9 deletions include/yaramod/types/expressions.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class StringExpression : public Expression
return v->visit(this);
}

const std::string& getId() const { return _id->getString(); }
std::string getId() const { return _id->getPureText(); }

void setId(const std::string& id) { _id->setValue(id); }
void setId(std::string&& id) { _id->setValue(std::move(id)); }
Expand Down Expand Up @@ -123,7 +123,7 @@ class StringAtExpression : public Expression
return v->visit(this);
}

const std::string& getId() const { return _id->getString(); }
std::string getId() const { return _id->getPureText(); }
const Expression::Ptr& getAtExpression() const { return _at; }

void setId(const std::string& id) { _id->setValue(id); }
Expand Down Expand Up @@ -175,7 +175,7 @@ class StringInRangeExpression : public Expression
return v->visit(this);
}

const std::string& getId() const { return _id->getString(); }
std::string getId() const { return _id->getPureText(); }
const Expression::Ptr& getRangeExpression() const { return _range; }

void setId(const std::string& id) { _id->setValue(id); }
Expand Down Expand Up @@ -219,14 +219,17 @@ class StringCountExpression : public Expression
return v->visit(this);
}

const std::string& getId() const { return _id->getString(); }
std::string getId() const { return _id->getPureText(); }

void setId(const std::string& id) { _id->setValue(id); }
void setId(std::string&& id) { _id->setValue(std::move(id)); }

virtual std::string getText(const std::string& /*indent*/ = std::string{}) const override
{
return _id->getString();
auto output = getId();
assert(output != std::string() && "String id must be non-empty.");
output[0] = '#';
return output;
}

private:
Expand Down Expand Up @@ -273,7 +276,7 @@ class StringOffsetExpression : public Expression
return v->visit(this);
}

const std::string& getId() const { return _id->getString(); }
std::string getId() const { return _id->getPureText(); }
const Expression::Ptr& getIndexExpression() const { return _expr; }

void setId(const std::string& id) { _id->setValue(id); }
Expand All @@ -283,7 +286,10 @@ class StringOffsetExpression : public Expression

virtual std::string getText(const std::string& indent = std::string{}) const override
{
return _expr ? getId() + '[' + _expr->getText(indent) + ']' : getId();
auto prefix = getId();
assert(prefix != std::string() && "String id must be non-empty.");
prefix[0] = '@';
return _expr ? prefix + '[' + _expr->getText(indent) + ']' : prefix;
}

private:
Expand Down Expand Up @@ -330,7 +336,7 @@ class StringLengthExpression : public Expression
return v->visit(this);
}

const std::string& getId() const { return _id->getString(); }
std::string getId() const { return _id->getPureText(); }
const Expression::Ptr& getIndexExpression() const { return _expr; }

void setId(const std::string& id) { _id->setValue(id); }
Expand All @@ -340,6 +346,9 @@ class StringLengthExpression : public Expression

/// Returns the textual form of this string-length expression: the string
/// identifier with its first character replaced by '!', followed by the
/// bracketed index expression when one is present.
///
/// @param indent Passed through to the index expression's getText().
/// @return Text such as `!id` or `!id[<index>]`.
virtual std::string getText(const std::string& indent = std::string{}) const override
{
	auto prefix = getId();
	assert(!prefix.empty() && "String id must be non-empty.");
	// Replace the first character of the id with the length operator '!'.
	prefix[0] = '!';
	// Build the result from prefix (not getId()), otherwise the '!' written
	// above is discarded and the unmodified id is printed instead.
	return _expr ? prefix + '[' + _expr->getText(indent) + ']' : prefix;
}

Expand Down Expand Up @@ -1220,7 +1229,7 @@ class IdExpression : public Expression
{
_symbol = symbol;
if (_symbolToken)
_symbolToken.value()->setValue(_symbol, _symbol->getName());
_symbolToken.value()->setValue(_symbol);
}

protected:
Expand Down
33 changes: 23 additions & 10 deletions include/yaramod/types/literal.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,11 @@
#include <variant>

#include "yaramod/yaramod_error.h"
#include "yaramod/types/symbol.h"

namespace yaramod {

class Symbol;
class String;

/**
* Class representing literal. Literal can be either
Expand All @@ -32,6 +33,8 @@ class Symbol;
class Literal
{
public:

using ReferenceType = const Literal*;
/// @name Costructors
/// @{
Literal() { assert(is<std::string>()); };
Expand All @@ -42,8 +45,9 @@ class Literal
explicit Literal(std::int64_t value, const std::optional<std::string>& integral_formatted_value = std::nullopt);
explicit Literal(std::uint64_t value, const std::optional<std::string>& integral_formatted_value = std::nullopt);
explicit Literal(double value, const std::optional<std::string>& integral_formatted_value = std::nullopt);
explicit Literal(const std::shared_ptr<Symbol>& value, const std::string& name);
explicit Literal(std::shared_ptr<Symbol>&& value, const std::string& name);
explicit Literal(const std::shared_ptr<Symbol>& value);
explicit Literal(std::shared_ptr<Symbol>&& value);
explicit Literal(ReferenceType value);

Literal(Literal&& literal) = default;
Literal(const Literal& literal) = default;
Expand All @@ -58,16 +62,18 @@ class Literal
bool isInt() const { return is<std::int64_t>() || is<std::uint64_t>(); }
bool isFloat() const { return is<double>(); }
bool isSymbol() const { return is<std::shared_ptr<Symbol>>(); }
bool isLiteralReference() const { return is<ReferenceType>(); }
/// @}

/// @name Getter methods
/// @{
const std::string& getString() const { return std::get<std::string>(_value); }
bool getBool() const { return std::get<bool>(_value); }
const std::string& getString() const { assert(isString()); return std::get<std::string>(_value); }
bool getBool() const { assert(isBool()); return std::get<bool>(_value); }
std::int64_t getInt() const { return is<std::int64_t>() ? std::get<std::int64_t>(_value) : std::get<std::uint64_t>(_value); }
std::uint64_t getUInt() const { return is<std::uint64_t>() ? std::get<std::uint64_t>(_value) : std::get<std::int64_t>(_value); }
double getFloat() const { return std::get<double>(_value); }
const std::shared_ptr<Symbol>& getSymbol() const { return std::get<std::shared_ptr<Symbol>>(_value); }
double getFloat() const { assert(isFloat()); return std::get<double>(_value); }
const std::shared_ptr<Symbol>& getSymbol() const { assert(isSymbol()); return std::get<std::shared_ptr<Symbol>>(_value); }
ReferenceType getLiteralReference() const { assert(isLiteralReference()); return std::get<ReferenceType>(_value); }
std::string getFormattedValue() const;
/// @}

Expand All @@ -79,8 +85,9 @@ class Literal
void setValue(std::int64_t i, const std::optional<std::string>& integral_formatted_value = std::nullopt);
void setValue(std::uint64_t i, const std::optional<std::string>& integral_formatted_value = std::nullopt);
void setValue(double f, const std::optional<std::string>& integral_formatted_value = std::nullopt);
void setValue(const std::shared_ptr<Symbol>& s, const std::string& symbol_name);
void setValue(std::shared_ptr<Symbol>&& s, std::string&& symbol_name);
void setValue(const std::shared_ptr<Symbol>& s);
void setValue(std::shared_ptr<Symbol>&& s);
void setValue(ReferenceType l);
/// @}

/// @name String representation
Expand All @@ -96,6 +103,12 @@ class Literal
os << literal._formatted_value.value();
else if (literal.isBool())
os << (literal.getBool() ? "true" : "false");
else if (literal.isSymbol())
os << literal.getSymbol()->getName();
else if (literal.isLiteralReference())
{
os << *(literal.getLiteralReference());
}
else
std::visit(
[&os](auto&& v)
Expand All @@ -115,7 +128,7 @@ class Literal
/// For an integral literal x there are two options:
/// i. x it is unformatted: _formatted_value is empty AND _value contains x
/// ii. x it is formatted: _formatted_value contains x's string representation AND _value contains pure x
std::variant<std::string, bool, std::int64_t, std::uint64_t, double, std::shared_ptr<Symbol>> _value; ///< Value used for all literals:
std::variant<std::string, bool, std::int64_t, std::uint64_t, double, std::shared_ptr<Symbol>, ReferenceType> _value; ///< Value used for all literals:
std::optional<std::string> _formatted_value; ///< Value used for integral literals with particular formatting
};

Expand Down
1 change: 0 additions & 1 deletion include/yaramod/types/rule.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,6 @@ class Rule
std::shared_ptr<StringsTrie> _strings; ///< Strings
Expression::Ptr _condition; ///< Condition expression
std::vector<TokenIt> _tags; ///< Tags
std::shared_ptr<Symbol> _symbol; ///< Symbol representing rule
Location _location; ///< Which file was this rule included from
};

Expand Down
30 changes: 24 additions & 6 deletions include/yaramod/types/string.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,13 @@ class String
/// @name Getter methods
/// @{
Type getType() const { return _type; }
/// Returns a pointer to the literal held by the identifier token,
/// or nullptr when this string has no identifier token.
const Literal* getIdentifierTokenIt() const
{
	return _id.has_value() ? &((*_id)->getLiteral()) : nullptr;
}
std::string getIdentifier() const
{
if (_id)
Expand Down Expand Up @@ -124,15 +131,26 @@ class String

/// @name Setter methods
/// @{
template <typename Str>
void setIdentifier(Str&& id)
/// Sets the identifier of this string, taking ownership of @c id.
///
/// When an identifier token already exists, only its value is replaced.
/// Otherwise a STRING_KEY token holding @c id and an ASSIGN token "=" are
/// emplaced into the token stream at the position given by getFirstTokenIt(),
/// and both iterators are remembered.
void setIdentifier(std::string&& id)
{
	if (!_id)
	{
		// No identifier yet: create both the identifier and the '=' token.
		auto insertPos = getFirstTokenIt();
		_id = _tokenStream->emplace(insertPos, STRING_KEY, std::move(id));
		_assignToken = _tokenStream->emplace(insertPos, ASSIGN, "=");
		return;
	}

	_id.value()->setValue(std::move(id));
}

void setIdentifier(const std::string& id)
{
if (_id)
_id.value()->setValue(std::forward<Str>(id));
_id.value()->setValue(id);
else
{
auto first = getFirstTokenIt();
_id = _tokenStream->emplace(first, STRING_KEY, std::forward<Str>(id));
_id = _tokenStream->emplace(first, STRING_KEY, id);
_assignToken = _tokenStream->emplace(first, ASSIGN, "=");
}
}
Expand Down Expand Up @@ -228,8 +246,8 @@ class String
protected:
std::shared_ptr<TokenStream> _tokenStream; ///< shared_pointer to the TokenStream in which the data is stored
Type _type; ///< Type of string //no need to store type of string in tokenstream - we just store the '"' or '/' characters
std::optional<TokenIt> _id; ///< Identifier //string
std::optional<TokenIt> _assignToken; ///< Identifier //string
std::optional<TokenIt> _id; ///< Optional TokenIt pointing to identifier in strings section
std::optional<TokenIt> _assignToken; ///< Optional TokenIt pointing to '=' following _id
std::unordered_map<StringModifier::Type, std::shared_ptr<StringModifier>> _mods; ///< String modifiers
};

Expand Down
15 changes: 11 additions & 4 deletions include/yaramod/types/symbol.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
#include <string>
#include <unordered_map>

#include "yaramod/types/expression.h"
#include "yaramod/types/expression_type.h"
#include "yaramod/types/token_type.h"

namespace yaramod {

Expand Down Expand Up @@ -43,7 +44,7 @@ class Symbol
/// @name Getter methods
/// @{
const std::string& getName() const { return _name; }
Expression::Type getDataType() const { return _dataType; }
ExpressionType getDataType() const { return _dataType; }
Symbol::Type getType() const { return _type; }
TokenType getTokenType() const
{
Expand All @@ -59,6 +60,12 @@ class Symbol
}
/// @}

/// @name Setter methods
/// @{
/// Replaces the name of the symbol.
/// The argument is perfectly forwarded into the assignment to the stored name.
template<typename T>
void setName(T&& name)
{
	_name = std::forward<T>(name);
}
/// @}

/// @name Detection methods
/// @{
bool isValue() const { return _type == Symbol::Type::Value; }
Expand All @@ -71,13 +78,13 @@ class Symbol
protected:
/// @name Constructors
/// @{
Symbol(Symbol::Type type, const std::string& name, Expression::Type dataType)
Symbol(Symbol::Type type, const std::string& name, ExpressionType dataType)
: _type(type), _name(name), _dataType(dataType) {}
/// @}

Symbol::Type _type; ///< Type of the symbol
std::string _name; ///< Name
Expression::Type _dataType; ///< Data type of the symbol
ExpressionType _dataType; ///< Data type of the symbol
};

}
Loading

0 comments on commit c98c014

Please sign in to comment.