# Query Language

This notebook contains an overview of the query language built into the MATE attack framework.

## Setup

This section contains initial setup: loading the required libraries, starting the database container, etc.

In [None]:
# Automatically reload modules.
# Mode 2 re-imports all modules before each cell runs, so edits to the
# project's .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Add project root to Python search path.
# '..' is resolved against the kernel's current working directory, so this
# assumes the notebook is run from a direct subdirectory of the project root.
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

# Import all matchers and helper functions for the query language
# NOTE(review): the example cells use Instruction, MemoryBuffer, dependsOn, P
# and is_null unqualified; presumably they come from these wildcard imports,
# but which module provides which name is not visible here.
from query_language.expressions import *
from query_language.node_matchers import *
from query_language.traversal_matchers import *

from query_language.get_query import get_query

# Import MATE framework.
from core.core import Core
from core.workspace import Workspace

from modules.basic_block_profiler import BasicBlockProfilerModule
from modules.branch_profiler import BranchProfilerModule
from modules.caballero import Caballero as CaballeroModule
from modules.call_targets import CallTargetsModule
from modules.data_dependencies import DataDependenciesModule
from modules.instruction_info import InstructionInfoModule
from modules.instruction_values import InstructionValuesModule
from modules.memory_buffers import MemoryBuffersModule
from modules.memory_instructions_profiler import MemoryInstructionsProfilerModule

# Load workspaces from database.
Workspace.load_from_db(Core())

## Collecting data

The first step is to add data to the database.

In [None]:
def run_demo():
    executable_path = Core().get_subdirectory('demos', 'coreutils', 'sha256sum')
    binary_params = 'input.txt'
    
    Workspace.create_new('sha256sum', executable_path)
    Workspace.select('sha256sum')
    workspace = Workspace.current
    
    # Create input file
    input_path = os.path.join(workspace.path, 'input.txt')
    with open(input_path, 'w') as f:
        f.write('Lorem ipsum dolor sit amet.')
        
    # (1) Basic block profiler
    bbl_profiler = BasicBlockProfilerModule(binary_params=binary_params, timeout=0)
    %time bbl_profiler.run()
    
    # (2) Branch profiler
    branch_profiler = BranchProfilerModule(binary_params=binary_params, timeout=0)
    %time branch_profiler.run()
    
    # (3) Caballero
    caballero = CaballeroModule(ratio=0.3, binary_params=binary_params, timeout=0)
    %time caballero.run()
    
    # (4) Call targets
    call_targets = CallTargetsModule(binary_params=binary_params, timeout=0)
    %time call_targets.run()
    
    # (5) Data dependencies
    data_deps = DataDependenciesModule(binary_params=binary_params, timeout=0, shortcuts=False)
    %time data_deps.run()
    
    # (6) Instruction info
    instr_info = InstructionInfoModule(binary_params=binary_params, timeout=0)
    %time instr_info.run()
    
    # (7) Instruction values
    instr_values = InstructionValuesModule(binary_params=binary_params, timeout=0)
    %time instr_values.run()
    
    # (8) Memory buffer
    mem_buffers = MemoryBuffersModule(binary_params=binary_params, timeout=0)
    %time mem_buffers.run()
    
    # (9) Memory instructions profiler
    mem_instr = MemoryInstructionsProfilerModule(binary_params=binary_params, timeout=0)
    %time mem_instr.run()

run_demo()

In [None]:
# Make 'sha256sum' the current workspace; test_query() reads the graph to
# query from Workspace.current.
Workspace.select('sha256sum')

## Examples

This section contains example queries using the query language.

Additionally, each query has the corresponding Cypher query listed.

The correctness of the query language is also checked by verifying that the hand-written Cypher query and the query generated from the matcher return the same result.

In [None]:
def test_query(matcher, cypher_query):
    query = get_query(matcher)
    
    print('Matcher is converted to the following Cypher query:')
    print()
    print(query)
    
    db = Workspace.current.graph
    
    result = db.run_debug_json(query)
    cypher_result = db.run_debug_json(cypher_query)
    
    assert result == cypher_result, "Results from queries do not match!"
    assert len(result) > 0, "Useless query example: query does not return any results"
    
    print(f"{len(result)} result(s) match.")

In [None]:
# Simplest case: match every MemoryBuffer node and bind it as "data_buffer".
matcher = MemoryBuffer().bind("data_buffer")

test_query(matcher, 'MATCH (data_buffer:MemoryBuffer) RETURN data_buffer')

In [None]:
# Traversal plus cross-node comparison: instructions that depend on an "add"
# instruction whose num_executions property equals their own. P("add_op", ...)
# refers to a property of the node bound as "add_op".
matcher = (
    Instruction(
        dependsOn(
            Instruction(P("opcode") == "add").bind("add_op")
        ),
        P("num_executions") == P("add_op", "num_executions")
    ).bind("ins")
)

test_query(matcher,
           '''
           MATCH (ins:Instruction) -[:DEPENDS_ON]-> (add_op:Instruction)
           WHERE ins.num_executions = add_op.num_executions AND add_op.opcode = "add"
           RETURN ins, add_op
           ''')

In [None]:
# Condition on the DEPENDS_ON relationship itself (register != "rsp").
# The relationship is not bound here; the reference Cypher returns only the
# two instructions.
matcher = (
    Instruction(
        dependsOn(
            Instruction().bind("ins2"),
            P("register") != "rsp"
        )
    ).bind("ins1")
)

test_query(matcher,
           '''
           MATCH (ins1:Instruction) -[dep:DEPENDS_ON]-> (ins2:Instruction)
           WHERE dep.register <> "rsp"
           RETURN ins1, ins2
           ''')

In [None]:
# Same relationship condition as the previous example, but the relationship
# is bound as "dep"; the reference Cypher returns it alongside both
# instructions.
matcher = (
    Instruction(
        dependsOn(
            Instruction().bind("ins2"),
            P("register") != "rsp"
        ).bind("dep")
    ).bind("ins1")
)

test_query(matcher,
           '''
           MATCH (ins1:Instruction) -[dep:DEPENDS_ON]-> (ins2:Instruction)
           WHERE dep.register <> "rsp"
           RETURN ins1, ins2, dep
           ''')

In [None]:
# Compound relationship condition: `&` and `|` combine expressions (AND / OR
# in the reference Cypher) and is_null() corresponds to IS NULL. The explicit
# parentheses around each comparison are required, because `&` and `|` bind
# tighter than `!=` in Python.
matcher = (
    Instruction(
        dependsOn(
            Instruction().bind("ins2"),
            ((P("register") != "rsp") & (P("register") != "rip")) | is_null(P("register"))
        ).bind("dep")
    ).bind("ins1")
)

test_query(matcher,
           '''
           MATCH (ins1:Instruction) -[dep:DEPENDS_ON]-> (ins2:Instruction)
           WHERE (dep.register <> "rsp" AND dep.register <> "rip") OR dep.register IS NULL
           RETURN ins1, ins2, dep
           ''')