Skip to content

Commit

Permalink
dwarf.decompile_address and changing decompiler to use bytes
Browse files Browse the repository at this point in the history
  • Loading branch information
bannsec committed Mar 9, 2020
1 parent 0a2521f commit 7b91c5b
Show file tree
Hide file tree
Showing 8 changed files with 179 additions and 11 deletions.
9 changes: 9 additions & 0 deletions docs/overview/plugins/dwarf/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,12 @@ The DWARF plugin can assist with looking up what the corresponding file and
line number would be for a given address. As with all things in ``revenge``
this address is the current loaded address, rather than a base address. This
lookup can be done via :meth:`~revenge.plugins.dwarf.Dwarf.lookup_file_line`.

You can also ask DWARF to "decompile" an address for you. Note that this isn't
actually decompiling; the name is kept the same as the other decompilers to
avoid confusion. Instead of decompiling, the plugin will attempt to look up the
source file and line for your running address, and then look up the
corresponding source code for it. You must first tell the plugin where your
source directories are by using
:meth:`~revenge.plugins.dwarf.Dwarf.add_source_path`. Source lookups for an
address can then be done via
:meth:`~revenge.plugins.dwarf.Dwarf.decompile_address`.
24 changes: 20 additions & 4 deletions revenge/plugins/decompiler/decompiled.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def __repr__(self):
attrs.append(hex(self.address))

if self.src is not None:
attrs.append(self.src)
attrs.append(self.src.decode('latin-1'))

return "<" + " ".join(attrs) + ">"

Expand All @@ -45,16 +45,17 @@ def __str__(self):
first = True

if self.src is not None:
src = self.src.decode('latin-1')

for line in self.src.split("\n"):
for line in src.split("\n"):

if first:
# Can't do adjustments since we don't know the name
if self._file_name is None:
saddr = "{:18s}".format(hex(self.address))
else:
# Adjust offset to make sense with our current binary
saddr = "{:18s}".format(hex(self._process.memory[self._file_name + ":" + hex(self.address)].address))
saddr = "{:18s}".format(hex(self._process.memory[self._file_name.decode('latin-1') + ":" + hex(self.address)].address))

if self.highlight is not None:
s += getattr(colorama.Back, self.highlight) + saddr + colorama.Style.RESET_ALL + "| "
Expand Down Expand Up @@ -99,8 +100,10 @@ def src(self):
return self.__src

@src.setter
@common.validate_argument_types(src=(str, type(None)))
@common.validate_argument_types(src=(str, bytes, type(None)))
def src(self, src):
if type(src) == str:
src = src.encode('latin-1')
self.__src = src

@property
Expand All @@ -113,6 +116,19 @@ def address(self):
def address(self, address):
self.__address = address

@property
def _file_name(self):
    """bytes: Name of the file this item belongs to, or None if never set."""
    # NOTE: keep the attribute access (not a string-based getattr) so that
    # private name mangling keeps applying inside the enclosing class.
    try:
        return self.__file_name
    except AttributeError:
        # Nothing has been assigned yet.
        return None

@_file_name.setter
def _file_name(self, file_name):
    """Store the file name, normalising text to latin-1 encoded bytes.

    Args:
        file_name (str, bytes, None): Name to store. ``str`` values (and
            ``str`` subclasses) are encoded; anything else is kept as given.
    """
    # isinstance rather than an exact type check, so str subclasses are
    # encoded as well instead of being stored as text.
    if isinstance(file_name, str):
        file_name = file_name.encode('latin-1')
    self.__file_name = file_name

class Decompiled(object):
def __init__(self, process, file_name=None):
"""Represents decompiled output.
Expand Down
1 change: 1 addition & 0 deletions revenge/plugins/dwarf/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@

from .dwarf import Dwarf
from .dwarf_decompiler import DwarfDecompiler
30 changes: 27 additions & 3 deletions revenge/plugins/dwarf/dwarf.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def lookup_file_line(self, address):
address (int): Address to lookup file line info
Returns:
tuple: (filename,line) or None if it wasn't found.
tuple: (filename,line) or None, None if it wasn't found.
Example:
.. code-block:: python
Expand All @@ -107,7 +107,6 @@ def lookup_file_line(self, address):
lineprog = self._dwarffile.line_program_for_CU(CU)
prevstate = None
for entry in lineprog.get_entries():
print(entry, prevstate)
# We're interested in those entries where a new state is assigned
if entry.state is None:
continue
Expand All @@ -127,7 +126,7 @@ def lookup_file_line(self, address):
prevstate = None
continue
prevstate = entry.state
return None
return (None, None)

@classmethod
def _modules_plugin(klass, module):
Expand Down Expand Up @@ -206,11 +205,36 @@ def functions(self):

return self.__functions

####################
# Decompiler stuff #
####################

@property
def decompiler(self):
    """'Decompiler' using dwarf, created on first access and then reused."""
    try:
        instance = self.__decompiler
    except AttributeError:
        # First access: build the decompiler and remember it on this object.
        instance = DwarfDecompiler(self._process, self)
        self.__decompiler = instance
    return instance

def decompile_address(self, address):
    # Convenience wrapper: hand the request to this object's decompiler.
    # No docstring here; one is attached by the doc fixup at module bottom.
    backend = self.decompiler
    return backend.decompile_address(address)

def add_source_path(self, path):
    # Convenience wrapper: hand the request to this object's decompiler.
    # No docstring here; one is attached by the doc fixup at module bottom.
    backend = self.decompiler
    return backend.add_source_path(path)

from elftools.elf.elffile import ELFFile
from elftools.common.py3compat import maxint, bytes2str
from elftools.dwarf.descriptions import describe_form_class
import elftools.common.exceptions

import os

from .dwarf_decompiler import DwarfDecompiler, DecompilerBase

# Doc fixup
# The class docstring is taken from __init__, and the thin wrapper methods
# borrow the docstrings of the decompiler methods they forward to, so the
# text only has to be written once.
Dwarf.__doc__ = Dwarf.__init__.__doc__
#Dwarf._modules_plugin.__doc__ = Dwarf.__init__.__doc__
Dwarf.decompile_address.__doc__ = DecompilerBase.decompile_address.__doc__
Dwarf.add_source_path.__doc__ = DwarfDecompiler.add_source_path.__doc__
92 changes: 92 additions & 0 deletions revenge/plugins/dwarf/dwarf_decompiler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@

import os
import logging

from ... import common

from revenge.plugins.decompiler.decompiled import Decompiled, DecompiledItem
from revenge.plugins.decompiler import DecompilerBase

class DwarfDecompiler(DecompilerBase):
    """"Decompiler" backed by DWARF debug information.

    Nothing is actually decompiled: a running address is mapped to its source
    file and line through the owning Dwarf plugin, and that line is then read
    from a source file found in one of the registered source directories.
    """

    # Directories searched for source files, stored as bytes. This is a class
    # attribute, so the list is shared by every DwarfDecompiler instance.
    SOURCE_DIRECTORIES = [b'.']

    def __init__(self, process, dwarf):
        """
        Args:
            process: Process object, passed on to DecompilerBase.
            dwarf: Dwarf plugin instance used for the address lookups.
        """
        super().__init__(process)
        self._dwarf = dwarf

    @common.validate_argument_types(path=(str,bytes))
    def add_source_path(self, path):
        """Adds the given path to the list of directories to look for source
        code in.

        Args:
            path (str, bytes): Path to add to our search
        """
        path = os.path.abspath(path)

        if not os.path.isdir(path):
            LOGGER.error("path either does not exist or is not a directory.")
            return

        # Directories are kept as bytes. os.fsencode uses the filesystem
        # encoding, so non-Latin-1 paths neither raise nor get mis-encoded.
        if isinstance(path, str):
            path = os.fsencode(path)

        # The list is shared and long lived; don't let repeated calls grow it.
        if path not in DwarfDecompiler.SOURCE_DIRECTORIES:
            DwarfDecompiler.SOURCE_DIRECTORIES.append(path)

    def _get_line_from_file(self, line, filename):
        """Attempt to open and return the given line from the given file.

        Args:
            line (int): What line to return (1-based)
            filename (bytes, str): Name of the file

        Returns:
            bytes: Source from file or None if couldn't find.
        """
        # The search directories are bytes, so the name must be bytes too or
        # os.path.join would raise a TypeError.
        if isinstance(filename, str):
            filename = os.fsencode(filename)

        # Line numbers start at 1 (DWARF uses 0 for "no source line"). A
        # smaller value would index from the END of the file and silently
        # return the wrong line.
        if line < 1:
            return None

        for src_dir in DwarfDecompiler.SOURCE_DIRECTORIES:
            path = os.path.join(src_dir, filename)

            if not os.path.isfile(path):
                continue

            try:
                with open(path, "rb") as f:
                    src = f.read()
            except OSError:
                # Something went wrong opening/reading the file...
                continue

            src_lines = src.split(b"\n")

            if line <= len(src_lines):
                return src_lines[line - 1]

            # Found the file but couldn't find the line...
            # Maybe it's a different file with the same name? Keep looking.

        LOGGER.warning("Found debugging information, but cannot find path for '" + filename.decode('latin-1') + "'. Try adding it with:")
        LOGGER.warning(" - process.modules['" + self._dwarf._module.name + "'].dwarf.add_source_path('<path_here>')")

    @common.validate_argument_types(address=int)
    def decompile_address(self, address):
        """Look up the line of source code that corresponds to an address.

        Args:
            address (int): Address to look up.

        Returns:
            Decompiled: Holds a single entry for ``address``, or None when
            there is no debug info, the file/line lookup fails, or the source
            line cannot be read.
        """
        if not self._dwarf.has_debug_info:
            return None

        filename, line = self._dwarf.lookup_file_line(address)

        # Couldn't lookup file line
        if filename is None:
            return None

        src_line = self._get_line_from_file(line, filename)

        # src lookup failed
        if src_line is None:
            return None

        decomp = Decompiled(self._process, self._dwarf._module.name)
        # The entry is keyed by the running address but stores the address
        # with the module base and the dwarf base address subtracted.
        # NOTE(review): presumably so the item can be re-resolved against the
        # loaded module when printed -- confirm against DecompiledItem.
        decomp[address].address = address - self._dwarf._module.base - self._dwarf.base_address
        decomp[address].src = src_line

        return decomp

LOGGER = logging.getLogger(__name__)
4 changes: 2 additions & 2 deletions tests/linux/plugins/decompiler/test_decompiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def test_decompiler_basic():
assert len(a) == 1
assert 0x66d in a
assert a[0x66d].address == 0x66d
assert "sym.func" in a[0x66d].src
assert b"sym.func" in a[0x66d].src

process.quit()

Expand All @@ -52,7 +52,7 @@ def test_decompiler_basic():
assert len(a) == 1
assert 0x08048460 in a
assert a[0x08048460].address == 0x08048460
assert "sym.func" in a[0x08048460].src
assert b"sym.func" in a[0x08048460].src

process.quit()

26 changes: 26 additions & 0 deletions tests/linux/plugins/dwarf/test_dwarf.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
import revenge
types = revenge.types

from revenge.plugins.decompiler.decompiled import Decompiled
from revenge.plugins.dwarf.dwarf_decompiler import DwarfDecompiler

here = os.path.dirname(os.path.abspath(__file__))
bin_location = os.path.join(here, "..", "..", "bins")

Expand Down Expand Up @@ -37,6 +40,29 @@ def dwarf_basic(process):
assert basic.dwarf.lookup_file_line(basic.dwarf.functions[b'func1'].address) == (b'basic_dwarf.c', 4)
assert basic.dwarf.lookup_file_line(basic.dwarf.functions[b'main'].address_stop-1) == (b'basic_dwarf.c', 21)

basic.dwarf.add_source_path("/not_real_path")
assert b"/not_real_path" not in basic.dwarf.decompiler.SOURCE_DIRECTORIES

# Reset our source directories for testing purposes
DwarfDecompiler.SOURCE_DIRECTORIES = [b'.']
assert basic.dwarf.decompile_address(basic.dwarf.functions[b'main'].address) is None
basic.dwarf.add_source_path(bin_location)

#
# Decompile Address
#

decomp = basic.dwarf.decompile_address(basic.dwarf.functions[b'main'].address)
assert isinstance(decomp, Decompiled)

assert len(decomp) == 1
# Make sure all the relocation/base adjustments come back correct
assert list(decomp)[0] == basic.dwarf.functions[b'main'].address
item = decomp[basic.dwarf.functions[b'main'].address]
repr(item)
str(item)
assert item.src == b'int main(int argc, char **argv) {'

def test_dwarf_x64_basic():
process = revenge.Process(basic_dwarf_x64_path, resume=False, verbose=False)
dwarf_basic(process)
Expand Down
4 changes: 2 additions & 2 deletions tests/linux/plugins/radare2/test_ghidra.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def test_r2_ghidra_decompile_pie64_function():
assert 0x66d in d
assert d[0x66d].address == 0x66d
assert d[0x66d].highlight == 'GREEN'
assert "func" in d[0x66d].src
assert b"func" in d[0x66d].src

process.quit()

Expand Down Expand Up @@ -108,7 +108,7 @@ def test_r2_ghidra_decompile_nonpie32_function():
assert off in d
assert d[off].highlight == 'CYAN'
assert d[off].address == off
assert "func" in d[off].src
assert b"func" in d[off].src

process.quit()

0 comments on commit 7b91c5b

Please sign in to comment.