hase-project · Mic92 · May 10, 2018 · May 8, 2018 · May 9, 2018 · May 9, 2018
diff --git a/.gitignore b/.gitignore
@@ -116,3 +116,5 @@ auseless/
 # redis
 *.rdb
 tests/benchmark/results
+
+tests/bin/*
diff --git a/README.md b/README.md
@@ -25,6 +25,27 @@ Additionally pyqt5 is required and cannot be installed via pip.
 $ pip install -e '.[test]'
 ```
 
+4. Patch and install perf using `perf-script-sample-addr.patch`
+
+```console
+git clone https://github.com/torvalds/linux
+cd ./linux/tools/perf
+cp path-to-your-hase-folder/perf-script-sample-addr.patch .
+patch -p3 < perf-script-sample-addr.patch
+make
+sudo cp perf /usr/bin
+```
+
+Note: perf's event parsing rules changed recently. If you get a parse error for `intel_pt//u`, see the patch at https://lkml.org/lkml/2018/5/7/94 and work around it by checking out an earlier commit (`git checkout <earlier-commit-id>`).
+
+
+5. Run the tests
+
+
+```console
+sudo nosetests -w tests/test_record.py
+```
+
 # Record crashes
 
 ```console
@@ -41,6 +62,10 @@ $ ls -la /var/lib/hase
 .rw-r--r-- 41M root  9 May  3:22 loopy-20180509T022227.tar.gz
 ```
 
+```console
+$ sudo ./bin/hase record ls -al
+```
+
 # Benchmarks
 
 Benchmarks require Pandas, which cannot be installed via pip.

diff --git a/hase/__init__.py b/hase/__init__.py
@@ -2,7 +2,6 @@
 from .cli import parse_arguments
 import sys
 
-
 def main(argv=sys.argv):
     args = parse_arguments(argv)
     if args.debug:

diff --git a/hase/annotate.py b/hase/annotate.py
@@ -42,5 +42,11 @@ def compute(self):
 
             for addr, line in zip(addresses, lines):
                 file, line = line.split(":")
+                # TODO: file:line (discriminator n)
+                # TODO: file:?
+                print(file, line)
+                line = line.split(" ")[0]
+                if line == "?":
+                    line = 0
                 addr_map[addr] = (file, int(line))
         return addr_map
diff --git a/hase/cli.py b/hase/cli.py
@@ -39,6 +39,12 @@ def parse_arguments(argv):
         type=int,
         help="Maximum crashes to record (0 for unlimited crashes)")
 
+    record.add_argument(
+        "args",
+        nargs="*",
+        help="Executable and arguments for perf tracing"
+    )
+
     replay = subparsers.add_parser('replay')
     replay.add_argument("report")
 

diff --git a/hase/frontend/__init__.py b/hase/frontend/__init__.py
@@ -46,15 +46,18 @@ def __init__(self, *args):
 
     def set_location(self, source_file, line):
         # type: (str, int) -> None
-        lexer = pygments.lexers.get_lexer_for_filename(source_file)
-        formatter_opts = dict(linenos="inline", linespans="line", hl_lines=[line])
-        html_formatter = pygments.formatters.get_formatter_by_name("html", **formatter_opts)
-        css = html_formatter.get_style_defs('.highlight')
-        with open(source_file) as f:
-            tokens = lexer.get_tokens(f.read())
-        source = pygments.format(tokens, html_formatter)
-        self.code_view.setHtml(code_template.format(css, source))
-        self.code_view.scrollToAnchor("line-%d" % max(0, line - 10))
+        # FIXME: how to robust deal with ??
+        print(source_file, line)
+        if source_file != '??':
+            lexer = pygments.lexers.get_lexer_for_filename(source_file)
+            formatter_opts = dict(linenos="inline", linespans="line", hl_lines=[line])
+            html_formatter = pygments.formatters.get_formatter_by_name("html", **formatter_opts)
+            css = html_formatter.get_style_defs('.highlight')
+            with open(source_file) as f:
+                tokens = lexer.get_tokens(f.read())
+            source = pygments.format(tokens, html_formatter)
+            self.code_view.setHtml(code_template.format(css, source))
+            self.code_view.scrollToAnchor("line-%d" % max(0, line - 10))
 
     def setup_ipython(self, app, window):
         # type: (QtWidgets.QApplication, MainWindow) -> None

diff --git a/hase/frontend/ipython_extension.py b/hase/frontend/ipython_extension.py
@@ -12,6 +12,7 @@
 
 from .. import gdb, annotate
 from ..replay import replay_trace
+from ..path import Tempdir
 
 # only for function in Magics class
 # FIXME: inherit documentation (maybe by functools.wraps)
@@ -73,14 +74,14 @@ def reload_hase(self, query):
     @args("<report_archive>")
     @line_magic("load")
     def load(self, query):
-        states = replay_trace(query)
-
-        user_ns = self.shell.user_ns
-        addr2line = annotate.Addr2line()
-        for s in states:
-            addr2line.add_addr(s.object(), s.address())
-
-        addr_map = addr2line.compute()
+        with Tempdir() as tempdir:
+            states = replay_trace(query, tempdir)
+            user_ns = self.shell.user_ns
+            addr2line = annotate.Addr2line()
+            for s in states:
+                addr2line.add_addr(s.object(), s.address())
+
+            addr_map = addr2line.compute()
         self.active_state = states[-1]
         user_ns["addr_map"] = addr_map
         user_ns["states"] = states

diff --git a/hase/path.py b/hase/path.py
@@ -5,11 +5,11 @@
 import errno
 import shutil
 
-from typing import Union, AnyStr
+from typing import Union, AnyStr, Optional
 
 
 def which(program):
-    # type: (str) -> Path
+    # type: (str) -> Optional[Path]
     def is_exe(fpath):
         return os.path.isfile(fpath) and os.access(fpath, os.X_OK)
 

diff --git a/hase/perf.py b/hase/perf.py
@@ -2,20 +2,21 @@
 
 import subprocess
 import os
-from typing import List, Tuple, Any, Union, Callable
+from typing import List, Tuple, Any, Union, Callable, Optional
 
 from .path import APP_ROOT
 
 TRACE_END = -1
 
 
 class PTSnapshot():
-    def __init__(self, perf_file="perf.data"):
-        # type: (str) -> None
+    def __init__(self, perf_file="perf.data", cmds=None):
+        # type: (str, Optional[List[str]]) -> None
 
         cmd = [
             "perf",
             "record",
+            "--no-buildid",
             "--no-buildid-cache",
             "--output",
             perf_file,
@@ -27,18 +28,24 @@ def __init__(self, perf_file="perf.data"):
             "intel_pt//u",
         ]
 
+        self.test_cmds = cmds
+
         dummy_process = [
             "sh", "-c", "echo ready; while true; do sleep 999999; done"
         ]
         self.perf_file = perf_file
         self.process = subprocess.Popen(
             cmd + dummy_process, stdout=subprocess.PIPE)
-        line = self.process.stdout.readline().strip()
-        assert line == "ready", "expected perf to return 'ready', got '%s'" % (
+        if self.process.stdout:
+            line = self.process.stdout.readline().strip()
+            assert line == "ready", "expected perf to return 'ready', got '%s'" % (
             line)
 
     def get(self):
         # type: () -> PerfData
+        if self.test_cmds:
+            test_process = subprocess.Popen(self.test_cmds)
+            test_process.wait()
         self.process.wait()
         return PerfData(self.perf_file)
 

diff --git a/hase/record.py b/hase/record.py
@@ -17,7 +17,7 @@
 
 from . import pwn_wrapper
 
-from typing import Optional, IO, Any, Tuple
+from typing import Optional, IO, Any, Tuple, List
 
 l = logging.getLogger(__name__)
 
@@ -26,10 +26,10 @@
 PROT_EXEC = 4
 
 
-def record(record_paths):
-    # type: (RecordPaths) -> Tuple[coredumps.Coredump, perf.PerfData]
+def record(record_paths, cmds=None):
+    # type: (RecordPaths, List[str]) -> Tuple[coredumps.Coredump, perf.PerfData]
 
-    with perf.PTSnapshot(perf_file=str(record_paths.perf)) as snapshot:
+    with perf.PTSnapshot(perf_file=str(record_paths.perf), cmds=cmds) as snapshot:
         handler = coredumps.Handler(snapshot.perf_pid,
                                     str(record_paths.coredump),
                                     str(record_paths.fifo),
@@ -203,8 +203,8 @@ def report_worker(queue):
             job.remove()
 
 
-def record_loop(record_path, log_path, pid_file=None, limit=0):
-    # type: (Path, Path, str, int) -> None
+def record_loop(record_path, log_path, pid_file=None, limit=0, cmds=None):
+    # type: (Path, Path, str, int, List[str]) -> None
 
     job_queue = Queue()  # type: Queue
     post_process_thread = Thread(target=report_worker, args=(job_queue, ))
@@ -216,7 +216,7 @@ def record_loop(record_path, log_path, pid_file=None, limit=0):
             i += 1
             # TODO ratelimit
             record_paths = RecordPaths(record_path, i, log_path, pid_file)
-            (coredump, perf_data) = record(record_paths)
+            (coredump, perf_data) = record(record_paths, cmds)
             job_queue.put(Job(coredump, perf_data, record_paths))
     except KeyboardInterrupt:
         pass
@@ -235,4 +235,4 @@ def record_command(args):
     logging.basicConfig(filename=str(log_path.join("hase.log")), level=logging.INFO)
 
     with Tempdir() as tempdir:
-        record_loop(tempdir, log_path, pid_file=args.pid_file, limit=args.limit)
+        record_loop(tempdir, log_path, pid_file=args.pid_file, limit=args.limit, cmds=args.args)
diff --git a/hase/replay.py b/hase/replay.py
@@ -34,27 +34,27 @@ def load_manifest(archive_root):
 
     return manifest
 
+# FIXME: since ipython-addr2line needs binary to exist, add tempdir parameter?
+def replay_trace(report, tempdir):
+    # type: (str, Tempdir) -> List[State]
 
-def replay_trace(report):
-    # type: (str) -> List[State]
+    subprocess.check_call(["tar", "-xzf", report, "-C", str(tempdir)])
 
-    with Tempdir() as tempdir:
-        subprocess.check_call(["tar", "-xzf", report, "-C", str(tempdir)])
-
-        manifest = load_manifest(tempdir)
+    manifest = load_manifest(tempdir)
 
-        coredump = manifest["coredump"]
+    coredump = manifest["coredump"]
 
-        t = Tracer(
-            coredump["executable"],
-            coredump["global_tid"],
-            manifest["perf_data"],
-            coredump["file"],
-            manifest["mappings"],
-            executable_root=str(tempdir.join("binaries")))
-        return t.run()
+    t = Tracer(
+        coredump["executable"],
+        coredump["global_tid"],
+        manifest["perf_data"],
+        coredump["file"],
+        manifest["mappings"],
+        executable_root=str(tempdir.join("binaries")))
+    return t.run()
 
 
 def replay_command(args):
     # type: (argparse.Namespace) -> List[State]
-    return replay_trace(args.report)
+    with Tempdir() as tempdir:
+        return replay_trace(args.report, tempdir)
diff --git a/hase/symbex/state.py b/hase/symbex/state.py
@@ -1,7 +1,7 @@
 from __future__ import absolute_import, division, print_function
 from angr import SimState
 from cle import ELF
-from typing import Dict, Tuple
+from typing import Dict, Tuple, Optional
 
 from ..perf import TRACE_END, Branch
 from ..annotate import Addr2line
@@ -33,7 +33,7 @@ def __init__(self, state):
         self.state = state
 
     def __getitem__(self, addr):
-        # type: (int) -> int
+        # type: (int) -> Optional[int]
         # good idea?
         byte = self.state.simstate.mem[addr].byte
         try:

diff --git a/hase/util/__init__.py b/hase/util/__init__.py
@@ -0,0 +1 @@
+from __future__ import absolute_import, division, print_function
diff --git a/hase/util/sig_wrapper.py b/hase/util/sig_wrapper.py
@@ -0,0 +1,18 @@
+import signal, os
+
+from typing import Any
+
+class RegisterSig():
+    def __init__(self, signum, handler = signal.SIG_IGN):
+        # type: (int, Any) -> None
+        self.signum = signum
+        self.handler = handler
+        self.original_handler = signal.getsignal(signum)
+
+    def __enter__(self):
+        # type: () -> RegisterSig
+        signal.signal(self.signum, self.handler)
+        return self
+
+    def __exit__(self, type, value, traceback):
+        signal.signal(self.signum, self.original_handler)
diff --git a/tests/bin/control_flow/control_flow b/tests/bin/control_flow/control_flow
diff --git a/tests/bin/control_flow/control_flow-20180404T163033.coredump b/tests/bin/control_flow/control_flow-20180404T163033.coredump
diff --git a/tests/bin/control_flow/control_flow-20180404T163033.perf b/tests/bin/control_flow/control_flow-20180404T163033.perf