-
Notifications
You must be signed in to change notification settings - Fork 306
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Adds frida script for gathering code coverage
Implements a frida script that gathers code coverage information and saves it using drcov format. Probably should be considered experimental, as it's the first thing I've written with frida and I'm sure I've screwed something up =p
- Loading branch information
Showing
2 changed files
with
272 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
# frida-drcov.py | ||
|
||
A quick and dirty frida-based bb-tracer | ||
|
||
If your target is complex, you'll likely want to use a better, dedicated | ||
tracing engine like drcov or pin. This tracer has some significant | ||
shortcomings, which are exaggerated on larger or more complex binaries: | ||
* It drops coverage, especially near `exit()` | ||
* It cannot easily detect new threads being created, thus cannot instrument | ||
them | ||
* Self modifying code will confuse it, though to be fair I'm not sure how | ||
drcov, pin, or others deal with self modifying code either | ||
|
||
These shortcomings are probably 10% frida's fault and 90% the author's. Despite | ||
these flaws however, it is hard to beat the ease of use frida provides. | ||
|
||
## Install | ||
|
||
`$ pip install frida` | ||
|
||
## Usage | ||
`$ ./frida-drcov <process name | pid>` | ||
|
||
You can whitelist specific modules inside your target. Say you have binary | ||
`foo` which imports `libbiz`, `libbaz`, and `libbar`. You only want to trace | ||
`libbiz` and `libbaz`: | ||
|
||
`$ ./frida-drcov -w libbiz -w libbaz foo` | ||
|
||
By default, it will create and write to `frida-cov.log` in the current | ||
working directory. You can change this with `-o`: | ||
|
||
`$ ./frida-drcov -o more-coverage.log foo` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,239 @@ | ||
#!/usr/bin/env python | ||
from __future__ import print_function | ||
|
||
""" | ||
A quick and dirty frida-based bb-tracer | ||
If your target is complex, you'll likely want to use a better, dedicated | ||
tracing engine like drcov, or pin. This tracer has some significant | ||
shortcomings, which are exaggerated on large, complex binaries: | ||
* It drops coverage, especially near `exit()` | ||
* It cannot easily detect new threads being created, thus cannot instrument | ||
them | ||
* Self modifying code will confuse it, though to be fair I'm not sure how | ||
drcov, pin, or others deal with self modifying code either | ||
These shortcomings are probably 10% frida's and 90% the author's. Despite these | ||
flaws however, it is hard to beat the ease of use frida provides. | ||
""" | ||
|
||
import argparse | ||
import json | ||
import sys | ||
|
||
import frida | ||
|
||
js = """ | ||
"use strict"; | ||
var whitelist = %s; | ||
/**
 * Snapshot the modules currently loaded in the target process.
 *
 * Every record returned by Frida is annotated in place with two extra
 * fields: `id`, the record's position in the enumeration (starting at 0),
 * and `end`, the module base advanced by the module size.
 *
 * @returns {Array} the annotated module records
 */
function update_maps() {
    var modules = Process.enumerateModulesSync();
    modules.forEach(function (mod, index) {
        mod.id = index;
        mod.end = mod.base.add(mod.size);
    });
    return modules;
}
/**
 * Resolve an address to the module that contains it.
 *
 * Walks the global `maps` table looking for a module whose half-open range
 * [base, end) covers `a`. If that module is whitelisted (or the whitelist
 * contains 'all'), its base and id are returned. Otherwise, and also when no
 * module covers the address, the sentinel {start: 0, id: 0} is returned so
 * the caller can skip the block.
 *
 * @param {NativePointer} a - address to resolve
 * @returns {{start: *, id: number}} owning module base and id, or the
 *          {start: 0, id: 0} sentinel
 */
function mod_lookup(a) {
    var trace_all = whitelist.indexOf('all') >= 0;

    for (var i = 0; i < maps.length; ++i) {
        var m = maps[i];

        // Only the sign of compare() is relied on, and the module base
        // itself is a valid address inside the module, so the range test
        // is "base <= a < end" rather than a match on exactly 1 / -1.
        if (a.compare(m.base) < 0 || a.compare(m.end) >= 0) {
            continue;
        }

        if (trace_all || whitelist.indexOf(m.name) >= 0) {
            return {start: m.base, id: m.id};
        }
        // Address belongs to a module the user did not ask to trace.
        return {start: 0, id: 0};
    }

    console.log('Could not find module for: ' + a);
    return {start: 0, id: 0};
}
/**
 * Pack basic-block events into an array of drcov coverage records.
 *
 * Each event is a [start, end] address pair. For every event one 8-byte
 * record is reserved in the output, laid out as:
 *
 *     typedef struct _bb_entry_t {
 *         uint   start;   // offset of bb start from the image base
 *         ushort size;
 *         ushort mod_id;
 *     } bb_entry_t;
 *
 * Events that mod_lookup() answers with its {start: 0, id: 0} sentinel are
 * skipped, which leaves their record filled with zero bytes.
 *
 * @param {Array} bbs - [start, end] address pairs
 * @param {Array} maps - module table; not read here, the lookup is
 *        delegated to mod_lookup()
 * @returns {ArrayBuffer} 8 * bbs.length bytes of packed records
 */
function drcov_bb(bbs, maps) {
    var ENTRY_BYTES = 8;
    var packed = new ArrayBuffer(ENTRY_BYTES * bbs.length);

    // Two views over the same bytes: record i occupies 32-bit slots
    // [2i, 2i+1] and 16-bit slots [4i .. 4i+3].
    var as_u32 = new Uint32Array(packed);
    var as_u16 = new Uint16Array(packed);

    bbs.forEach(function (block, index) {
        var block_start = block[0];
        var block_end = block[1];

        var owner = mod_lookup(block_start);
        if (owner.start == 0 && owner.id == 0) {
            return;
        }

        var rel_offset = block_start.sub(owner.start).toInt32();
        var byte_count = block_end.sub(block_start).toInt32();

        as_u32[index * 2] = rel_offset;
        as_u16[index * 4 + 2] = byte_count;
        as_u16[index * 4 + 3] = owner.id;
    });

    return packed;
}
// Take the module snapshot once and hand it to the host side up front.
var maps = update_maps();
send({'map': maps});

// Convert one batch of raw Stalker events into packed drcov records and
// ship them to the host.
function forward_blocks(event) {
    var bb_events = Stalker.parse(event, {stringify: false, annotate: false});
    var packed = drcov_bb(bb_events, maps);
    // The JSON part of the message is only a marker, sent as a dict to stay
    // consistent with the map message; the records travel in the binary
    // data field. They are already in drcov form, so the python side can
    // add them to a set without further processing.
    send({bb: 1}, packed);
}

// Start tracing basic blocks on a single thread.
function follow_thread(thread) {
    Stalker.follow(thread.id, {
        // 'compile' events would be a lot faster than 'block' events, but
        // they would miss blocks that were already hit before attaching,
        // which is not an acceptable loss of coverage.
        events: {
            block: true
        },
        onReceive: forward_blocks
    });
}

// Note: only threads that exist right now are followed, so any bbs hit by
// threads created after attaching are missed.
Process.enumerateThreads({
    onMatch: follow_thread,
    onComplete: function () {}
});
""" | ||
|
||
# Module records reported by the injected script; appended to by
# populate_modules().
modules = list()
# Unique 8-byte basic-block records received so far; added to by
# populate_bbs().
bbs = set()
|
||
def usage(argv0):
    """Write a one-line usage summary to stderr and exit with status 1.

    argv0 -- program name to show in the message
    """
    message = 'Usage: %s <process name/pid>\n' % argv0
    sys.stderr.write(message)
    raise SystemExit(1)
|
||
def populate_modules(image_list):
    """Record the module table sent by the injected script.

    Each item of image_list is a dict with the keys 'id', 'path', 'base',
    'end' and 'size'. 'base' and 'end' are parsed with int(value, 0), so
    they are expected to be strings such as '0x400000'. One normalized
    record per item is appended to the global `modules` list.

    Returns the global `modules` list.
    """
    global modules

    for image in image_list:
        modules.append({
            'id': image['id'],
            'path': image['path'],
            'base': int(image['base'], 0),
            'end': int(image['end'], 0),
            'size': image['size'],
        })

    print('Got module data, now collecting coverage')
    print('Control-D to terminate....')
    return modules
|
||
def populate_bbs(data):
    """Split a binary payload into 8-byte records and remember each one.

    data -- the binary blob attached to a coverage message: consecutive
            8-byte basic-block records. May be None or empty, in which
            case nothing is recorded.

    Records are added to the global `bbs` set, so duplicates collapse.
    """
    global bbs

    # A message can arrive without a binary attachment; there is nothing
    # to slice in that case.
    if not data:
        return

    entry_size = 8
    # range() rather than xrange() so this runs on Python 2 and Python 3.
    for offset in range(0, len(data), entry_size):
        bbs.add(data[offset:offset + entry_size])
|
||
def create_header(modules):
    """Build the textual head of a drcov file: version, flavor, module table.

    modules -- list of dicts providing 'id', 'base', 'end' and 'path'

    The module table has one row per module with the columns
    id, base, end, entry, checksum, timestamp, path. frida does not report
    entry, checksum or timestamp, so those columns are written as 0.

    Returns the header as a single string terminated by a newline.
    """
    preamble = ''.join((
        'DRCOV VERSION: 2\n',
        'DRCOV FLAVOR: frida\n',
        'Module Table: version 2, count %d\n' % len(modules),
        'Columns: id, base, end, entry, checksum, timestamp, path\n',
    ))

    row_format = '%3d, %#016x, %#016x, %#016x, %#08x, %#08x, %s'
    rows = [
        row_format % (m['id'], m['base'], m['end'], 0, 0, 0, m['path'])
        for m in modules
    ]

    return preamble + '\n'.join(rows) + '\n'
|
||
def create_coverage(data):
    """Build the basic-block table section of a drcov file.

    data -- iterable of 8-byte records (byte strings). All-zero records are
            placeholders for blocks that did not match the whitelist or
            could not be resolved to a module, and are dropped.

    Returns a byte string: the 'BB Table: N bbs' line followed by the N
    surviving records concatenated. N always equals the number of records
    actually written.
    """
    empty_entry = b'\x00' * 8

    # Filter out the bbs that didn't match, or were unable to resolve.
    kept = [entry for entry in data if entry != empty_entry]

    # Byte literals keep this working on Python 2 (where bytes is str) and
    # on Python 3 (where the records arrive as bytes).
    bb_header = ('BB Table: %d bbs\n' % len(kept)).encode('ascii')

    # Join the filtered list, not the raw input, so the body agrees with
    # the count announced in the header.
    return bb_header + b''.join(kept)
|
||
def on_message(msg, data):
    """Dispatch one message coming from the injected script.

    msg  -- message dict delivered by frida. Messages produced by the
            script's send() carry a 'payload' key; other messages (for
            example script errors) do not.
    data -- binary attachment of the message, or None

    A payload containing 'map' is the module table and goes to
    populate_modules(); any other dict payload marks a coverage batch whose
    records are in `data` and goes to populate_bbs(). Messages without a
    dict payload are only printed.
    """
    print(msg)

    # Error messages have no 'payload'; indexing it would raise KeyError
    # inside the callback. The message has already been printed above.
    pay = msg.get('payload')
    if not isinstance(pay, dict):
        return

    if 'map' in pay:
        populate_modules(pay['map'])
    else:
        populate_bbs(data)
|
||
def main(argv):
    """Attach to the target, collect coverage until stdin closes, save it.

    argv -- full argument vector, program name first (normally sys.argv).
            If it is empty or None, argparse falls back to sys.argv.

    Command line:
        target           process name or pid to attach to
        -o/--outfile     coverage file to write (default 'frida-cov.log')
        -w/--whitelist   module to trace; may be repeated. With no -w,
                         every module is traced.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('target', help='target process name or pid')
    parser.add_argument('-o', '--outfile', help='coverage file',
                        default='frida-cov.log')
    parser.add_argument('-w', '--whitelist',
                        help='module to trace, may be specified multiple times',
                        action='append', default=[])

    args = parser.parse_args(argv[1:] if argv else None)

    whitelist = args.whitelist if args.whitelist else ['all']
    json_whitelist = json.dumps(whitelist)

    # frida.attach() treats a string as a process name and a number as a
    # pid, so an all-digit target has to be converted to reach a pid.
    target = int(args.target) if args.target.isdigit() else args.target

    session = frida.attach(target)
    script = session.create_script(js % (json_whitelist))

    script.on('message', on_message)
    script.load()

    # Block until stdin reaches EOF (Control-D). Ctrl-C is treated the same
    # way so the coverage gathered so far is still written out.
    try:
        sys.stdin.read()
    except KeyboardInterrupt:
        pass

    print('Detatching...')
    session.detach()

    print('Stopped collecting. Formatting coverage and saving...')

    header = create_header(modules)
    body = create_coverage(bbs)

    with open(args.outfile, 'wb') as h:
        for chunk in (header, body):
            # The file is opened in binary mode; text has to be encoded
            # first on Python 3 (on Python 2 str already is bytes).
            if not isinstance(chunk, bytes):
                chunk = chunk.encode('utf-8')
            h.write(chunk)

    print('Done!')
|
||
if __name__ == '__main__': | ||
main(sys.argv) |