-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfrida-drcov.py
More file actions
executable file
·468 lines (369 loc) · 14.2 KB
/
frida-drcov.py
File metadata and controls
executable file
·468 lines (369 loc) · 14.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
#!/usr/bin/env python
from __future__ import print_function
import argparse
import json
import os
import signal
import sys
import frida
"""
Frida BB tracer that outputs in DRcov format.
Frida script is responsible for:
- Getting and sending the process module map initially
- Getting the code execution events
- Parsing the raw event into a GumCompileEvent
- Converting from GumCompileEvent to DRcov block
- Sending a list of DRcov blocks to python
Python side is responsible for:
- Attaching and detaching from the target process
- Removing duplicate DRcov blocks
- Formatting module map and blocks
- Writing the output file
"""
# Our frida script. It is a %-format template with four `%s` slots, each of
# which main() fills with a JSON literal:
# 1. whitelist of modules, in the form ["module_a", "module_b"] or ["all"];
#    entries are matched as case-insensitive substrings of the module name
# 2. threads to trace, in the form [345, 765] or ["all"]
# 3. native trigger, in the form ["module", "export"], or [] for none
# 4. Objective-C trigger, in the form ["class", "method"], or [] for none
# When a trigger is given, threads are only stalked between entry to and
# return from the trigger function; otherwise stalking starts immediately.
# NOTE: because the text goes through Python's % operator, any literal percent
# sign added to the JavaScript below must be written as a doubled percent sign.
js = """
"use strict";
var whitelist = %s;
var threadlist = %s;
var nativetrigger = %s;
var objctrigger = %s;
// Get the module map
function make_maps() {
var maps = Process.enumerateModulesSync();
var i = 0;
// We need to add the module id
maps.map(function(o) { o.id = i++; });
// .. and the module end point
maps.map(function(o) { o.end = o.base.add(o.size); });
return maps;
}
var maps = make_maps()
send({'map': maps});
// We want to use frida's ModuleMap to create DRcov events, however frida's
// Module object doesn't have the 'id' we added above. To get around this,
// we'll create a mapping from path -> id, and have the ModuleMap look up the
// path. While the ModuleMap does contain the base address, if we cache it
// here, we can simply look up the path rather than the entire Module object.
var module_ids = {};
maps.map(function (e) {
module_ids[e.path] = {id: e.id, start: e.base};
});
var filtered_maps = new ModuleMap(function (m) {
if (whitelist.indexOf('all') >= 0) { return true; }
return whitelist.some(item => m.name.toLowerCase().includes(item.toLowerCase()));
});
// This function takes a list of GumCompileEvents and converts it into a DRcov
// entry. Note that we'll get duplicated events when two traced threads
// execute the same code, but this will be handled by the python side.
function drcov_bbs(bbs, fmaps, path_ids) {
// We're going to use send(..., data) so we need an array buffer to send
// our results back with. Let's go ahead and alloc the max possible
// reply size
/*
// Data structure for the coverage info itself
typedef struct _bb_entry_t {
uint start; // offset of bb start from the image base
ushort size;
ushort mod_id;
} bb_entry_t;
*/
var entry_sz = 8;
var bb = new ArrayBuffer(entry_sz * bbs.length);
var num_entries = 0;
for (var i = 0; i < bbs.length; ++i) {
var e = bbs[i];
var start = e[0];
var end = e[1];
var path = fmaps.findPath(start);
if (path == null) { continue; }
var mod_info = path_ids[path];
var offset = start.sub(mod_info.start).toInt32();
var size = end.sub(start).toInt32();
var mod_id = mod_info.id;
// We're going to create two memory views into the array we alloc'd at
// the start.
// we want one u32 after all the other entries we've created
var x = new Uint32Array(bb, num_entries * entry_sz, 1);
x[0] = offset;
// we want two u16's offset after the 4 byte u32 above
var y = new Uint16Array(bb, num_entries * entry_sz + 4, 2);
y[0] = size;
y[1] = mod_id;
++num_entries;
}
// We can save some space here, rather than sending the entire array back,
// we can create a new view into the already allocated memory, and just
// send back that linear chunk.
return new Uint8Array(bb, 0, num_entries * entry_sz);
}
// Punt on self modifying code -- should improve speed and lighthouse will
// barf on it anyways
Stalker.trustThreshold = 0;
function attach_threads(){
// Note, we will miss any bbs hit by threads that are created after we've
// attached
Process.enumerateThreads({
onMatch: function (thread) {
if (threadlist.indexOf(thread.id) < 0 &&
threadlist.indexOf('all') < 0) {
// This is not the thread you're look for
return;
}
console.log('Stalking thread ' + thread.id + '.');
Stalker.follow(thread.id, {
events: {
compile: true
},
onReceive: function (event) {
var bb_events = Stalker.parse(event,
{stringify: false, annotate: false});
var bbs = drcov_bbs(bb_events, filtered_maps, module_ids);
// We're going to send a dummy message, the actual bb is in the
// data field. We're sending a dict to keep it consistent with
// the map. We're also creating the drcov event in javascript,
// so on the py recv side we can just blindly add it to a set.
send({bbs: 1}, bbs);
}
});
},
onComplete: function () { console.log('Done stalking threads.'); }
});
}
function detach_threads(){
Process.enumerateThreads({
onMatch: function (thread) {
if (threadlist.indexOf(thread.id) < 0 &&
threadlist.indexOf('all') < 0) {
// This is not the thread you're look for
return;
}
console.log('Stalker detach from thread ' + thread.id + '.');
Stalker.unfollow(thread.id);
},
onComplete: function () { console.log('Done stalking threads.'); }
});
}
if(nativetrigger.length > 0){
try{
var trigger = Module.getExportByName(nativetrigger[0], nativetrigger[1]);
Interceptor.attach(trigger, {
onEnter(args) {
console.log('[*] Native trigger hit. Attaching to threads now.');
attach_threads();
},
onLeave(retval) {
console.log('[*] Native trigger left. Detaching from threads now.');
Interceptor.detachAll();
detach_threads();
}
});
}catch(error){
console.log('[-] Error: Native module/method pair not found in process!');
}
}else if(objctrigger.length > 0){
try{
var objcclass = ObjC.classes[objctrigger[0]];
if(!objcclass){
throw new Error();
}
var objcmethod = null;
for(var j = 0; j < objcclass.$methods.length; j++){
if(objcclass.$methods[j] == objctrigger[1]){
objcmethod = objcclass.$methods[j];
break;
}
}
if(!objcmethod){
throw new Error();
}
var trigger = objcclass[objcmethod].implementation;
Interceptor.attach(trigger, {
onEnter(args) {
console.log('[*] ObjC trigger hit. Attaching to threads now.');
attach_threads();
},
onLeave(retval) {
console.log('[*] ObjC trigger left. Detaching from threads now.');
Interceptor.detachAll();
detach_threads();
}
});
}catch(error){
console.log('[-] Error: ObjC module/method pair not found in process!');
}
}else{
attach_threads();
}
"""
# Shared state lives at module level so that both the frida message callback
# and the SIGINT handler can reach it. `bbs` must stay a set: deduplication of
# the received DRcov entries relies on its uniqueness guarantee.
modules = []
bbs = set()
outfile = 'frida-cov.log'
def populate_modules(image_list):
    """Record the module map sent by the frida script.

    Each element of ``image_list`` is a mapping with the keys ``id``,
    ``path``, ``base``, ``end`` and ``size``. ``base`` and ``end`` arrive as
    strings and are parsed with ``int(value, 0)``, i.e. the numeric base is
    taken from the string's prefix. One plain dict per image is appended to
    the module-level ``modules`` list, in the order received.
    """
    # `modules` is only mutated, never rebound, so no `global` is required.
    modules.extend(
        {
            'id': image['id'],
            'path': image['path'],
            'base': int(image['base'], 0),
            'end': int(image['end'], 0),
            'size': image['size'],
        }
        for image in image_list
    )
    print('[+] Got module info.')
def populate_bbs(data):
    """Add the DRcov entries contained in ``data`` to the global ``bbs`` set.

    ``data`` is the binary attachment of a frida message: a concatenation of
    fixed-size 8-byte DRcov entries. Every entry is inserted into ``bbs`` as
    its own bytes object, so duplicates collapse automatically.

    A missing (``None``) or empty attachment is ignored. A trailing fragment
    shorter than one entry is dropped, because a short element in ``bbs``
    would misalign every entry written after it in the output file.
    """
    if not data:
        return
    block_sz = 8
    # Only iterate over whole entries.
    usable = len(data) - len(data) % block_sz
    bbs.update(data[i:i + block_sz] for i in range(0, usable, block_sz))
def create_header(mods):
    """Format the module list as the header of a DRcov log file.

    ``mods`` is a sequence of dicts providing ``id``, ``base``, ``end`` and
    ``path``. The result is the UTF-8 encoded text made of the version and
    flavor lines, the module-table banner with the module count, the column
    legend, and one row per module, terminated by a newline.

    The entry, checksum and timestamp columns are always written as zero;
    frida does not supply them.
    """
    preamble = ''.join(line + '\n' for line in (
        'DRCOV VERSION: 2',
        'DRCOV FLAVOR: frida',
        'Module Table: version 2, count %d' % len(mods),
        'Columns: id, base, end, entry, checksum, timestamp, path',
    ))
    # drcov columns: id, base, end, entry, checksum, timestamp, path
    row = '%3d, %#016x, %#016x, %#016x, %#08x, %#08x, %s'
    table = '\n'.join(
        row % (m['id'], m['base'], m['end'], 0, 0, 0, m['path'])
        for m in mods
    )
    return (preamble + table + '\n').encode('utf-8')
def create_coverage(data):
    """Build the basic-block section of a DRcov log file.

    ``data`` is an iterable of bytes objects, one DRcov entry each. The
    result is the ``BB Table`` banner carrying the entry count, followed by
    the raw entries.

    Entries are written in sorted order. The caller passes a set, whose
    iteration order is not stable from run to run; sorting makes the output
    file reproducible for identical coverage.
    """
    entries = sorted(data)
    return b'BB Table: %d bbs\n' % len(entries) + b''.join(entries)
def on_message(msg, data):
    """Handle one message delivered by the frida script.

    ``msg`` is the message dict and ``data`` its optional binary attachment.
    A payload containing the key ``map`` carries the module map and goes to
    ``populate_modules``; any other payload is treated as a coverage
    notification whose entries are in ``data`` and goes to ``populate_bbs``.

    Messages without a ``payload`` key (frida reports script errors this way)
    are printed instead of raising ``KeyError``, so the underlying script
    failure stays visible.
    """
    if 'payload' not in msg:
        print('[-] Error reported by frida script: %s'
              % msg.get('description', msg))
        return

    payload = msg['payload']
    if 'map' in payload:
        populate_modules(payload['map'])
    else:
        populate_bbs(data)
def sigint(signo, frame):
    """SIGINT handler: save what has been collected and terminate.

    ``signo`` and ``frame`` are the standard signal-handler arguments and are
    not used. The process ends with exit status 1.
    """
    print('[!] SIGINT, saving %d blocks to \'%s\'' % (len(bbs), outfile))
    save_coverage()
    print('[!] Done')
    # os._exit() terminates without flushing stdio buffers, so push out the
    # messages above first; otherwise they can be lost when stdout is a pipe
    # or a file.
    sys.stdout.flush()
    os._exit(1)
def save_coverage():
    """Write the collected module map and basic blocks to ``outfile``.

    Both sections are rendered before the file is opened, so a failure while
    formatting does not truncate an existing file.
    """
    sections = (create_header(modules), create_coverage(bbs))
    with open(outfile, 'wb') as out:
        for section in sections:
            out.write(section)
def _build_arg_parser():
    """Return the argparse parser describing the command-line interface."""
    parser = argparse.ArgumentParser()
    parser.add_argument('target',
                        help='target process name or pid')
    parser.add_argument('-o', '--outfile',
                        help='coverage file',
                        default='frida-cov.log')
    parser.add_argument('-w', '--whitelist-modules',
                        help='module to trace, may be specified multiple times [all]',
                        action='append', default=[])
    parser.add_argument('-t', '--thread-id',
                        help='threads to trace, may be specified multiple times [all]',
                        action='append', type=int, default=[])
    parser.add_argument('-N', '--native-module',
                        help='select a native module to trigger the trace',
                        action='append', default=[])
    parser.add_argument('-n', '--native-method',
                        help='select a native method to trigger the trace',
                        action='append', default=[])
    parser.add_argument('-M', '--objc-class',
                        help='select an objective-c class to trigger the trace',
                        action='append', default=[])
    parser.add_argument('-m', '--objc-method',
                        help='select an objective-c method to trigger the trace',
                        action='append', default=[])
    parser.add_argument('-D', '--device',
                        help='select a device by id [local]',
                        default='local')
    return parser


def _find_target(device, wanted):
    """Return the pid of the first process matching ``wanted``, or None.

    A process matches when ``wanted`` equals its name or its pid written as a
    string. Every additional match only produces a warning.
    """
    found = None
    for proc in device.enumerate_processes():
        if wanted not in (str(proc.pid), proc.name):
            continue
        if found is None:
            found = proc.pid
        else:
            print('[-] Warning: multiple processes on device match '
                  '\'%s\', using pid: %d' % (wanted, found))
    return found


def _make_trigger(owners, members, usage):
    """Return ``[owner, member]`` for a trigger, or ``[]`` if none was asked.

    ``owners`` and ``members`` are the values collected for the two options
    that describe one trigger. Exits with status 1, printing ``usage``, when
    the options were used but do not amount to exactly one of each.
    """
    if not owners and not members:
        return []
    if len(owners) != 1 or len(members) != 1:
        print('[-] Error: %s' % usage)
        sys.exit(1)
    return [owners[0], members[0]]


def main():
    """Command-line entry point: attach to a process and record coverage.

    Parses the arguments, resolves the target process on the selected device,
    installs the SIGINT handler, loads the frida script with the requested
    module/thread filters and optional trigger, then blocks reading stdin
    until end-of-file. On end-of-file the session is detached, the coverage
    is written to the output file and the process exits with status 0.
    Exits with status 1 when the target cannot be found or the trigger
    options are inconsistent.
    """
    global outfile

    args = _build_arg_parser().parse_args()
    outfile = args.outfile

    device = frida.get_device(args.device)

    target = _find_target(device, args.target)
    if target is None:
        print('[-] Error: could not find process matching '
              '\'%s\' on device \'%s\'' % (args.target, device.id))
        sys.exit(1)

    signal.signal(signal.SIGINT, sigint)

    # An empty selection means "trace everything".
    whitelist_modules = args.whitelist_modules or ['all']
    threadlist = args.thread_id or ['all']

    # At most one kind of trigger may be requested, and it must name exactly
    # one container and one method: the script indexes the pair as [0], [1].
    wants_native = bool(args.native_module or args.native_method)
    wants_objc = bool(args.objc_class or args.objc_method)
    if wants_native and wants_objc:
        print('[-] Error: Cannot trigger on both native and Objective-C methods')
        sys.exit(1)

    native_trigger = _make_trigger(
        args.native_module, args.native_method,
        'a native trigger needs exactly one module (-N) and one method (-n)')
    objc_trigger = _make_trigger(
        args.objc_class, args.objc_method,
        'an Objective-C trigger needs exactly one class (-M) and one method (-m)')

    if objc_trigger:
        print('[*] Triggering on Objective-C method: %s %s' % tuple(objc_trigger))
    if native_trigger:
        print('[*] Triggering on native method: %s %s' % tuple(native_trigger))

    # The script template takes its four parameters as JSON literals.
    script_source = js % (
        json.dumps(whitelist_modules),
        json.dumps(threadlist),
        json.dumps(native_trigger),
        json.dumps(objc_trigger),
    )

    print('[*] Attaching to pid \'%d\' on device \'%s\'...' %
          (target, device.id))
    session = device.attach(target)
    print('[+] Attached. Loading script...')

    script = session.create_script(script_source)
    script.on('message', on_message)
    script.load()

    print('[*] Now collecting info, control-C or control-D to terminate....')

    # Block until stdin reaches end-of-file; control-C is handled by sigint().
    sys.stdin.read()

    print('[*] Detaching, this might take a second...')
    session.detach()

    print('[+] Detached. Got %d basic blocks.' % len(bbs))
    print('[*] Formatting coverage and saving...')

    save_coverage()

    print('[!] Done')

    sys.exit(0)
# Run the tracer only when this file is executed as a script.
if __name__ == "__main__":
    main()