Skip to content
Newer
Older
100644 481 lines (435 sloc) 15.5 KB
e740134 @comex add some missing files
authored
1 #include "find.h"
2 #include "binary.h"
3
2502f65 @comex links
authored
4 // Various links:
5 // http://ridiculousfish.com/blog/archives/2006/05/30/old-age-and-treachery/
6 // http://www-igm.univ-mlv.fr/~lecroq/string/tunedbm.html#SECTION00195
7 // http://www-igm.univ-mlv.fr/~lecroq/string/node19.html#SECTION00190 (was using this)
8
ec1623c @comex bool->int
authored
9 static addr_t find_data_raw(range_t range, int16_t *buf, ssize_t pattern_size, size_t offset, int align, int options, const char *name) {
1fa6b8f @comex pointless casts
authored
10 int8_t ps = (int8_t) pattern_size;
11 if(ps != pattern_size) {
12 die("pattern too long");
13 }
1ad73ce @comex http://ridiculousfish.com/blog/archives/2006/05/30/old-age-and-treach…
authored
14 // the problem with this is that it is faster to search for everything at once
5bab617 @comex Merge
authored
15
16 // reduce inefficiency
17 for(int pos = pattern_size - 1; pos >= 0; pos--) {
18 if(buf[pos] == -1) {
19 pattern_size--;
20 } else {
21 break;
22 }
23 }
e740134 @comex add some missing files
authored
24 int8_t table[256];
25 for(int c = 0; c < 256; c++) {
1fa6b8f @comex pointless casts
authored
26 table[c] = ps;
e740134 @comex add some missing files
authored
27 }
5bab617 @comex Merge
authored
28 for(int8_t pos = 0; pos < ps - 1; pos++) {
e740134 @comex add some missing files
authored
29 if(buf[pos] == -1) {
30 // Unfortunately, we can't put any character past being in this position...
31 for(int i = 0; i < 256; i++) {
1fa6b8f @comex pointless casts
authored
32 table[i] = ps - pos - 1;
e740134 @comex add some missing files
authored
33 }
34 } else {
1fa6b8f @comex pointless casts
authored
35 table[buf[pos]] = ps - pos - 1;
e740134 @comex add some missing files
authored
36 }
37 }
1ad73ce @comex http://ridiculousfish.com/blog/archives/2006/05/30/old-age-and-treach…
authored
38
5bab617 @comex Merge
authored
39 // this can't be -1 due to above
40 int8_t shift = table[buf[pattern_size - 1]];
41 table[buf[pattern_size - 1]] = 0;
1ad73ce @comex http://ridiculousfish.com/blog/archives/2006/05/30/old-age-and-treach…
authored
42
e740134 @comex add some missing files
authored
43 // now, for each c, let x be the last position in the string, other than the final position, where c might appear, or -1 if it doesn't appear anywhere; table[i] is size - x - 1.
44 // so if we got c but no match, we can skip ahead by table[i]
1ad73ce @comex http://ridiculousfish.com/blog/archives/2006/05/30/old-age-and-treach…
authored
45 // updated
46 buf += pattern_size - 1;
e740134 @comex add some missing files
authored
47 addr_t foundit = 0;
cecc109 @comex big fancy refactor
authored
48 prange_t pr = rangeconv(range, MUST_FIND);
1ad73ce @comex http://ridiculousfish.com/blog/archives/2006/05/30/old-age-and-treach…
authored
49 uint8_t *start = pr.start + pattern_size - 1;
a2e2520 @comex stuff
authored
50 uint8_t *end = pr.start + pr.size;
5bab617 @comex Merge
authored
51 uint8_t *cutoff = end - 400; // arbitrary
1ad73ce @comex http://ridiculousfish.com/blog/archives/2006/05/30/old-age-and-treach…
authored
52 uint8_t *cursor = start;
53
54 #define GUTS(keep_going) \
763ad89 @comex now compiles as C++ if you want to do that for some reason (I want to…
authored
55 { \
56 for(int i = 0; i >= (-pattern_size + 1); i--) { \
57 if(buf[i] != -1 && cursor[i] != buf[i]) { \
58 /* Not a match */ \
59 goto keep_going; \
60 } \
61 } \
62 /* Whoa, we found it */ \
63 addr_t new_match = cursor - start + range.start; \
64 if(align && (new_match & (align - 1))) { \
65 /* Just kidding. */ \
1ad73ce @comex http://ridiculousfish.com/blog/archives/2006/05/30/old-age-and-treach…
authored
66 goto keep_going; \
67 } \
763ad89 @comex now compiles as C++ if you want to do that for some reason (I want to…
authored
68 if(foundit) { \
b0d35b9 @comex something
authored
69 die("found [%s] multiple times in range: first at %08llx then at %08llx", name, (uint64_t) foundit, (uint64_t) new_match); \
763ad89 @comex now compiles as C++ if you want to do that for some reason (I want to…
authored
70 } \
71 foundit = new_match; \
72 if(align) { \
73 goto done; \
74 } \
1ad73ce @comex http://ridiculousfish.com/blog/archives/2006/05/30/old-age-and-treach…
authored
75 } \
76 /* otherwise, keep searching to make sure we won't find it again */ \
77 keep_going: \
78 cursor += shift;
5bab617 @comex Merge
authored
79
80 uint8_t jump;
1ad73ce @comex http://ridiculousfish.com/blog/archives/2006/05/30/old-age-and-treach…
authored
81
82 while(1) {
83 if(cursor >= cutoff) break;
84 do {
85 jump = table[*cursor];
86 cursor += jump;
87 jump = table[*cursor];
88 cursor += jump;
89 jump = table[*cursor];
90 cursor += jump;
91 if(cursor >= end) goto done;
92 } while(jump);
93 GUTS(lbl1)
94 }
95 if(cursor >= end) goto done;
96 while(1) {
5bab617 @comex Merge
authored
97 do {
98 jump = table[*cursor];
99 cursor += jump;
100 if(cursor >= end) goto done;
101 } while(jump);
1ad73ce @comex http://ridiculousfish.com/blog/archives/2006/05/30/old-age-and-treach…
authored
102 GUTS(lbl2)
e740134 @comex add some missing files
authored
103 }
1ad73ce @comex http://ridiculousfish.com/blog/archives/2006/05/30/old-age-and-treach…
authored
104 done:
e740134 @comex add some missing files
authored
105 if(foundit) {
106 return foundit + offset;
ec1623c @comex bool->int
authored
107 } else if(options & MUST_FIND) {
b0d35b9 @comex something
authored
108 die("didn't find [%s] in range (%08llx, %zx)", name, (uint64_t) range.start, range.size);
e740134 @comex add some missing files
authored
109 } else {
110 return 0;
111 }
112 }
113
5d8c0c7 @comex DFA
authored
114 static void parse_pattern(const char *to_find, int16_t buf[128], ssize_t *pattern_size, ssize_t *offset) {
115 *pattern_size = 0;
8acb169 @comex fixes and updates
authored
116 *offset = 0;
376685a @comex fixes; dicking around with exceptions
authored
117 autofree char *to_find_ = strdup(to_find);
e740134 @comex add some missing files
authored
118 while(to_find_) {
119 char *bit = strsep(&to_find_, " ");
120 if(!strcmp(bit, "-")) {
5d8c0c7 @comex DFA
authored
121 *offset = *pattern_size;
e740134 @comex add some missing files
authored
122 continue;
123 } else if(!strcmp(bit, "+")) {
5d8c0c7 @comex DFA
authored
124 *offset = *pattern_size + 1;
e740134 @comex add some missing files
authored
125 continue;
126 } else if(!strcmp(bit, "..")) {
5d8c0c7 @comex DFA
authored
127 buf[*pattern_size] = -1;
e740134 @comex add some missing files
authored
128 } else {
129 char *endptr;
5d8c0c7 @comex DFA
authored
130 buf[*pattern_size] = (int16_t) (strtol(bit, &endptr, 16) & 0xff);
e740134 @comex add some missing files
authored
131 if(*endptr) {
132 die("invalid bit %s in [%s]", bit, to_find);
133 }
134 }
5d8c0c7 @comex DFA
authored
135 if(++*pattern_size >= 128) {
e740134 @comex add some missing files
authored
136 die("pattern [%s] too big", to_find);
137 }
138 }
5d8c0c7 @comex DFA
authored
139 }
140
ec1623c @comex bool->int
authored
141 addr_t find_data(range_t range, const char *to_find, int align, int options) {
5d8c0c7 @comex DFA
authored
142 int16_t buf[128];
143 ssize_t pattern_size, offset;
144 parse_pattern(to_find, buf, &pattern_size, &offset);
ec1623c @comex bool->int
authored
145 return find_data_raw(range, buf, pattern_size, offset, align, options, to_find);
e740134 @comex add some missing files
authored
146 }
147
ec1623c @comex bool->int
authored
148 addr_t find_string(range_t range, const char *string, int align, int options) {
e740134 @comex add some missing files
authored
149 size_t len = strlen(string);
376685a @comex fixes; dicking around with exceptions
authored
150 autofree int16_t *buf = malloc(sizeof(int16_t) * (len + 2));
e740134 @comex add some missing files
authored
151 buf[0] = buf[len + 1] = 0;
99b4d5a @comex just because it's open source doesn't mean I have to write good commi…
authored
152 for(unsigned int i = 0; i < len; i++) {
e740134 @comex add some missing files
authored
153 buf[i+1] = (uint8_t) string[i];
154 }
be67f52 @comex sectrange
authored
155 bool pz = options & PRECEDING_ZERO;
b0d35b9 @comex something
authored
156 bool tz = options & TRAILING_ZERO;
157 addr_t result = find_data_raw(range, pz ? buf : buf + 1, len + tz + pz, pz ? 1 : 0, align, options, string);
e740134 @comex add some missing files
authored
158 return result;
159 }
160
ec1623c @comex bool->int
authored
161 addr_t find_bytes(range_t range, const char *bytes, size_t len, int align, int options) {
376685a @comex fixes; dicking around with exceptions
authored
162 autofree int16_t *buf = malloc(sizeof(int16_t) * (len + 2));
4b1d25e @comex make it better
authored
163 for(unsigned int i = 0; i < len; i++) {
164 buf[i] = (uint8_t) bytes[i];
165 }
ec1623c @comex bool->int
authored
166 addr_t result = find_data_raw(range, buf, len, 0, align, options, "bytes");
4b1d25e @comex make it better
authored
167 return result;
168 }
ec1623c @comex bool->int
authored
169 addr_t find_int32(range_t range, uint32_t number, int options) {
cecc109 @comex big fancy refactor
authored
170 prange_t pr = rangeconv(range, MUST_FIND);
d2f1738 @comex blah
authored
171 char *start = pr.start;
172 char *end = pr.start + pr.size;
a2e2520 @comex stuff
authored
173 for(char *p = start; p + 4 <= end; p++) {
e740134 @comex add some missing files
authored
174 if(*((uint32_t *)p) == number) {
175 return p - start + range.start;
176 }
177 }
ec1623c @comex bool->int
authored
178 if(options & MUST_FIND) {
63d5d54 @comex rejigger
authored
179 die("didn't find %08x in range", number);
e740134 @comex add some missing files
authored
180 } else {
181 return 0;
182 }
183 }
184
ec1623c @comex bool->int
authored
185 // search for push {..., lr}; add r7, sp, ...
186 // if is_thumb = 2, then search for both thumb and arm variants
be62ccb @comex unfail
authored
187 addr_t find_bof(range_t range, addr_t eof, int is_thumb) {
ec1623c @comex bool->int
authored
188 addr_t start = eof & ~1;
be62ccb @comex unfail
authored
189 if(start - range.start >= range.size) {
b0d35b9 @comex something
authored
190 die("out of range: %llx", (uint64_t) eof);
be62ccb @comex unfail
authored
191 }
192
cecc109 @comex big fancy refactor
authored
193 uint8_t *p = rangeconv(range, MUST_FIND).start + (start - range.start);
be62ccb @comex unfail
authored
194 addr_t addr = start;
8acb169 @comex fixes and updates
authored
195 if(addr & 1) { p--; addr--; }
196 for(p -= 8, addr -= 8; addr >= start - 0x1000 && addr >= range.start; p -= 2, addr -= 2) {
ec1623c @comex bool->int
authored
197 if(p[1] == 0xb5 && p[3] == 0xaf && is_thumb != 0) {
be62ccb @comex unfail
authored
198 return addr | 1;
ec1623c @comex bool->int
authored
199 } else if(p[2] == 0x2d && p[3] == 0xe9 &&
200 p[6] == 0x8d && p[7] == 0xe2 &&
be62ccb @comex unfail
authored
201 is_thumb != 1 && !(addr & 2)) {
202 return addr;
e740134 @comex add some missing files
authored
203 }
ec1623c @comex bool->int
authored
204
e740134 @comex add some missing files
authored
205 }
b0d35b9 @comex something
authored
206 die("couldn't find the beginning of %08llx", (uint64_t) eof);
e740134 @comex add some missing files
authored
207 }
208
ec1623c @comex bool->int
authored
209 uint32_t resolve_ldr(const struct binary *binary, addr_t addr) {
d9c3289 @comex read32 -> b_read32
authored
210 uint32_t val = b_read32(binary, addr & ~1);
e740134 @comex add some missing files
authored
211 addr_t target;
212 if(addr & 1) {
213 addr_t base = ((addr + 3) & ~3);
214 if((val & 0xf800) == 0x4800) { // thumb
215 target = base + ((val & 0xff) * 4);
216 } else if((val & 0xffff) == 0xf8df) { // thumb-2
217 target = base + ((val & 0x0fff0000) >> 16);
218 } else {
b0d35b9 @comex something
authored
219 die("weird thumb instruction %08x at %08llx", val, (uint64_t) addr);
e740134 @comex add some missing files
authored
220 }
221 } else {
222 addr_t base = addr + 8;
223 if((val & 0x0fff0000) == 0x59f0000) { // arm
224 target = base + (val & 0xfff);
225 } else {
b0d35b9 @comex something
authored
226 die("weird ARM instruction %08x at %08llx", val, (uint64_t) addr);
e740134 @comex add some missing files
authored
227 }
228 }
d9c3289 @comex read32 -> b_read32
authored
229 return b_read32(binary, target);
e740134 @comex add some missing files
authored
230 }
231
995817e @comex BL
authored
232 addr_t find_bl(range_t *range) {
233 bool thumb = range->start & 1;
234 range->start &= ~1;
cecc109 @comex big fancy refactor
authored
235 prange_t pr = rangeconv(*range, MUST_FIND);
995817e @comex BL
authored
236 uint32_t diff;
237 void *base;
238 if(thumb) {
239 uint16_t *p = pr.start;
240 while((uintptr_t)(p + 2) <= (uintptr_t)pr.start + pr.size) {
241 base = p;
242 uint16_t val = *p++;
243 if((val & 0xf800) == 0xf000) {
244 uint32_t imm10 = val & 0x3ff;
245 uint32_t S = ((val & 0x400) >> 10);
246 uint16_t val2 = *p++;
247
248 uint32_t J1 = ((val2 & 0x2000) >> 13);
249 uint32_t J2 = ((val2 & 0x800) >> 11);
250 uint32_t I1 = ~(J1 ^ S) & 1, I2 = ~(J2 ^ S) & 1;
251 uint32_t imm11 = val2 & 0x7ff;
252 diff = (S << 24) | (I1 << 23) | (I2 << 22) | (imm10 << 12) | (imm11 << 1);
253
254 if((val2 & 0xd000) == 0xd000) {
255 // BL
256 diff |= 1;
257 goto ok;
258 } else if((val2 & 0xd000) == 0xc000) {
259 // BLX
260 goto ok;
261 }
262 }
263 }
264 } else {
265 uint32_t *p = pr.start;
266 while((uintptr_t)(p + 1) <= (uintptr_t)pr.start + pr.size) {
267 base = p;
268 uint32_t val = *p++;
269 if((val & 0xfe000000) == 0xfa000000) {
270 // BL
271 diff = ((val & 0xffffff) << 2);
272 goto ok;
273 } else if((val & 0x0f000000) == 0x0b000000) {
274 // BLX
275 diff = ((val & 0x1000000) >> 23) | ((val & 0xffffff) << 2) | 1;
276 goto ok;
277 }
278 }
279 }
280 return 0;
281 ok:;
282 addr_t baseaddr = ((char *) base) - ((char *) pr.start) + range->start + 4;
283 range->start = baseaddr + thumb;
284 if(diff & 0x800000) diff |= 0xff000000;
285 return baseaddr + diff;
286 }
287
9731118 @comex findanywhere
authored
288 #define unparen(args...) args
289 #define find_anywhere_func(name, args1, args2) \
290 addr_t b_find_##name##_anywhere(const struct binary *binary, unparen args1, int options) { \
291 uint32_t end = binary->nsegments - 1; \
292 for(uint32_t i = 0; i <= end; i++) { \
293 range_t range = binary->segments[i].vm_range; \
294 addr_t result = find_##name(range, unparen args2, i == end ? options : options & ~MUST_FIND); \
295 if(result) return result; \
296 } \
297 return 0; /* won't reach */ \
e740134 @comex add some missing files
authored
298 }
5d8c0c7 @comex DFA
authored
299
9731118 @comex findanywhere
authored
300 find_anywhere_func(data, (const char *to_find, int align), (to_find, align))
301 find_anywhere_func(string, (const char *string, int align), (string, align))
302 find_anywhere_func(bytes, (const char *bytes, size_t len, int align), (bytes, len, align))
303 find_anywhere_func(int32, (uint32_t number), (number))
304
763ad89 @comex now compiles as C++ if you want to do that for some reason (I want to…
authored
305 struct pattern {
306 int16_t buf[128];
307 ssize_t pattern_size, offset;
308 const char *name;
309 addr_t *result;
310 };
311
5d8c0c7 @comex DFA
authored
312 struct findmany {
313 range_t range;
314 int num_patterns;
763ad89 @comex now compiles as C++ if you want to do that for some reason (I want to…
authored
315 struct pattern *patterns;
5d8c0c7 @comex DFA
authored
316 };
317
318 struct findmany *findmany_init(range_t range) {
319 struct findmany *fm = malloc(sizeof(*fm));
320 fm->range = range;
321 fm->num_patterns = 0;
322 fm->patterns = NULL;
323 return fm;
324 }
325
326 void findmany_add(addr_t *result, struct findmany *fm, const char *to_find) {
327 if(fm->num_patterns >= 32) {
328 die("too many patterns");
329 }
330 fm->num_patterns++;
331 fm->patterns = realloc(fm->patterns, sizeof(struct pattern) * fm->num_patterns);
332 struct pattern *pat = &fm->patterns[fm->num_patterns - 1];
333
334 parse_pattern(to_find, pat->buf, &pat->pattern_size, &pat->offset);
335 pat->name = to_find;
336 pat->result = result;
337 *result = 0;
338 }
339
763ad89 @comex now compiles as C++ if you want to do that for some reason (I want to…
authored
340 struct node {
341 uint16_t next[16];
342 uint32_t terminates;
343 };
344
345 struct node2 {
346 uint16_t next[16];
347 };
348
5d8c0c7 @comex DFA
authored
349 struct findmany2 {
763ad89 @comex now compiles as C++ if you want to do that for some reason (I want to…
authored
350 struct node *nodes;
5d8c0c7 @comex DFA
authored
351 uint8_t *index_paths;
352 uint16_t node_count;
763ad89 @comex now compiles as C++ if you want to do that for some reason (I want to…
authored
353 struct node2 *nodes2;
5d8c0c7 @comex DFA
authored
354 uint16_t node2_count;
355 };
356
357 static inline int find_or_create(void **restrict array, void *restrict cmp, uint16_t *restrict num_items, int item_size, uint16_t *restrict node) {
358 char *p = *array;
359 for(int j = 0; j < *num_items; j++) {
360 if(!memcmp(p, cmp, item_size)) {
361 *node = j;
362 return false;
363 }
364 p += item_size;
365 }
366 if(*num_items == 65535) {
367 die("welp");
368 }
369 *node = *num_items;
370 *array = realloc(*array, ++*num_items * item_size);
371 memcpy(*array + *node * item_size, cmp, item_size);
372 return true;
373 }
374
375 static uint16_t findmany_recurse(const struct findmany *restrict fm, struct findmany2 *restrict fm2, uint8_t *restrict cur_index_path) {
376 // this part is inefficient
377 uint16_t node;
378 if(!find_or_create((void **) &fm2->index_paths, cur_index_path, &fm2->node_count, fm->num_patterns, &node)) {
379 // it found an existing node
380 return node;
381 }
382 /*
383 printf("findmany_recurse: created node %d with cur_index_path = ", node);
384 for(int p = 0; p < fm->num_patterns; p++) {
385 printf(" %d", (int) cur_index_path[p]);
386 }
387 printf("\n");
388 */
389 memcpy(fm2->index_paths + node * fm->num_patterns, cur_index_path, fm->num_patterns);
390
391 fm2->nodes = realloc(fm2->nodes, fm2->node_count * sizeof(struct node));
392 struct node *nodep = &fm2->nodes[node];
393
394 nodep->terminates = 0;
395 for(int p = 0; p < fm->num_patterns; p++) {
396 if(cur_index_path[p] == fm->patterns[p].pattern_size) {
397 nodep->terminates |= (1 << p);
398 cur_index_path[p] = 0;
399 }
400 }
401
402 autofree uint8_t *new_index_path = malloc(fm->num_patterns);
403 for(uint8_t hi = 0; hi < 16; hi++) {
404 struct node2 n2;
405 for(uint8_t lo = 0; lo < 16; lo++) {
406 uint8_t chr = hi * 16 + lo;
407
408 uint8_t orr = 0; // hack - a lot will point to 0
409
410 for(int p = 0; p < fm->num_patterns; p++) {
411 uint8_t idx = cur_index_path[p];
412 int16_t comp = fm->patterns[p].buf[idx];
413 if(comp == -1 || comp == chr) {
414 idx++;
415 } else {
416 idx = 0;
417 }
418 new_index_path[p] = idx;
419 orr |= idx;
420 }
421
422 n2.next[lo] = orr ? findmany_recurse(fm, fm2, new_index_path) : 0;
423 }
424 nodep = &fm2->nodes[node];
425 find_or_create((void **) &fm2->nodes2, &n2, &fm2->node2_count, sizeof(struct node2), &nodep->next[hi]);
426 }
427
428 //printf("returning node %d\n", node);
429 return node;
430 }
431
432 void findmany_go(struct findmany *fm) {
433 struct findmany2 fm2;
434 memset(&fm2, 0, sizeof(fm2));
435 autofree uint8_t *cur_index_path = calloc(1, fm->num_patterns);
436 #ifdef PROFILING
437 clock_t a = clock();
438 #endif
439 findmany_recurse(fm, &fm2, cur_index_path);
440 #ifdef PROFILING
441 clock_t b = clock();
442 printf("it took %d clocks to prepare the DFA\n", (int) (b - a));
443 #endif
444
cecc109 @comex big fancy refactor
authored
445 prange_t pr = rangeconv(fm->range, MUST_FIND);
5d8c0c7 @comex DFA
authored
446 uint8_t *start = pr.start;
447 struct node *cur = &fm2.nodes[0];
448 for(uint8_t *ptr = start; ptr < start + pr.size; ptr++) {
449 uint8_t chr = *ptr;
450
451 cur = &fm2.nodes[fm2.nodes2[cur->next[chr / 16]].next[chr % 16]];
452 if(cur->terminates) {
453 for(int p = 0, bit = 1; p < fm->num_patterns; p++, bit <<= 1) {
454 if(cur->terminates & bit) {
455 struct pattern *pat = &fm->patterns[p];
456 addr_t result = ptr - pat->pattern_size - start + fm->range.start + 1;
457 if(*pat->result) {
b0d35b9 @comex something
authored
458 die("found [%s] multiple times in range: first at %08llx then at %08llx", pat->name, (uint64_t) *pat->result, (uint64_t) result);
5d8c0c7 @comex DFA
authored
459 }
460 *pat->result = result + pat->offset;
461 }
462
463 }
464 }
465 }
466
467 free(fm2.index_paths); // could be done earlier
468 free(fm2.nodes);
469 free(fm2.nodes2);
470
471 for(int p = 0; p < fm->num_patterns; p++) {
472 struct pattern *pat = &fm->patterns[p];
473 if(!*pat->result) {
b0d35b9 @comex something
authored
474 die("didn't find [%s] in range(%llx, %zx)", pat->name, (uint64_t) fm->range.start, fm->range.size);
5d8c0c7 @comex DFA
authored
475 }
476 }
477
478 free(fm->patterns);
479 free(fm);
480 }
Something went wrong with that request. Please try again.