-
Notifications
You must be signed in to change notification settings - Fork 452
/
strings.py
103 lines (77 loc) · 2.83 KB
/
strings.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# Copyright (C) 2017 Mandiant, Inc. All Rights Reserved.
import re
from typing import Iterable
from itertools import chain
from floss.results import StaticString, StringEncoding
# Body of a regex character class covering space, ASCII punctuation, digits,
# letters and tab; punctuation is backslash-escaped for use inside [...].
# we don't include \r and \n to make output easier to understand by humans and to simplify rendering
ASCII_BYTE = rb" !\"#\$%&\'\(\)\*\+,-\./0123456789:;<=>\?@ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\]\^_`abcdefghijklmnopqrstuvwxyz\{\|\}\\\~\t"
# Precompiled pattern: a run of at least 4 bytes from ASCII_BYTE.
ASCII_RE_4 = re.compile(rb"([%s]{%d,})" % (ASCII_BYTE, 4))
# Precompiled pattern: a run of at least 4 (ASCII_BYTE byte, NUL byte) pairs.
UNICODE_RE_4 = re.compile(rb"((?:[%s]\x00){%d,})" % (ASCII_BYTE, 4))
# Candidate fill characters, stored as one-character str values. The extract_*
# functions consult this list, together with buf_filled_with(), to decide
# whether a buffer made of a single repeated character can be skipped.
REPEATS = ["A", "\x00", "\xfe", "\xff"]
# Default minimum string length used by the extract_* functions.
MIN_LENGTH = 4
# Number of buffer elements compared per iteration in buf_filled_with().
SLICE_SIZE = 4096
def buf_filled_with(buf, character):
    """
    Check whether a buffer consists solely of one repeated character.

    :param buf: The buffer to inspect (sliceable, e.g. a bytestring).
    :param character: The fill value to test for. Either a one-element
      sequence of the same kind as ``buf`` (e.g. ``b"A"`` for bytes), or an
      integer byte value such as the result of indexing a bytes object.
    :return: True if every element of ``buf`` equals ``character`` (this
      includes an empty ``buf``), otherwise False.
    """
    # Indexing bytes yields an int on Python 3. Multiplying that int by
    # SLICE_SIZE would produce a number rather than a chunk to compare with,
    # so turn it into a one-byte bytestring first.
    if isinstance(character, int):
        character = bytes([character])

    dupe_chunk = character * SLICE_SIZE
    # Compare slice by slice so that no full-size copy of buf is ever built.
    for offset in range(0, len(buf), SLICE_SIZE):
        new_chunk = buf[offset : offset + SLICE_SIZE]
        # The final slice may be shorter than SLICE_SIZE; trim the reference.
        if dupe_chunk[: len(new_chunk)] != new_chunk:
            return False
    return True
def extract_ascii_unicode_strings(buf, n=MIN_LENGTH) -> Iterable[StaticString]:
    """
    Extract both ASCII and naive UTF-16 strings from the given binary data.

    All results of extract_ascii_strings are yielded first, followed by all
    results of extract_unicode_strings.

    :param buf: A bytestring.
    :param n: The minimum length of strings to extract.
    :rtype: Iterable[StaticString]
    """
    for extractor in (extract_ascii_strings, extract_unicode_strings):
        yield from extractor(buf, n)
def extract_ascii_strings(buf, n=MIN_LENGTH) -> Iterable[StaticString]:
    """
    Extract ASCII strings from the given binary data.

    Nothing is yielded for an empty buffer, nor for a buffer that consists
    entirely of one repeated character listed in REPEATS.

    :param buf: A bytestring.
    :type buf: bytes
    :param n: The minimum length of strings to extract.
    :type n: int
    :rtype: Iterable[StaticString]
    """
    if not buf:
        return

    # Indexing bytes yields an int on Python 3, which never compares equal to
    # the one-character str entries of REPEATS. Take a one-byte slice instead
    # and map it to the matching str (latin-1 is a 1:1 byte <-> code point
    # mapping) for the membership test.
    first = bytes(buf[:1])
    if first.decode("latin-1") in REPEATS and buf_filled_with(buf, first):
        return

    # Reuse the precompiled pattern for the common default length.
    if n == 4:
        r = ASCII_RE_4
    else:
        r = re.compile(rb"([%s]{%d,})" % (ASCII_BYTE, n))

    for match in r.finditer(buf):
        yield StaticString(string=match.group().decode("ascii"), offset=match.start(), encoding=StringEncoding.ASCII)
def extract_unicode_strings(buf, n=MIN_LENGTH) -> Iterable[StaticString]:
    """
    Extract naive UTF-16 strings from the given binary data.

    "Naive" means only sequences of ASCII_BYTE characters that are each
    followed by a NUL byte are recognised. Nothing is yielded for an empty
    buffer, nor for a buffer that consists entirely of one repeated character
    listed in REPEATS.

    :param buf: A bytestring.
    :type buf: bytes
    :param n: The minimum length of strings to extract.
    :type n: int
    :rtype: Iterable[StaticString]
    """
    if not buf:
        return

    # Indexing bytes yields an int on Python 3, which never compares equal to
    # the one-character str entries of REPEATS. Take a one-byte slice instead
    # and map it to the matching str (latin-1 is a 1:1 byte <-> code point
    # mapping) for the membership test.
    first = bytes(buf[:1])
    if first.decode("latin-1") in REPEATS and buf_filled_with(buf, first):
        return

    # Reuse the precompiled pattern for the common default length.
    if n == 4:
        r = UNICODE_RE_4
    else:
        r = re.compile(rb"((?:[%s]\x00){%d,})" % (ASCII_BYTE, n))

    for match in r.finditer(buf):
        try:
            # The pattern matches little-endian code units (character byte
            # first, then NUL). Decode explicitly as little-endian: the plain
            # "utf-16" codec uses the platform's native byte order when there
            # is no BOM, which does not match the UTF16LE label reported below.
            decoded = match.group().decode("utf-16-le")
        except UnicodeDecodeError:
            # Best effort: skip anything that cannot be decoded.
            continue
        yield StaticString(string=decoded, offset=match.start(), encoding=StringEncoding.UTF16LE)
def main():
    """
    Print the strings found in the file named by the first command-line argument.

    The ASCII strings are printed first, then the UTF-16 strings; each line
    shows the hexadecimal offset followed by the string.
    """
    import sys

    with open(sys.argv[1], "rb") as handle:
        data = handle.read()

    line_format = "0x{:x}: {:s}"
    for extractor in (extract_ascii_strings, extract_unicode_strings):
        for found in extractor(data):
            print(line_format.format(found.offset, found.string))
# Run the command-line entry point only when this file is executed as a script.
if __name__ == "__main__":
    main()