# parse_bin_with_regex.py
# Forked from mdmonk/python_snippets.
import re
from struct import unpack, pack
def parse(buf):
    """Parse a byte buffer holding an arbitrary number of object records.

    Each record consists of a little-endian unsigned short (the object id),
    a null-terminated byte string (the description), and two more
    little-endian unsigned shorts (the x and y coordinates).  Data that the
    record pattern cannot match, such as an incomplete record at the end of
    the buffer, is ignored.

    Perl equivalent of the list result: @objlist = unpack("(S Z* S S)*", $buf);

    Args:
        buf: Byte string containing the packed records (``str`` on
            Python 2, ``bytes`` on Python 3).

    Returns:
        A tuple ``(objlist, objdict)``:

        * ``objlist`` is a list of ``(id, desc, x, y)`` tuples in the order
          the records appear in ``buf``.
        * ``objdict`` maps each id to ``{"desc": desc, "x": x, "y": y}``.
          If an id occurs more than once, the last record wins.

        ``desc`` is returned as the raw bytes found in the buffer; its
        packed and unpacked representations are identical.
    """
    # A bytes pattern is required to search bytes on Python 3 and is the
    # same thing as a plain str pattern on Python 2.  re.DOTALL makes "."
    # match newlines too, so they are treated as ordinary data.
    record_re = re.compile(br"(..)(.*?)\x00(..)(..)", re.DOTALL)

    objlist = []
    objdict = {}
    # findall yields one (packed id, desc, packed x, packed y) tuple per
    # record.  Use finditer instead to save memory on a very large buffer.
    for raw_id, desc, raw_x, raw_y in record_re.findall(buf):
        # Decode the three 2-byte fields in a single call.
        obj_id, x, y = unpack("<HHH", raw_id + raw_x + raw_y)
        objlist.append((obj_id, desc, x, y))
        objdict[obj_id] = {"desc": desc, "x": x, "y": y}
    return objlist, objdict
def short(x):
    """Decode a 2-byte string as a little-endian unsigned short.

    ``unpack`` always returns a tuple; the single-element unpacking below
    extracts its first (and only) member.
    """
    (value,) = unpack("<H", x)
    return value
def packobj(id, desc, x, y):
    """Pack one object record into the byte layout that parse() reads.

    The layout is: ``id`` as a little-endian unsigned short, ``desc``
    followed by a terminating null byte, then ``x`` and ``y`` as
    little-endian unsigned shorts.  Intended for building test buffers.

    Args:
        id: Object id, packed with struct format ``<H``.
        desc: Description as a byte string (``str`` on Python 2,
            ``bytes`` on Python 3).
        x: X coordinate, packed with struct format ``<H``.
        y: Y coordinate, packed with struct format ``<H``.

    Returns:
        The packed record as a byte string.

    Raises:
        ValueError: If ``desc`` contains a null byte.  The description is
            null-terminated, so an embedded null would end it early and
            corrupt the record.
    """
    if b"\0" in desc:
        raise ValueError("desc must not contain a null byte")
    # The terminator is a bytes literal so it concatenates with pack()'s
    # output on Python 3 as well as Python 2.
    return pack("<H", id) + desc + b"\0" + pack("<HH", x, y)
if __name__ == '__main__':
    # Self-test: pack a few objects into a byte buffer, parse it back, and
    # print the results.  Normally you'd load buf from a file instead, e.g.
    #     with open(filename, "rb") as f:
    #         buf = f.read()
    records = [
        packobj(768, b"golden helmet", 3, 4),
        packobj(234, b"windmill", 20, 30),
        # Test inclusion of newline in string
        packobj(35, b"pitcher\nand stone", 1, 2),
        # Also add a bit of garbage at the end,
        # which the parser should ignore.
        b"garbage",
    ]
    buf = b"".join(records)

    # Parse buffer into list and dictionary of objects
    olist, odict = parse(buf)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(olist)
    print(odict)
    # Decode for display so the embedded newline is printed as a real line
    # break rather than as an escape inside a bytes repr.
    print(odict[35]["desc"].decode("ascii"))  # should retain the newline