Skip to content

Commit 483a6a9

Browse files
#3; complete implementation for binary vdf
1 parent 29989b7 commit 483a6a9

File tree

2 files changed

+209
-90
lines changed

2 files changed

+209
-90
lines changed

tests/test_binary_vdf.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
import sys
2+
import unittest
3+
4+
import vdf
5+
from collections import OrderedDict
6+
7+
u = str if sys.version_info >= (3,) else unicode
8+
9+
10+
class BinaryVDF(unittest.TestCase):
    """Round-trip and error-handling tests for the binary VDF functions."""

    def test_BASE_INT(self):
        # BASE_INT() is the zero value; __repr__ formats as "<class name>(<value>)".
        self.assertEqual(repr(vdf.BASE_INT()), 'BASE_INT(0)')

    def test_simple(self):
        # One value per supported type; the second string is non-ASCII text.
        pairs = [
            ('a', 'test'),
            ('a2', b'\xff\xfe0\x041\x042\x043\x04'.decode('utf-16')),
            ('bb', 1),
            ('bb2', -500),
            ('ccc', 1.0),
            ('dddd', vdf.POINTER(1234)),
            ('fffff', vdf.COLOR(1234)),
            ('gggggg', vdf.UINT_64(1234)),
        ]

        # Build a tree with sibling and nested sub-dicts to exercise nesting.
        data = OrderedDict(pairs)
        data['level1-1'] = OrderedDict(pairs)
        data['level1-1']['level2-1'] = OrderedDict(pairs)
        data['level1-1']['level2-2'] = OrderedDict(pairs)
        data['level1-2'] = OrderedDict(pairs)

        result = vdf.binary_loads(vdf.binary_dumps(data), mapper=OrderedDict)

        self.assertEqual(data, result)

    def test_loads_empty(self):
        self.assertEqual(vdf.binary_loads(b''), {})

    def test_dumps_empty(self):
        self.assertEqual(vdf.binary_dumps({}), b'')

    def test_dumps_unicode(self):
        self.assertEqual(vdf.binary_dumps({u('a'): u('b')}), b'\x01a\x00b\x00\x08')

    def test_dumps_key_invalid_type(self):
        with self.assertRaises(TypeError):
            vdf.binary_dumps({1: 1})
        with self.assertRaises(TypeError):
            vdf.binary_dumps({None: 1})

    def test_dumps_value_invalid_type(self):
        with self.assertRaises(TypeError):
            vdf.binary_dumps({'': None})

    def test_loads_unbalanced_nesting(self):
        # Too few end markers: two levels opened, only one closed.
        with self.assertRaises(SyntaxError):
            vdf.binary_loads(b'\x00a\x00\x00b\x00\x08')
        # Too many end markers: data continues after the root is closed.
        with self.assertRaises(SyntaxError):
            vdf.binary_loads(b'\x00a\x00\x00b\x00\x08\x08\x08\x08')

    def test_loads_unknown_type(self):
        with self.assertRaises(SyntaxError):
            vdf.binary_loads(b'\x33a\x00\x08')

    def test_loads_unterminated_string(self):
        with self.assertRaises(SyntaxError):
            vdf.binary_loads(b'\x01abbbb')

    def test_loads_type_checks(self):
        with self.assertRaises(TypeError):
            vdf.binary_loads(None)
        with self.assertRaises(TypeError):
            vdf.binary_loads(b'', mapper=list)

vdf/__init__.py

Lines changed: 136 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -2,23 +2,25 @@
22
Module for deserializing/serializing to and from VDF
33
"""
44
__version__ = "1.10"
5-
import struct
65
__author__ = "Rossen Georgiev"
76

87
import re
98
import sys
9+
import struct
1010
from io import StringIO as unicodeIO
1111

1212
# Py2 & Py3 compatibility
1313
if sys.version_info[0] >= 3:
1414
string_type = str
15+
int_type = int
1516
BOMS = '\ufffe\ufeff'
1617

1718
def strip_bom(line):
1819
return line.lstrip(BOMS)
1920
else:
2021
from StringIO import StringIO as strIO
2122
string_type = basestring
23+
int_type = long
2224
BOMS = '\xef\xbb\xbf\xff\xfe\xfe\xff'
2325
BOMS_UNICODE = '\\ufffe\\ufeff'.decode('unicode-escape')
2426

@@ -110,83 +112,90 @@ def parse(fp, mapper=dict):
110112

111113
return stack.pop()
112114

113-
114-
def _read_till_seperator(fp, seperator="\x00", buffersize=2048):
115-
tmp = ""
116-
start_offset = fp.tell()
117-
_sep_length = len(seperator)
118-
while True:
119-
data = fp.read(buffersize)
120-
if not data:
121-
return tmp
122-
tmp += data
123-
index = tmp.find(seperator)
124-
if index != -1:
125-
fp.seek(start_offset + index + _sep_length)
126-
return tmp[:index]
127-
128-
129-
BIN_NONE = '\x00'
130-
BIN_STRING = '\x01'
131-
BIN_INT32 = '\x02'
132-
BIN_FLOAT32 = '\x03'
133-
BIN_POINTER = '\x04'
134-
BIN_WIDESTRING = '\x05'
135-
BIN_COLOR = '\x06'
136-
BIN_UINT64 = '\x07'
137-
BIN_END = '\x08'
138-
def parse_binary(source, mapper=dict):
115+
BIN_NONE = b'\x00'
BIN_STRING = b'\x01'
BIN_INT32 = b'\x02'
BIN_FLOAT32 = b'\x03'
BIN_POINTER = b'\x04'
BIN_WIDESTRING = b'\x05'
BIN_COLOR = b'\x06'
BIN_UINT64 = b'\x07'
BIN_END = b'\x08'


def binary_loads(s, mapper=dict):
    """
    Deserialize ``s`` (``bytes`` containing a VDF in "binary form")
    to a Python object.

    ``mapper`` specifies the Python object used after deserialization. ``dict`` is
    used by default. Alternatively, ``collections.OrderedDict`` can be used if you
    wish to preserve key order. Or any other subclass of ``dict``.

    Raises ``TypeError`` when ``s`` is not ``bytes`` or ``mapper`` is not a
    subclass of ``dict``. Raises ``SyntaxError`` on an unknown type code, an
    unterminated string, unbalanced nesting, or trailing data after the root
    level is closed. A numeric value cut short by the end of the data surfaces
    as ``struct.error``.
    """
    if not isinstance(s, bytes):
        raise TypeError("Expected s to be bytes, got %s" % type(s))
    if not issubclass(mapper, dict):
        raise TypeError("Expected mapper to be subclass of dict, got %s" % (mapper,))

    # helpers
    int32 = struct.Struct('<i')
    uint64 = struct.Struct('<Q')
    float32 = struct.Struct('<f')

    def read_string(s, idx, wide=False):
        # Returns (decoded string, index just past its terminator).
        if wide:
            end = s.find(b'\x00\x00', idx)
            # A double NUL at an odd offset straddles two UTF-16 code units
            # (e.g. "a\x00" followed by "\x00\x01"); it is not the terminator.
            while end != -1 and (end - idx) % 2:
                end = s.find(b'\x00\x00', end + 1)
        else:
            end = s.find(b'\x00', idx)

        if end == -1:
            raise SyntaxError("Unterminated cstring, index: %d" % idx)

        result = s[idx:end]
        if wide:
            result = result.decode('utf-16')
        elif bytes is not str:
            # Py3 only: on Py2 bytes is str, so the value is returned as is.
            result = result.decode('ascii')
        return result, end + (2 if wide else 1)

    # stack[-1] is the mapping currently being filled; stack[0] is the root.
    stack = [mapper()]
    idx = 0

    while len(s) > idx:
        t = s[idx:idx+1]
        idx += 1

        if t == BIN_END:
            if len(stack) > 1:
                stack.pop()
                continue
            # End marker for the root level: stop, anything after is an error.
            break

        key, idx = read_string(s, idx)

        if t == BIN_NONE:
            stack[-1][key] = mapper()
            stack.append(stack[-1][key])
        elif t == BIN_STRING:
            stack[-1][key], idx = read_string(s, idx)
        elif t == BIN_WIDESTRING:
            stack[-1][key], idx = read_string(s, idx, wide=True)
        elif t in (BIN_INT32, BIN_POINTER, BIN_COLOR):
            # All three share the same 4-byte little-endian signed layout.
            val = int32.unpack_from(s, idx)[0]

            if t == BIN_POINTER:
                val = POINTER(val)
            elif t == BIN_COLOR:
                val = COLOR(val)

            stack[-1][key] = val
            idx += int32.size
        elif t == BIN_UINT64:
            stack[-1][key] = UINT_64(uint64.unpack_from(s, idx)[0])
            idx += uint64.size
        elif t == BIN_FLOAT32:
            stack[-1][key] = float32.unpack_from(s, idx)[0]
            idx += float32.size
        else:
            raise SyntaxError("Unknown data type at index %d: %s" % (idx-1, repr(t)))

    if len(stack) != 1:
        raise SyntaxError("Binary VDF ended at index %d with %d unclosed nested level(s)"
                          % (idx, len(stack) - 1))
    if len(s) != idx:
        raise SyntaxError("Binary VDF ended at index %d, but length is %d" % (idx, len(s)))

    return stack.pop()
191200

192201

@@ -255,32 +264,69 @@ def _dump_gen(data, pretty=False, level=0):
255264
yield "%s}\n" % line_indent
256265
else:
257266
yield '%s"%s" "%s"\n' % (line_indent, key, value)
258-
259-
260-
def _dump_gen_binary(data, level=0):
267+
268+
269+
class BASE_INT(int_type):
    """Integer type whose ``repr`` is prefixed with the concrete class name."""

    def __repr__(self):
        class_name = type(self).__name__
        return "%s(%d)" % (class_name, self)
272+
273+
class UINT_64(BASE_INT):
    """Marker type for integers stored as unsigned 64-bit binary VDF values."""
275+
276+
class POINTER(BASE_INT):
    """Marker type for integers stored with the binary VDF pointer type code."""
278+
279+
class COLOR(BASE_INT):
    """Marker type for integers stored with the binary VDF color type code."""
281+
282+
def binary_dumps(obj):
    """
    Serialize ``obj`` (a ``dict``) to VDF in "binary form" and return it
    as ``bytes``.

    An empty ``dict`` serializes to ``b''``.

    Raises ``TypeError`` when ``obj`` is not a mapping, or when it contains a
    key or value of an unsupported type.
    """
    # Without this check an empty list or string would silently serialize to
    # b'' and a non-empty one would fail with an unrelated AttributeError.
    if not hasattr(obj, 'items'):
        raise TypeError("Expected obj to be a dict, got %s" % type(obj))

    return b''.join(_binary_dump_gen(obj))
289+
290+
def _binary_dump_gen(obj, level=0):
    """
    Yield the binary VDF encoding of ``obj`` as a sequence of ``bytes`` chunks.

    ``level`` is the nesting depth; callers outside this function leave it at 0.
    An empty top-level mapping yields nothing at all. Every other mapping,
    nested or not, is followed by an end marker.

    Raises ``TypeError`` for a key that is not a string or a value of an
    unsupported type.
    """
    if level == 0 and len(obj) == 0:
        return

    int32 = struct.Struct('<i')
    uint64 = struct.Struct('<Q')
    float32 = struct.Struct('<f')

    for key, value in obj.items():
        if not isinstance(key, string_type):
            raise TypeError("dict keys must be of type str, got %s" % type(key))
        key = key.encode('ascii')

        if isinstance(value, dict):
            yield BIN_NONE + key + BIN_NONE
            # The recursive call emits the end marker that closes this level.
            for chunk in _binary_dump_gen(value, level+1):
                yield chunk
        elif isinstance(value, UINT_64):
            yield BIN_UINT64 + key + BIN_NONE + uint64.pack(value)
        elif isinstance(value, string_type):
            # Plain strings are stored as ASCII; anything that does not fit
            # falls back to a wide (UTF-16) string with a two-byte terminator.
            # Only encoding errors trigger the fallback, and no yield sits
            # inside the try, so closing the generator is never intercepted.
            try:
                type_code = BIN_STRING
                payload = value.encode('ascii') + BIN_NONE
            except UnicodeError:
                type_code = BIN_WIDESTRING
                payload = value.encode('utf-16') + BIN_NONE*2
            yield type_code + key + BIN_NONE + payload
        elif isinstance(value, float):
            yield BIN_FLOAT32 + key + BIN_NONE + float32.pack(value)
        elif isinstance(value, (COLOR, POINTER, int, int_type)):
            # COLOR and POINTER share the int32 layout; only the type code differs.
            if isinstance(value, COLOR):
                type_code = BIN_COLOR
            elif isinstance(value, POINTER):
                type_code = BIN_POINTER
            else:
                type_code = BIN_INT32
            yield type_code + key + BIN_NONE + int32.pack(value)
        else:
            raise TypeError("Unsupported type: %s" % type(value))

    yield BIN_END

0 commit comments

Comments
 (0)