|
2 | 2 | Module for deserializing/serializing to and from VDF |
3 | 3 | """ |
4 | 4 | __version__ = "1.10" |
5 | | -import struct |
6 | 5 | __author__ = "Rossen Georgiev" |
7 | 6 |
|
8 | 7 | import re |
9 | 8 | import sys |
| 9 | +import struct |
10 | 10 | from io import StringIO as unicodeIO |
11 | 11 |
|
12 | 12 | # Py2 & Py3 compatibility |
13 | 13 | if sys.version_info[0] >= 3: |
14 | 14 | string_type = str |
| 15 | + int_type = int |
15 | 16 | BOMS = '\ufffe\ufeff' |
16 | 17 |
|
17 | 18 | def strip_bom(line): |
18 | 19 | return line.lstrip(BOMS) |
19 | 20 | else: |
20 | 21 | from StringIO import StringIO as strIO |
21 | 22 | string_type = basestring |
| 23 | + int_type = long |
22 | 24 | BOMS = '\xef\xbb\xbf\xff\xfe\xfe\xff' |
23 | 25 | BOMS_UNICODE = '\\ufffe\\ufeff'.decode('unicode-escape') |
24 | 26 |
|
@@ -110,83 +112,90 @@ def parse(fp, mapper=dict): |
110 | 112 |
|
111 | 113 | return stack.pop() |
112 | 114 |
|
113 | | - |
114 | | -def _read_till_seperator(fp, seperator="\x00", buffersize=2048): |
115 | | - tmp = "" |
116 | | - start_offset = fp.tell() |
117 | | - _sep_length = len(seperator) |
118 | | - while True: |
119 | | - data = fp.read(buffersize) |
120 | | - if not data: |
121 | | - return tmp |
122 | | - tmp += data |
123 | | - index = tmp.find(seperator) |
124 | | - if index != -1: |
125 | | - fp.seek(start_offset + index + _sep_length) |
126 | | - return tmp[:index] |
127 | | - |
128 | | - |
129 | | -BIN_NONE = '\x00' |
130 | | -BIN_STRING = '\x01' |
131 | | -BIN_INT32 = '\x02' |
132 | | -BIN_FLOAT32 = '\x03' |
133 | | -BIN_POINTER = '\x04' |
134 | | -BIN_WIDESTRING = '\x05' |
135 | | -BIN_COLOR = '\x06' |
136 | | -BIN_UINT64 = '\x07' |
137 | | -BIN_END = '\x08' |
138 | | -def parse_binary(source, mapper=dict): |
# Type codes: the single byte that introduces each entry in binary VDF.
BIN_NONE = b'\x00'         # opens a nested map (also used as the cstring terminator)
BIN_STRING = b'\x01'       # NUL-terminated string
BIN_INT32 = b'\x02'        # little-endian signed 32-bit integer
BIN_FLOAT32 = b'\x03'      # little-endian 32-bit float
BIN_POINTER = b'\x04'      # read as a signed 32-bit integer, wrapped in POINTER
BIN_WIDESTRING = b'\x05'   # UTF-16 string terminated by a two-byte NUL
BIN_COLOR = b'\x06'        # read as a signed 32-bit integer, wrapped in COLOR
BIN_UINT64 = b'\x07'       # little-endian unsigned 64-bit integer
BIN_END = b'\x08'          # closes the current map


def binary_loads(s, mapper=dict):
    """
    Deserialize ``s`` (``bytes`` containing a VDF in "binary form")
    to a Python object.

    ``mapper`` specifies the Python object used after deserialization. ``dict`` is
    used by default. Alternatively, ``collections.OrderedDict`` can be used if you
    wish to preserve key order. ``mapper`` must be a subclass of ``dict``.

    Empty input yields an empty ``mapper``.

    Raises ``TypeError`` if ``s`` is not ``bytes`` or ``mapper`` is not a ``dict``
    subclass, and ``SyntaxError`` if a string is unterminated, a type code is
    unknown, a map is left unclosed, or data follows the closing ``BIN_END``.
    """
    if not isinstance(s, bytes):
        raise TypeError("Expected s to be bytes, got %s" % (type(s),))
    if not issubclass(mapper, dict):
        raise TypeError("Expected mapper to be subclass of dict, got %s" % (type(mapper),))

    # helpers
    int32 = struct.Struct('<i')
    uint64 = struct.Struct('<Q')
    float32 = struct.Struct('<f')

    def read_string(s, idx, wide=False):
        # Returns (decoded string, index of the first byte after the terminator).
        if wide:
            # The terminator is a whole NUL code unit, so it must sit an even
            # number of bytes after the start. A plain find() can match the
            # high byte of one unit plus the low byte of the next.
            end = s.find(b'\x00\x00', idx)
            while end != -1 and (end - idx) % 2:
                end = s.find(b'\x00\x00', end + 1)
        else:
            end = s.find(b'\x00', idx)
        if end == -1:
            raise SyntaxError("Unterminated cstring, index: %d" % idx)
        result = s[idx:end]
        if wide:
            result = result.decode('utf-16')
        elif bytes is not str:
            # Python 3 only: on Python 2 ``bytes`` is ``str`` and is returned as-is.
            result = result.decode('ascii')
        return result, end + (2 if wide else 1)

    stack = [mapper()]
    idx = 0

    while len(s) > idx:
        t = s[idx:idx+1]
        idx += 1

        if t == BIN_END:
            if len(stack) > 1:
                stack.pop()
                continue
            # BIN_END at the top level marks the end of the document.
            break

        key, idx = read_string(s, idx)

        if t == BIN_NONE:
            stack[-1][key] = mapper()
            stack.append(stack[-1][key])
        elif t == BIN_STRING:
            stack[-1][key], idx = read_string(s, idx)
        elif t == BIN_WIDESTRING:
            stack[-1][key], idx = read_string(s, idx, wide=True)
        elif t in (BIN_INT32, BIN_POINTER, BIN_COLOR):
            val = int32.unpack_from(s, idx)[0]

            # Wrap so the original type code survives a later binary_dumps().
            if t == BIN_POINTER:
                val = POINTER(val)
            elif t == BIN_COLOR:
                val = COLOR(val)

            stack[-1][key] = val
            idx += int32.size
        elif t == BIN_UINT64:
            stack[-1][key] = UINT_64(uint64.unpack_from(s, idx)[0])
            idx += uint64.size
        elif t == BIN_FLOAT32:
            stack[-1][key] = float32.unpack_from(s, idx)[0]
            idx += float32.size
        else:
            raise SyntaxError("Unknown data type at index %d: %s" % (idx-1, repr(t)))

    # Reject trailing bytes after the final BIN_END and maps that were never closed.
    if len(s) != idx or len(stack) != 1:
        raise SyntaxError("Binary VDF ended at index %d, but length is %d" % (idx, len(s)))

    return stack.pop()
191 | 200 |
|
192 | 201 |
|
@@ -255,32 +264,69 @@ def _dump_gen(data, pretty=False, level=0): |
255 | 264 | yield "%s}\n" % line_indent |
256 | 265 | else: |
257 | 266 | yield '%s"%s" "%s"\n' % (line_indent, key, value) |
258 | | - |
259 | | - |
260 | | -def _dump_gen_binary(data, level=0): |
| 267 | + |
| 268 | + |
class BASE_INT(int_type):
    """Integer subclass whose ``repr`` includes the concrete class name, e.g. ``UINT_64(5)``."""

    def __repr__(self):
        cls_name = type(self).__name__
        return "%s(%d)" % (cls_name, self)
| 272 | + |
class UINT_64(BASE_INT):
    """Integer tagged as a ``BIN_UINT64`` value: packed as ``'<Q'`` by the binary dumper."""
| 275 | + |
class POINTER(BASE_INT):
    """Integer tagged as a ``BIN_POINTER`` value: packed as ``'<i'`` by the binary dumper."""
| 278 | + |
class COLOR(BASE_INT):
    """Integer tagged as a ``BIN_COLOR`` value: packed as ``'<i'`` by the binary dumper."""
| 281 | + |
def binary_dumps(obj):
    """
    Serialize ``obj`` to binary VDF and return the result as ``bytes``.

    The chunks yielded by ``_binary_dump_gen(obj)`` are concatenated into a
    single byte string.
    """
    chunks = _binary_dump_gen(obj)
    return b''.join(chunks)
| 289 | + |
def _binary_dump_gen(obj, level=0):
    """
    Yield the binary VDF encoding of ``obj`` as a sequence of ``bytes`` chunks.

    ``obj`` is a ``dict``-like object with string keys. ``level`` is the nesting
    depth and is only used to special-case an empty top-level object, which
    yields nothing at all.

    Values are encoded by type: ``dict`` as a nested map, ``UINT_64`` as an
    unsigned 64-bit integer, strings as ``BIN_STRING`` when they are pure ASCII
    and ``BIN_WIDESTRING`` (UTF-16) otherwise, ``float`` as a 32-bit float, and
    ``COLOR`` / ``POINTER`` / other integers as a signed 32-bit integer with the
    matching type code.

    Raises ``TypeError`` for a non-string key or an unsupported value type.
    """
    if level == 0 and len(obj) == 0:
        return

    int32 = struct.Struct('<i')
    uint64 = struct.Struct('<Q')
    float32 = struct.Struct('<f')

    for key, value in obj.items():
        if not isinstance(key, string_type):
            raise TypeError("dict keys must be of type str, got %s" % type(key))
        key = key.encode('ascii')

        if isinstance(value, dict):
            yield BIN_NONE + key + BIN_NONE
            for chunk in _binary_dump_gen(value, level+1):
                yield chunk
        elif isinstance(value, UINT_64):
            # Checked before the generic integer branch: UINT_64 is an int subclass.
            yield BIN_UINT64 + key + BIN_NONE + uint64.pack(value)
        elif isinstance(value, string_type):
            # Only the encode attempt is guarded. A yield inside a catch-all
            # handler would swallow GeneratorExit when the generator is closed.
            try:
                payload = value.encode('ascii') + BIN_NONE
                type_code = BIN_STRING
            except UnicodeError:
                payload = value.encode('utf-16') + BIN_NONE*2
                type_code = BIN_WIDESTRING
            yield type_code + key + BIN_NONE + payload
        elif isinstance(value, float):
            yield BIN_FLOAT32 + key + BIN_NONE + float32.pack(value)
        elif isinstance(value, (COLOR, POINTER, int, int_type)):
            if isinstance(value, COLOR):
                type_code = BIN_COLOR
            elif isinstance(value, POINTER):
                type_code = BIN_POINTER
            else:
                type_code = BIN_INT32
            yield type_code + key + BIN_NONE + int32.pack(value)
        else:
            raise TypeError("Unsupported type: %s" % type(value))

    # Every map, including the top-level one, is closed with BIN_END.
    yield BIN_END
0 commit comments