From b287bd01836357fa6358c1eac102bd1db8b05a11 Mon Sep 17 00:00:00 2001 From: Rossen Date: Wed, 24 Jul 2019 17:55:40 +0100 Subject: [PATCH 1/2] fix #20; replace invalid unicode chars instead of raising UnicodeDecodeError --- vdf/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vdf/__init__.py b/vdf/__init__.py index 6c793c6..6620c1a 100644 --- a/vdf/__init__.py +++ b/vdf/__init__.py @@ -300,14 +300,14 @@ def read_string(s, idx, wide=False): raise SyntaxError("Unterminated cstring, index: %d" % idx) result = s[idx:end] if wide: - result = result.decode('utf-16') + result = result.decode('utf-16', 'replace') elif bytes is not str: - result = result.decode('utf-8') + result = result.decode('utf-8', 'replace') else: try: result.decode('ascii') except: - result = result.decode('utf-8') + result = result.decode('utf-8', 'replace') return result, end + (2 if wide else 1) stack = [mapper()] From 01d63dd38b3825d565fbf6f6e4b539aa2eac51f9 Mon Sep 17 00:00:00 2001 From: Rossen Georgiev Date: Fri, 26 Jul 2019 23:48:39 +0100 Subject: [PATCH 2/2] binary: utf16 decode + tests --- tests/test_binary_vdf.py | 5 +++++ vdf/__init__.py | 10 ++++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/tests/test_binary_vdf.py b/tests/test_binary_vdf.py index f64d10e..6ef93ea 100644 --- a/tests/test_binary_vdf.py +++ b/tests/test_binary_vdf.py @@ -123,3 +123,8 @@ def test_vbkv_loads_invalid_checksum(self): with self.assertRaises(ValueError): vdf.vbkv_loads(b'VBKV\x01\x02\x03\x04\x00a\x00\x0b\x0b') + def test_loads_utf8_invalmid(self): + self.assertEqual({'aaa': b'bb\xef\xbf\xbdbb'.decode('utf-8')}, vdf.binary_loads(b'\x01aaa\x00bb\xffbb\x00\x08')) + + def test_loads_utf16(self): + self.assertEqual({'aaa': b'b\x00b\x00\xff\xffb\x00b\x00'.decode('utf-16le')}, vdf.binary_loads(b'\x05aaa\x00b\x00b\x00\xff\xffb\x00b\x00\x00\x00\x08')) diff --git a/vdf/__init__.py b/vdf/__init__.py index 6620c1a..74fdd83 100644 --- a/vdf/__init__.py +++ b/vdf/__init__.py @@ -295,12 +295,18 @@ def binary_loads(s, mapper=dict, merge_duplicate_keys=True, alt_format=False): float32 = struct.Struct('