From bf38efbe6d75bcd5ce1372339b32889c4aca96b2 Mon Sep 17 00:00:00 2001 From: JahPowerBit Date: Tue, 24 Mar 2015 11:07:22 +0000 Subject: [PATCH 01/29] Fix: binary deserialization should return Atomic not str --- rlp/sedes/binary.py | 2 +- tests/test_json.py | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/rlp/sedes/binary.py b/rlp/sedes/binary.py index 3c03c21..66f892f 100644 --- a/rlp/sedes/binary.py +++ b/rlp/sedes/binary.py @@ -53,7 +53,7 @@ def deserialize(self, serial): raise DeserializationError(m.format(type(serial).__name__), serial) if self.is_valid_length(len(serial)): - return bytes_to_str(serial) + return serial else: raise DeserializationError('{} has invalid length'.format(type(serial)), serial) diff --git a/tests/test_json.py b/tests/test_json.py index 7ed4a3d..80c63ea 100644 --- a/tests/test_json.py +++ b/tests/test_json.py @@ -10,10 +10,17 @@ def evaluate(ll): else: return ll +def to_bytes(value): + if isinstance(value, str): + return utils.str_to_bytes(value) + elif isinstance(value, list): + return [to_bytes(v) for v in value] + else: + return value with open('tests/rlptest.json') as f: test_data = json.loads(f.read()) - test_pieces = [(name, {'in': in_out['in'], + test_pieces = [(name, {'in': to_bytes(in_out['in']), 'out': utils.str_to_bytes(in_out['out'])}) for name, in_out in test_data.items()] From eb19dac5bc0d3e6c46d0cc49c372259ae995d718 Mon Sep 17 00:00:00 2001 From: jnnk Date: Wed, 25 Mar 2015 16:29:00 +0100 Subject: [PATCH 02/29] Added strict option for decoding --- rlp/codec.py | 8 +++++--- tests/test_json.py | 6 +++++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/rlp/codec.py b/rlp/codec.py index 67fe6ea..1ea6b15 100644 --- a/rlp/codec.py +++ b/rlp/codec.py @@ -149,7 +149,7 @@ def consume_item(rlp, start): return consume_payload(rlp, s, t, l) -def decode(rlp, sedes=None, **kwargs): +def decode(rlp, sedes=None, strict=True, **kwargs): """Decode an RLP encoded object. :param sedes: an object implementing a function ``deserialize(code)`` which @@ -157,16 +157,18 @@ def decode(rlp, sedes=None, **kwargs): deserialization should be performed :param \*\*kwargs: additional keyword arguments that will be passed to the deserializer + :param strict: if false inputs that are longer than necessary don't cause + an exception :returns: the decoded and maybe deserialized Python object :raises: :exc:`rlp.DecodingError` if the input string does not end after - the root item + the root item and `strict` is true :raises: :exc:`rlp.DeserializationError` if the deserialization fails """ try: item, end = consume_item(rlp, 0) except IndexError: raise DecodingError('RLP string to short', rlp) - if end != len(rlp): + if end != len(rlp) and strict: msg = 'RLP string ends with {} superfluous bytes'.format(len(rlp) - end) raise DecodingError(msg, rlp) if sedes: diff --git a/tests/test_json.py b/tests/test_json.py index 7ed4a3d..2e80e65 100644 --- a/tests/test_json.py +++ b/tests/test_json.py @@ -1,7 +1,7 @@ import json import pytest import rlp -from rlp import encode, decode, decode_lazy, infer_sedes, utils +from rlp import encode, decode, decode_lazy, infer_sedes, utils, DecodingError def evaluate(ll): @@ -33,6 +33,10 @@ def test_decode(name, in_out): msg_format = 'Test {} failed (decoded {} to {} instead of {})' rlp_string = utils.decode_hex(in_out['out']) decoded = decode(rlp_string) + with pytest.raises(DecodingError): + decode(rlp_string + '\x00') + assert decoded == decode(rlp_string + '\x00', strict=False) + assert decoded == evaluate(decode_lazy(rlp_string)) expected = in_out['in'] sedes = infer_sedes(expected) From d645f31d2c2861aadcc647d40c08b73c1bbb6100 Mon Sep 17 00:00:00 2001 From: jnnk Date: Wed, 25 Mar 2015 16:31:33 +0100 Subject: [PATCH 03/29] Py3 compatibility --- tests/test_json.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_json.py b/tests/test_json.py index 10e5840..5a2a6b8 100644 --- a/tests/test_json.py +++ b/tests/test_json.py @@ -41,8 +41,8 @@ def test_decode(name, in_out): rlp_string = utils.decode_hex(in_out['out']) decoded = decode(rlp_string) with pytest.raises(DecodingError): - decode(rlp_string + '\x00') - assert decoded == decode(rlp_string + '\x00', strict=False) + decode(rlp_string + b'\x00') + assert decoded == decode(rlp_string + b'\x00', strict=False) assert decoded == evaluate(decode_lazy(rlp_string)) expected = in_out['in'] From 9027d9c71c1042934e64be8dfc64f64fc7f133ed Mon Sep 17 00:00:00 2001 From: jnnk Date: Wed, 25 Mar 2015 16:59:33 +0100 Subject: [PATCH 04/29] fixed docstring --- rlp/codec.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rlp/codec.py b/rlp/codec.py index 1ea6b15..467817a 100644 --- a/rlp/codec.py +++ b/rlp/codec.py @@ -181,8 +181,8 @@ def infer_sedes(obj): """Try to find a sedes objects suitable for a given Python object. The sedes objects considered are `obj`'s class, `big_endian_int` and - `binary`. If `obj` is a sequence, a :class:`ListSedes` will be constructed - recursively. + `binary`. If `obj` is a sequence, a :class:`rlp.sedes.List` will be + constructed recursively. :param obj: the python object for which to find a sedes object :raises: :exc:`TypeError` if no appropriate sedes could be found From 3619330e98136f42fd45cdbcb8a1a7eabc81de02 Mon Sep 17 00:00:00 2001 From: Heiko Heiko Date: Thu, 26 Mar 2015 09:29:21 +0100 Subject: [PATCH 05/29] support optional items in lists --- rlp/sedes/lists.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/rlp/sedes/lists.py b/rlp/sedes/lists.py index 7b772f2..108b76c 100644 --- a/rlp/sedes/lists.py +++ b/rlp/sedes/lists.py @@ -23,10 +23,12 @@ def is_sequence(obj): class List(list): + """A sedes for lists, implemented as a list of other sedes objects.""" - def __init__(self, elements=[]): + def __init__(self, elements=[], strict=True): super(List, self).__init__() + self.strict = strict for e in elements: if is_sedes(e): self.append(e) @@ -39,7 +41,7 @@ def __init__(self, elements=[]): def serialize(self, obj): if not is_sequence(obj): raise SerializationError('Can only serialize sequences', obj) - if len(self) != len(obj): + if self.strict and len(self) != len(obj): raise SerializationError('List has wrong length', obj) return [sedes.serialize(element) for element, sedes in zip(obj, self)] From 7d8f5a143a702911ffbcd62ee8fd90725e062a05 Mon Sep 17 00:00:00 2001 From: jnnk Date: Thu, 26 Mar 2015 10:13:32 +0100 Subject: [PATCH 06/29] Improved docstring --- rlp/sedes/lists.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/rlp/sedes/lists.py b/rlp/sedes/lists.py index 108b76c..39630d3 100644 --- a/rlp/sedes/lists.py +++ b/rlp/sedes/lists.py @@ -24,7 +24,13 @@ def is_sequence(obj): class List(list): - """A sedes for lists, implemented as a list of other sedes objects.""" + """A sedes for lists, implemented as a list of other sedes objects. + + :param strict: If true (de)serializing lists that have a length not + matching the sedes length will result in an error. If false + (de)serialization will stop as soon as either one of the + lists runs out of elements. + """ def __init__(self, elements=[], strict=True): super(List, self).__init__() From 827161ed3324bd1b6f3c52392f04e403d4e7465d Mon Sep 17 00:00:00 2001 From: JahPowerBit Date: Thu, 26 Mar 2015 11:50:30 +0000 Subject: [PATCH 07/29] bytearray to string --- rlp/utils_py3.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rlp/utils_py3.py b/rlp/utils_py3.py index 4ba1f72..dc72cb5 100644 --- a/rlp/utils_py3.py +++ b/rlp/utils_py3.py @@ -13,6 +13,8 @@ class Atomic(metaclass = abc.ABCMeta): def str_to_bytes(value): + if isinstance(value, bytearray): + value = bytes(value) if isinstance(value, bytes): return value return bytes(value, 'utf-8') From 78f402a2ef41d5286acd286b9b1c88b8d4f2d9ee Mon Sep 17 00:00:00 2001 From: jnnk Date: Mon, 30 Mar 2015 12:00:59 +0200 Subject: [PATCH 08/29] Implemented rlp.peek --- rlp/__init__.py | 2 +- rlp/lazy.py | 60 ++++++++++++++++++++++++++++++++++++---------- tests/test_lazy.py | 24 +++++++++++++++---- 3 files changed, 69 insertions(+), 17 deletions(-) diff --git a/rlp/__init__.py b/rlp/__init__.py index 68dc9be..23ecebf 100644 --- a/rlp/__init__.py +++ b/rlp/__init__.py @@ -2,5 +2,5 @@ from .codec import encode, decode, infer_sedes from .exceptions import RLPException, EncodingError, DecodingError, \ SerializationError, DeserializationError -from .lazy import decode_lazy, LazyList +from .lazy import decode_lazy, peek, LazyList from .sedes import Serializable diff --git a/rlp/lazy.py b/rlp/lazy.py index 5b0d4bc..671c082 100644 --- a/rlp/lazy.py +++ b/rlp/lazy.py @@ -1,6 +1,7 @@ -from collections import Sequence -from .exceptions import DecodingError +from collections import Iterable, Sequence from .codec import consume_length_prefix, consume_payload +from .exceptions import DecodingError +from .utils import Atomic def decode_lazy(rlp, sedes=None, **sedes_kwargs): @@ -18,9 +19,9 @@ def decode_lazy(rlp, sedes=None, **sedes_kwargs): "vertical lazyness" can be preserved. :param rlp: the RLP string to decode - :param sedes: an object implementing a method ``deserialize(code)`` - which is used as described above, or ``None`` if no - deserialization should be performed + :param sedes: an object implementing a method ``deserialize(code)`` which + is used as described above, or ``None`` if no + deserialization should be performed :param \*\*sedes_kwargs: additional keyword arguments that will be passed to the deserializers :returns: either the already decoded and deserialized object (if encoded as @@ -81,31 +82,31 @@ def __init__(self, rlp, start, end, sedes=None, **sedes_kwargs): self.start = start self.end = end self.index = start - self.elements_ = [] + self._elements = [] self.len_ = None self.sedes = sedes self.sedes_kwargs = sedes_kwargs def next(self): if self.index == self.end: - self.len_ = len(self.elements_) + self.len_ = len(self._elements) raise StopIteration assert self.index < self.end item, end = consume_item_lazy(self.rlp, self.index) self.index = end if self.sedes: item = self.sedes.deserialize(item, **self.sedes_kwargs) - self.elements_.append(item) + self._elements.append(item) return item def __getitem__(self, i): try: - while len(self.elements_) <= i: + while len(self._elements) <= i: self.next() except StopIteration: assert self.index == self.end raise IndexError('Index %d out of range' % i) - return self.elements_[i] + return self._elements[i] def __len__(self): if not self.len_: @@ -113,5 +114,40 @@ def __len__(self): while True: self.next() except StopIteration: - self.len_ = len(self.elements_) - return self.len_ + self._len = len(self._elements) + return self._len + + +def peek(rlp, index, sedes=None): + """Get a specific element from an rlp encoded nested list. + + This function uses :func:`rlp.decode_lazy` and, thus, decodes only the + necessary parts of the string. + + Usage example:: + + >>> rlpdata = rlp.encode([1, 2, [3, [4, 5]]]) + >>> rlp.peek(rlpdata, 0, rlp.sedes.big_endian_int) + 1 + >>> rlp.peek(rlpdata, [2, 0], rlp.sedes.big_endian_int) + 3 + + :param rlp: the rlp string + :param index: the index of the element to peek at (can be a list for + nested data) + :param sedes: a sedes used to deserialize the peeked at object, or `None` + if no deserialization should be performed + :raises: :exc:`IndexError` if `index` is invalid (out of range or too many + levels) + """ + ll = decode_lazy(rlp) + if not isinstance(index, Iterable): + index = [index] + for i in index: + if isinstance(ll, Atomic): + raise IndexError('Too many indices given') + ll = ll[i] + if sedes: + return sedes.deserialize(ll) + else: + return ll diff --git a/tests/test_lazy.py b/tests/test_lazy.py index b8abfde..450b7e5 100644 --- a/tests/test_lazy.py +++ b/tests/test_lazy.py @@ -29,6 +29,12 @@ def test_string(): assert isinstance(dec(), bytes) assert len(dec()) == len(s) assert dec() == s + assert rlp.peek(rlp.encode(s), []) == s + with pytest.raises(IndexError): + rlp.peek(rlp.encode(s), 0) + with pytest.raises(IndexError): + rlp.peek(rlp.encode(s), [0]) + def test_nested_list(): @@ -38,13 +44,13 @@ def test_nested_list(): assert len(dec()) == len(l) assert evaluate(dec()) == l with pytest.raises(IndexError): - assert dec()[0][0] + dec()[0][0] with pytest.raises(IndexError): - assert dec()[1][1] + dec()[1][1] with pytest.raises(IndexError): - assert dec()[2][3] + dec()[2][3] with pytest.raises(IndexError): - assert dec()[3] + dec()[3] def test_sedes(): @@ -63,3 +69,13 @@ def test_sedes(): assert invalid_lazy[1] == l[1] with pytest.raises(DeserializationError): invalid_lazy[2] + + +def test_peek(): + assert rlp.peek(rlp.encode(b''), []) == b'' + nested = rlp.encode([0, 1, [2, 3]]) + assert rlp.peek(nested, [2, 0], big_endian_int) == 2 + for index in [3, [3], [0, 0], [2, 2], [2, 1, 0]]: + with pytest.raises(IndexError): + rlp.peek(nested, index) + assert rlp.peek(nested, 2, CountableList(big_endian_int)) == [2, 3] From 5f00ca9b0ec57a869d34ad9b930dd7d355f5ef08 Mon Sep 17 00:00:00 2001 From: Heiko Heiko Date: Tue, 31 Mar 2015 03:44:22 +0200 Subject: [PATCH 09/29] support pytearray --- rlp/utils_py2.py | 3 +++ tests/test_bytearray.py | 7 +++++++ 2 files changed, 10 insertions(+) create mode 100644 tests/test_bytearray.py diff --git a/rlp/utils_py2.py b/rlp/utils_py2.py index 7f533aa..f6d363b 100644 --- a/rlp/utils_py2.py +++ b/rlp/utils_py2.py @@ -2,6 +2,7 @@ class Atomic(object): + """ABC for objects that can be RLP encoded as is.""" __metaclass__ = abc.ABCMeta @@ -37,6 +38,8 @@ def is_integer(value): def bytes_to_int_array(value): + if isinstance(value, bytearray): + return list(value) return [ord(c) for c in value] diff --git a/tests/test_bytearray.py b/tests/test_bytearray.py new file mode 100644 index 0000000..6bb6797 --- /dev/null +++ b/tests/test_bytearray.py @@ -0,0 +1,7 @@ +import rlp + + +def test_bytearray(): + e = rlp.encode('abc') + d = rlp.decode(e) + d = rlp.decode(bytearray(e)) From 2cdd4a149d8e81362409df901ced2904f1cdc4ae Mon Sep 17 00:00:00 2001 From: Heiko Heiko Date: Wed, 1 Apr 2015 19:45:38 +0200 Subject: [PATCH 10/29] 10x deserialization sepeedup --- rlp/utils_py2.py | 4 +--- setup.py | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/rlp/utils_py2.py b/rlp/utils_py2.py index f6d363b..ff3705b 100644 --- a/rlp/utils_py2.py +++ b/rlp/utils_py2.py @@ -38,9 +38,7 @@ def is_integer(value): def bytes_to_int_array(value): - if isinstance(value, bytearray): - return list(value) - return [ord(c) for c in value] + return memoryview(value).tolist() def decode_hex(s): diff --git a/setup.py b/setup.py index 1ef2e92..8e01b59 100755 --- a/setup.py +++ b/setup.py @@ -33,7 +33,7 @@ def run_tests(self): setup( name='rlp', - version='0.3.4', + version='0.3.6', description="A package for encoding and decoding data in and from Recursive Length Prefix notation", long_description=readme, author="jnnk", From adad3f5c0d5038308fb7e41cdbf6286faaca1571 Mon Sep 17 00:00:00 2001 From: Heiko Heiko Date: Wed, 1 Apr 2015 21:13:10 +0200 Subject: [PATCH 11/29] be less strict --- rlp/sedes/lists.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/rlp/sedes/lists.py b/rlp/sedes/lists.py index 39630d3..e216a35 100644 --- a/rlp/sedes/lists.py +++ b/rlp/sedes/lists.py @@ -47,7 +47,7 @@ def __init__(self, elements=[], strict=True): def serialize(self, obj): if not is_sequence(obj): raise SerializationError('Can only serialize sequences', obj) - if self.strict and len(self) != len(obj): + if self.strict and len(self) != len(obj) or len(self) < len(obj): raise SerializationError('List has wrong length', obj) return [sedes.serialize(element) for element, sedes in zip(obj, self)] @@ -56,13 +56,14 @@ def deserialize(self, serial): if not is_sequence(serial): raise DeserializationError('Can only deserialize sequences', serial) - if len(serial) != len(self): + if len(serial) > len(self) or self.strict and len(serial) != len(self): raise DeserializationError('List has wrong length', serial) return [sedes.deserialize(element) for element, sedes in zip(serial, self)] class CountableList(object): + """A sedes for lists of arbitrary length. :param element_sedes: when (de-)serializing a list, this sedes will be @@ -85,6 +86,7 @@ def deserialize(self, serial): class Serializable(object): + """Base class for objects which can be serialized into RLP lists. :attr:`fields` defines which attributes are serialized and how this is @@ -153,7 +155,7 @@ def serialize(cls, obj): def deserialize(cls, serial, **kwargs): values = cls.get_sedes().deserialize(serial) params = {field: value for (field, _), value - in zip(cls.fields, values)} + in zip(cls.fields, values)} return cls(**dict(list(params.items()) + list(kwargs.items()))) @classmethod @@ -161,6 +163,6 @@ def exclude(cls, excluded_fields): """Create a new sedes considering only a reduced set of fields.""" class SerializableExcluded(cls): fields = [(field, sedes) for field, sedes in cls.fields - if field not in excluded_fields] + if field not in excluded_fields] _sedes = None return SerializableExcluded From 26a7f3322ebdea635f9d3078b6cd14a25979a9f7 Mon Sep 17 00:00:00 2001 From: jnnk Date: Thu, 2 Apr 2015 12:02:38 +0200 Subject: [PATCH 12/29] Convert to bytes before decoding --- rlp/codec.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/rlp/codec.py b/rlp/codec.py index 467817a..f3178a7 100644 --- a/rlp/codec.py +++ b/rlp/codec.py @@ -88,9 +88,6 @@ def consume_length_prefix(rlp, start): ``length`` is the length of the payload in bytes, and ``end`` is the position of the first payload byte in the rlp string """ - if isinstance(rlp, str): - rlp = str_to_bytes(rlp) - b0 = bytes_to_int_array(rlp)[start] if b0 < 128: # single byte return (str, 1, start) @@ -164,6 +161,7 @@ def decode(rlp, sedes=None, strict=True, **kwargs): the root item and `strict` is true :raises: :exc:`rlp.DeserializationError` if the deserialization fails """ + rlp = str_to_bytes(rlp) try: item, end = consume_item(rlp, 0) except IndexError: From 1a9f58134b7f99ca911304f05820ec4456eb2650 Mon Sep 17 00:00:00 2001 From: Vitalik Buterin Date: Fri, 3 Apr 2015 13:46:13 -0400 Subject: [PATCH 13/29] Fixed big endian decoding --- rlp/sedes/big_endian_int.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rlp/sedes/big_endian_int.py b/rlp/sedes/big_endian_int.py index 2df8512..6ca6a7e 100644 --- a/rlp/sedes/big_endian_int.py +++ b/rlp/sedes/big_endian_int.py @@ -1,5 +1,5 @@ from ..exceptions import DeserializationError, SerializationError -from ..utils import int_to_big_endian, is_integer, encode_hex +from ..utils import int_to_big_endian, is_integer, encode_hex, ascii_chr class BigEndianInt(object): @@ -35,7 +35,7 @@ def deserialize(self, serial): if self.l is not None and len(serial) != self.l: raise DeserializationError('Invalid serialization (wrong size)', serial) - if self.l is None and len(serial) > 1 and serial[0] == 0: + if self.l is None and len(serial) > 1 and serial[0] == ascii_chr(0): raise DeserializationError('Invalid serialization (not minimal ' 'length)', serial) From 83ff44d9029ed13daa65f6f13884e20a5bfa7e5c Mon Sep 17 00:00:00 2001 From: Heiko Heiko Date: Sat, 4 Apr 2015 22:37:37 +0200 Subject: [PATCH 14/29] version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 8e01b59..4a04436 100755 --- a/setup.py +++ b/setup.py @@ -33,7 +33,7 @@ def run_tests(self): setup( name='rlp', - version='0.3.6', + version='0.3.7', description="A package for encoding and decoding data in and from Recursive Length Prefix notation", long_description=readme, author="jnnk", From 36ee93acc94cca21abde9506da844980b0abcffc Mon Sep 17 00:00:00 2001 From: Heiko Heiko Date: Tue, 7 Apr 2015 13:28:36 +0200 Subject: [PATCH 15/29] travis and coveralls --- .travis.yml | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index edfff09..76568e2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,20 @@ language: python + python: - "2.7" + - "3.3" - "3.4" -install: python setup.py install -script: py.test \ No newline at end of file + - "pypy" + - "pypy3" + +# command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors +install: + - pip install -r requirements.txt + - pip install coveralls + +# command to run tests, e.g. python setup.py test +script: + - coverage run --source pyethapp setup.py test + +after_success: + - coveralls From 49f2d7ee9d2b8ec6f285bc96c042d2c66cc9dac8 Mon Sep 17 00:00:00 2001 From: Heiko Heiko Date: Tue, 7 Apr 2015 13:47:29 +0200 Subject: [PATCH 16/29] badges --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 4d65459..0aa84f4 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,10 @@ pyrlp ===== +[![Build Status](https://travis-ci.org/ethereum/pyrlp.svg?branch=develop)](https://travis-ci.org/ethereum/pyrlp) +[![Coverage Status](https://coveralls.io/repos/ethereum/pyrlp/badge.svg)](https://coveralls.io/r/ethereum/pyrlp) +[![PyPI version](https://badge.fury.io/py/rlp.svg)](http://badge.fury.io/py/rlp) + A Python implementation of Recursive Length Prefix encoding (RLP). You can find the specification of the standard in the [Ethereum wiki](https://github.com/ethereum/wiki/wiki/RLP) and the From 882ffe592559d18af33d1b28a3ee609cdcc178cb Mon Sep 17 00:00:00 2001 From: Heiko Heiko Date: Wed, 15 Apr 2015 09:28:52 +0200 Subject: [PATCH 17/29] w/o unnecessary list, 30x speedup --- rlp/codec.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rlp/codec.py b/rlp/codec.py index f3178a7..23ab8c5 100644 --- a/rlp/codec.py +++ b/rlp/codec.py @@ -43,7 +43,7 @@ def encode(obj, sedes=None, infer_serializer=True): def encode_raw(item): """RLP encode (a nested sequence of) :class:`Atomic`s.""" if isinstance(item, Atomic): - if len(item) == 1 and bytes_to_int_array(item)[0] < 128: + if len(item) == 1 and ord(item[0]) < 128: return str_to_bytes(item) payload = str_to_bytes(item) prefix_offset = 128 # string @@ -88,7 +88,7 @@ def consume_length_prefix(rlp, start): ``length`` is the length of the payload in bytes, and ``end`` is the position of the first payload byte in the rlp string """ - b0 = bytes_to_int_array(rlp)[start] + b0 = ord(rlp[start]) if b0 < 128: # single byte return (str, 1, start) elif b0 < 128 + 56: # short string From ac193d7232182640f0a64221c8c2bf0778f8fcf4 Mon Sep 17 00:00:00 2001 From: Heiko Heiko Date: Wed, 15 Apr 2015 11:35:05 +0200 Subject: [PATCH 18/29] more speedups --- rlp/sedes/lists.py | 3 ++- rlp/utils_py2.py | 8 +------- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/rlp/sedes/lists.py b/rlp/sedes/lists.py index e216a35..b4cda36 100644 --- a/rlp/sedes/lists.py +++ b/rlp/sedes/lists.py @@ -14,7 +14,8 @@ def is_sedes(obj): A sedes object is characterized by having the methods `serialize(obj)` and `deserialize(serial)`. """ - return all(hasattr(obj, m) for m in ('serialize', 'deserialize')) + # return all(hasattr(obj, m) for m in ('serialize', 'deserialize')) + return hasattr(obj, 'serialize') and hasattr(obj, 'deserialize') def is_sequence(obj): diff --git a/rlp/utils_py2.py b/rlp/utils_py2.py index ff3705b..609f8af 100644 --- a/rlp/utils_py2.py +++ b/rlp/utils_py2.py @@ -11,13 +11,7 @@ class Atomic(object): Atomic.register(bytearray) Atomic.register(unicode) - -def str_to_bytes(value): - return str(value) - - -def bytes_to_str(value): - return str(value) +str_to_bytes = bytes_to_str = lambda a: a def ascii_chr(value): From a24e0b918501b5f4ef92e405cadf69307c67dd5b Mon Sep 17 00:00:00 2001 From: Heiko Heiko Date: Wed, 15 Apr 2015 12:07:06 +0200 Subject: [PATCH 19/29] refix str_to_bytes --- rlp/utils_py2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rlp/utils_py2.py b/rlp/utils_py2.py index 609f8af..86f8c02 100644 --- a/rlp/utils_py2.py +++ b/rlp/utils_py2.py @@ -11,7 +11,7 @@ class Atomic(object): Atomic.register(bytearray) Atomic.register(unicode) -str_to_bytes = bytes_to_str = lambda a: a +str_to_bytes = bytes_to_str = str def ascii_chr(value): From e3e1278e0e84301579a8cc1273496e7d0a7b0c36 Mon Sep 17 00:00:00 2001 From: jnnk Date: Wed, 15 Apr 2015 14:29:46 +0200 Subject: [PATCH 20/29] Removed bytes_to_int_array, fixed python 3 support --- rlp/codec.py | 7 +++---- rlp/utils_py2.py | 7 +++---- rlp/utils_py3.py | 12 ++++++++---- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/rlp/codec.py b/rlp/codec.py index 23ab8c5..9016ab2 100644 --- a/rlp/codec.py +++ b/rlp/codec.py @@ -1,8 +1,7 @@ import collections import sys from .exceptions import EncodingError, DecodingError -from .utils import (Atomic, str_to_bytes, is_integer, bytes_to_int_array, - ascii_chr) +from .utils import Atomic, str_to_bytes, is_integer, ascii_chr, safe_ord from .sedes.binary import Binary as BinaryClass from .sedes import big_endian_int, binary from .sedes.lists import List, is_sedes @@ -43,7 +42,7 @@ def encode(obj, sedes=None, infer_serializer=True): def encode_raw(item): """RLP encode (a nested sequence of) :class:`Atomic`s.""" if isinstance(item, Atomic): - if len(item) == 1 and ord(item[0]) < 128: + if len(item) == 1 and safe_ord(item[0]) < 128: return str_to_bytes(item) payload = str_to_bytes(item) prefix_offset = 128 # string @@ -88,7 +87,7 @@ def consume_length_prefix(rlp, start): ``length`` is the length of the payload in bytes, and ``end`` is the position of the first payload byte in the rlp string """ - b0 = ord(rlp[start]) + b0 = safe_ord(rlp[start]) if b0 < 128: # single byte return (str, 1, start) elif b0 < 128 + 56: # short string diff --git a/rlp/utils_py2.py b/rlp/utils_py2.py index 86f8c02..713f47c 100644 --- a/rlp/utils_py2.py +++ b/rlp/utils_py2.py @@ -31,10 +31,6 @@ def is_integer(value): return isinstance(value, (int, long)) -def bytes_to_int_array(value): - return memoryview(value).tolist() - - def decode_hex(s): if not isinstance(s, (str, unicode)): raise TypeError('Value must be an instance of str or unicode') @@ -45,3 +41,6 @@ def encode_hex(s): if not isinstance(s, (str, unicode)): raise TypeError('Value must be an instance of str or unicode') return s.encode('hex') + + +safe_ord = ord diff --git a/rlp/utils_py3.py b/rlp/utils_py3.py index dc72cb5..20e619c 100644 --- a/rlp/utils_py3.py +++ b/rlp/utils_py3.py @@ -39,10 +39,6 @@ def is_integer(value): return isinstance(value, int) -def bytes_to_int_array(value): - return value - - def decode_hex(s): if isinstance(s, str): return bytes.fromhex(s) @@ -57,3 +53,11 @@ def encode_hex(b): if isinstance(b, bytes): return binascii.hexlify(b) raise TypeError('Value must be an instance of str or bytes') + + +def safe_ord(c): + try: + return ord(c) + except TypeError: + assert isinstance(c, int) + return c From 07b3c4e9d19a9c36885eea5c9b78bc2bd0a7371d Mon Sep 17 00:00:00 2001 From: jnnk Date: Fri, 17 Apr 2015 15:18:14 +0200 Subject: [PATCH 21/29] Improved pure decoding speed by 50% (measured for Python 2) --- rlp/codec.py | 6 +++--- rlp/sedes/big_endian_int.py | 4 ++-- rlp/utils_py2.py | 10 ++++++++++ rlp/utils_py3.py | 4 ++++ 4 files changed, 19 insertions(+), 5 deletions(-) diff --git a/rlp/codec.py b/rlp/codec.py index 9016ab2..37e20b2 100644 --- a/rlp/codec.py +++ b/rlp/codec.py @@ -1,7 +1,7 @@ import collections import sys from .exceptions import EncodingError, DecodingError -from .utils import Atomic, str_to_bytes, is_integer, ascii_chr, safe_ord +from .utils import Atomic, str_to_bytes, is_integer, ascii_chr, safe_ord, big_endian_to_int from .sedes.binary import Binary as BinaryClass from .sedes import big_endian_int, binary from .sedes.lists import List, is_sedes @@ -94,13 +94,13 @@ def consume_length_prefix(rlp, start): return (str, b0 - 128, start + 1) elif b0 < 192: # long string ll = b0 - 128 - 56 + 1 - l = big_endian_int.deserialize(rlp[start + 1:start + 1 + ll]) + l = big_endian_to_int(rlp[start + 1:start + 1 + ll]) return (str, l, start + 1 + ll) elif b0 < 192 + 56: # short list return (list, b0 - 192, start + 1) else: # long list ll = b0 - 192 - 56 + 1 - l = big_endian_int.deserialize(rlp[start + 1:start + 1 + ll]) + l = big_endian_to_int(rlp[start + 1:start + 1 + ll]) if l < 56: raise DecodingError('Long list prefix used for short list', rlp) return (list, l, start + 1 + ll) diff --git a/rlp/sedes/big_endian_int.py b/rlp/sedes/big_endian_int.py index 6ca6a7e..f477d36 100644 --- a/rlp/sedes/big_endian_int.py +++ b/rlp/sedes/big_endian_int.py @@ -1,5 +1,5 @@ from ..exceptions import DeserializationError, SerializationError -from ..utils import int_to_big_endian, is_integer, encode_hex, ascii_chr +from ..utils import int_to_big_endian, big_endian_to_int, is_integer, ascii_chr class BigEndianInt(object): @@ -40,6 +40,6 @@ def deserialize(self, serial): 'length)', serial) serial = serial or b'\x00' - return int(encode_hex(serial), 16) + return big_endian_to_int(serial) big_endian_int = BigEndianInt() diff --git a/rlp/utils_py2.py b/rlp/utils_py2.py index 713f47c..2fcef71 100644 --- a/rlp/utils_py2.py +++ b/rlp/utils_py2.py @@ -1,4 +1,5 @@ import abc +import struct class Atomic(object): @@ -27,6 +28,15 @@ def int_to_big_endian(value): return s +def big_endian_to_int(value): + if len(value) == 1: + return ord(value) + elif len(value) <= 8: + return struct.unpack('>Q', value.rjust(8, '\x00'))[0] + else: + return int(encode_hex(value), 16) + + def is_integer(value): return isinstance(value, (int, long)) diff --git a/rlp/utils_py3.py b/rlp/utils_py3.py index 20e619c..d409e09 100644 --- a/rlp/utils_py3.py +++ b/rlp/utils_py3.py @@ -35,6 +35,10 @@ def int_to_big_endian(value): return (value).to_bytes(byte_length, byteorder='big') +def big_endian_to_int(value): + return int.from_bytes(value, byteorder='big') + + def is_integer(value): return isinstance(value, int) From dcff9212056d4fe5a8330a80ed5710688c1cea8e Mon Sep 17 00:00:00 2001 From: jnnk Date: Fri, 17 Apr 2015 15:31:58 +0200 Subject: [PATCH 22/29] Improved pure encoding speed by 20% (measured with Python 2) --- rlp/codec.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/rlp/codec.py b/rlp/codec.py index 37e20b2..ead0f61 100644 --- a/rlp/codec.py +++ b/rlp/codec.py @@ -1,7 +1,8 @@ import collections import sys from .exceptions import EncodingError, DecodingError -from .utils import Atomic, str_to_bytes, is_integer, ascii_chr, safe_ord, big_endian_to_int +from .utils import (Atomic, str_to_bytes, is_integer, ascii_chr, safe_ord, big_endian_to_int, + int_to_big_endian) from .sedes.binary import Binary as BinaryClass from .sedes import big_endian_int, binary from .sedes.lists import List, is_sedes @@ -71,7 +72,7 @@ def length_prefix(length, offset): if length < 56: return ascii_chr(offset + length) elif length < 256**8: - length_string = big_endian_int.serialize(length) + length_string = int_to_big_endian(length) return ascii_chr(offset + 56 - 1 + len(length_string)) + length_string else: raise ValueError('Length greater than 256**8') From 78f9ac7d1e303bc47be0a3123060ba29ef558eaf Mon Sep 17 00:00:00 2001 From: Heiko Heiko Date: Mon, 20 Apr 2015 19:21:10 +0200 Subject: [PATCH 23/29] hack to list serialize existing rlp --- rlp/codec.py | 13 +++++++++++-- rlp/sedes/lists.py | 4 ++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/rlp/codec.py b/rlp/codec.py index ead0f61..ea0b9c1 100644 --- a/rlp/codec.py +++ b/rlp/codec.py @@ -1,6 +1,7 @@ import collections import sys from .exceptions import EncodingError, DecodingError + from .utils import (Atomic, str_to_bytes, is_integer, ascii_chr, safe_ord, big_endian_to_int, int_to_big_endian) from .sedes.binary import Binary as BinaryClass @@ -40,10 +41,18 @@ def encode(obj, sedes=None, infer_serializer=True): return encode_raw(item) +class RLPData(str): + + "wraper to mark already rlp serialized data" + pass + + def encode_raw(item): """RLP encode (a nested sequence of) :class:`Atomic`s.""" - if isinstance(item, Atomic): - if len(item) == 1 and safe_ord(item[0]) < 128: + if isinstance(item, RLPData): + return item + elif isinstance(item, Atomic): + if len(item) == 1 and ord(item[0]) < 128: return str_to_bytes(item) payload = str_to_bytes(item) prefix_offset = 128 # string diff --git a/rlp/sedes/lists.py b/rlp/sedes/lists.py index b4cda36..a1a2fa6 100644 --- a/rlp/sedes/lists.py +++ b/rlp/sedes/lists.py @@ -145,11 +145,11 @@ def get_sedes(cls): @classmethod def serialize(cls, obj): if not hasattr(obj, 'fields'): - raise SerializationError('Cannot serialize this object', obj) + raise SerializationError('Cannot serialize this object (no fields)', obj) try: field_values = [getattr(obj, field) for field, _ in cls.fields] except AttributeError: - raise SerializationError('Cannot serialize this object', obj) + raise SerializationError('Cannot serialize this object (missing attribute)', obj) return cls.get_sedes().serialize(field_values) @classmethod From f4419085f7e09bfd3a3c32b43be1c925a1412104 Mon Sep 17 00:00:00 2001 From: Heiko Heiko Date: Tue, 21 Apr 2015 11:46:18 +0200 Subject: [PATCH 24/29] version up --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 4a04436..80d307b 100755 --- a/setup.py +++ b/setup.py @@ -33,7 +33,7 @@ def run_tests(self): setup( name='rlp', - version='0.3.7', + version='0.3.8', description="A package for encoding and decoding data in and from Recursive Length Prefix notation", long_description=readme, author="jnnk", From ecf1aa1001ba2c36b39ac7c09a7478561555c1f6 Mon Sep 17 00:00:00 2001 From: Heiko Heiko Date: Tue, 2 Jun 2015 11:52:48 +0200 Subject: [PATCH 25/29] rm: function overhead --- rlp/utils_py2.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/rlp/utils_py2.py b/rlp/utils_py2.py index 2fcef71..fcc1ba0 100644 --- a/rlp/utils_py2.py +++ b/rlp/utils_py2.py @@ -13,10 +13,7 @@ class Atomic(object): Atomic.register(unicode) str_to_bytes = bytes_to_str = str - - -def ascii_chr(value): - return chr(value) +ascii_chr = chr def int_to_big_endian(value): From 2afec417835eb62e3101dfe771a5d130b0bf4939 Mon Sep 17 00:00:00 2001 From: Vitalik Buterin Date: Thu, 11 Jun 2015 03:28:13 -0400 Subject: [PATCH 26/29] zero bytes prefix fix --- rlp/codec.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/rlp/codec.py b/rlp/codec.py index ea0b9c1..d5e6a4d 100644 --- a/rlp/codec.py +++ b/rlp/codec.py @@ -52,7 +52,7 @@ def encode_raw(item): if isinstance(item, RLPData): return item elif isinstance(item, Atomic): - if len(item) == 1 and ord(item[0]) < 128: + if len(item) == 1 and safe_ord(item[0]) < 128: return str_to_bytes(item) payload = str_to_bytes(item) prefix_offset = 128 # string @@ -104,12 +104,16 @@ def consume_length_prefix(rlp, start): return (str, b0 - 128, start + 1) elif b0 < 192: # long string ll = b0 - 128 - 56 + 1 + if rlp[start + 1] == b'\x00': + raise DecodingError('Length starts with zero bytes', rlp) l = big_endian_to_int(rlp[start + 1:start + 1 + ll]) return (str, l, start + 1 + ll) elif b0 < 192 + 56: # short list return (list, b0 - 192, start + 1) else: # long list ll = b0 - 192 - 56 + 1 + if rlp[start + 1] == b'\x00': + raise DecodingError('Length starts with zero bytes', rlp) l = big_endian_to_int(rlp[start + 1:start + 1 + ll]) if l < 56: raise DecodingError('Long list prefix used for short list', rlp) From a4d6e0178e00b158b516e9b24f4e9fdd7ed8bfb9 Mon Sep 17 00:00:00 2001 From: Vitalik Buterin Date: Thu, 11 Jun 2015 04:54:36 -0400 Subject: [PATCH 27/29] Some fixes to properly recognize invalid RLP --- rlp/codec.py | 5 +++-- rlp/sedes/big_endian_int.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/rlp/codec.py b/rlp/codec.py index d5e6a4d..b2cea19 100644 --- a/rlp/codec.py +++ b/rlp/codec.py @@ -101,10 +101,11 @@ def consume_length_prefix(rlp, start): if b0 < 128: # single byte return (str, 1, start) elif b0 < 128 + 56: # short string + assert b0 - 128 != 1 or safe_ord(rlp[start + 1]) >= 128 return (str, b0 - 128, start + 1) elif b0 < 192: # long string ll = b0 - 128 - 56 + 1 - if rlp[start + 1] == b'\x00': + if rlp[start + 1:start + 2] == b'\x00': raise DecodingError('Length starts with zero bytes', rlp) l = big_endian_to_int(rlp[start + 1:start + 1 + ll]) return (str, l, start + 1 + ll) @@ -112,7 +113,7 @@ def consume_length_prefix(rlp, start): return (list, b0 - 192, start + 1) else: # long list ll = b0 - 192 - 56 + 1 - if rlp[start + 1] == b'\x00': + if rlp[start + 1:start + 2] == b'\x00': raise DecodingError('Length starts with zero bytes', rlp) l = big_endian_to_int(rlp[start + 1:start + 1 + ll]) if l < 56: diff --git a/rlp/sedes/big_endian_int.py b/rlp/sedes/big_endian_int.py index f477d36..7c6c9db 100644 --- a/rlp/sedes/big_endian_int.py +++ b/rlp/sedes/big_endian_int.py @@ -35,7 +35,7 @@ def deserialize(self, serial): if self.l is not None and len(serial) != self.l: raise DeserializationError('Invalid serialization (wrong size)', serial) - if self.l is None and len(serial) > 1 and serial[0] == ascii_chr(0): + if self.l is None and len(serial) > 0 and serial[0:1] == ascii_chr(0): raise DeserializationError('Invalid serialization (not minimal ' 'length)', serial) From 49f5d2d49a93b6ddcc47dd99fb6870d05d9f2b1d Mon Sep 17 00:00:00 2001 From: jnnk Date: Fri, 12 Jun 2015 10:24:18 +0200 Subject: [PATCH 28/29] Added tests for non minimal encodings from https://github.com/ethereum/yellowpaper/issues/116 --- rlp/codec.py | 3 ++- tests/test_invalid.py | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/rlp/codec.py b/rlp/codec.py index b2cea19..6385245 100644 --- a/rlp/codec.py +++ b/rlp/codec.py @@ -101,7 +101,8 @@ def consume_length_prefix(rlp, start): if b0 < 128: # single byte return (str, 1, start) elif b0 < 128 + 56: # short string - assert b0 - 128 != 1 or safe_ord(rlp[start + 1]) >= 128 + if b0 - 128 == 1 and safe_ord(rlp[start + 1]) < 128: + raise DecodingError('Encoded as short string although single byte was possible', rlp) return (str, b0 - 128, start + 1) elif b0 < 192: # long string ll = b0 - 128 - 56 + 1 diff --git a/tests/test_invalid.py b/tests/test_invalid.py index 1e89684..203d79b 100644 --- a/tests/test_invalid.py +++ b/tests/test_invalid.py @@ -10,6 +10,11 @@ '\x83do', '\xc7\xc0\xc1\xc0\xc3\xc0\xc1\xc0\xff', '\xc7\xc0\xc1\xc0\xc3\xc0\xc1' + '\x81\x02', + '\xb8\x00', + '\xb9\x00\x00', + '\xba\x00\x02\xff\xff', + '\x81\x54' ) From 45160b64cc13c2c4b688965aec5028bef2e67f94 Mon Sep 17 00:00:00 2001 From: Heiko Heiko Date: Thu, 30 Jul 2015 17:01:54 +0200 Subject: [PATCH 29/29] MIT license --- LICENSE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LICENSE b/LICENSE index 766a0a5..34ba3e4 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) {{{year}}} {{{fullname}}} +Copyright (c) 2015 Jnnk, Vitalik Buterin Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal