Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SSZ impl fixes #960

Merged
merged 5 commits into from
Apr 18, 2019
Merged
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 103 additions & 27 deletions test_libs/pyspec/eth2spec/utils/minimal_ssz.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ def __init__(self, **kwargs):

def __eq__(self, other):
return (
self.fields == other.fields and
self.serialize() == other.serialize()
self.fields == other.fields and
self.serialize() == other.serialize()
protolambda marked this conversation as resolved.
Show resolved Hide resolved
)

def __hash__(self):
Expand Down Expand Up @@ -58,18 +58,39 @@ def __len__(self):


def is_basic(typ):
return isinstance(typ, str) and (typ[:4] in ('uint', 'bool') or typ == 'byte')
# if not a string, it is a complex, and cannot be basic
if not isinstance(typ, str):
return False
# "uintN": N-bit unsigned integer (where N in [8, 16, 32, 64, 128, 256])
if typ[:4] == 'uint' and typ[4:] in ['8', '16', '32', '64', '128', '256']:
return True
# "bool": True or False
if typ == 'bool':
return True
# alias: "byte" -> "uint8"
if typ == 'byte':
return True
# default
return False
protolambda marked this conversation as resolved.
Show resolved Hide resolved


def is_constant_sized(typ):
# basic objects are fixed size by definition
if is_basic(typ):
return True
# dynamic size array type, "list": [elem_type].
# Not constant size by definition.
elif isinstance(typ, list) and len(typ) == 1:
return is_constant_sized(typ[0])
elif isinstance(typ, list) and len(typ) == 2:
return False
# fixed size array type, "vector": [elem_type, length]
# Constant size, but only if the elements are.
elif isinstance(typ, list) and len(typ) == 2:
return is_constant_sized(typ[0])
# bytes array (fixed or dynamic size)
elif isinstance(typ, str) and typ[:5] == 'bytes':
return len(typ) > 5
# if no length suffix, it has a dynamic size
return typ != 'bytes'
# containers are only constant-size if all of the fields are constant size.
elif hasattr(typ, 'fields'):
for subtype in typ.fields.values():
if not is_constant_sized(subtype):
Expand All @@ -90,35 +111,61 @@ def coerce_to_bytes(x):
raise Exception("Expecting bytes")


def encode_bytes(value):
serialized_bytes = coerce_to_bytes(value)
assert len(serialized_bytes) < 2**(8 * BYTES_PER_LENGTH_PREFIX)
serialized_length = len(serialized_bytes).to_bytes(BYTES_PER_LENGTH_PREFIX, 'little')
return serialized_length + serialized_bytes


def encode_variable_size_container(values, types):
return encode_bytes(encode_fixed_size_container(values, types))


def encode_fixed_size_container(values, types):
return b''.join([serialize_value(v, typ) for (v, typ) in zip(values, types)])


def serialize_value(value, typ=None):
if typ is None:
typ = infer_type(value)
# "uintN"
if isinstance(typ, str) and typ[:4] == 'uint':
length = int(typ[4:])
assert length in (8, 16, 32, 64, 128, 256)
return value.to_bytes(length // 8, 'little')
elif typ == 'bool':
# "bool"
elif isinstance(typ, str) and typ == 'bool':
assert value in (True, False)
return b'\x01' if value is True else b'\x00'
elif (isinstance(typ, list) and len(typ) == 1) or typ == 'bytes':
serialized_bytes = coerce_to_bytes(value) if typ == 'bytes' else b''.join([serialize_value(element, typ[0]) for element in value])
assert len(serialized_bytes) < 2**(8 * BYTES_PER_LENGTH_PREFIX)
serialized_length = len(serialized_bytes).to_bytes(BYTES_PER_LENGTH_PREFIX, 'little')
return serialized_length + serialized_bytes
# Vector:
elif isinstance(typ, list) and len(typ) == 2:
# (regardless of element type, sanity-check if the length reported in the vector type matches the value length)
assert len(value) == typ[1]
return b''.join([serialize_value(element, typ[0]) for element in value])
# If value is fixed-size (i.e. element type is fixed-size):
if is_constant_sized(typ):
return encode_fixed_size_container(value, [typ[0]] * len(value))
# If value is variable-size (i.e. element type is variable-size)
else:
return encode_variable_size_container(value, [typ[0]] * len(value))
# "bytes" (variable size)
elif isinstance(typ, str) and typ == 'bytes':
return encode_bytes(value)
# List
elif isinstance(typ, list) and len(typ) == 1:
return encode_variable_size_container(value, [typ[0]] * len(value))
# "bytesN" (fixed size)
elif isinstance(typ, str) and len(typ) > 5 and typ[:5] == 'bytes':
assert len(value) == int(typ[5:]), (value, int(typ[5:]))
return coerce_to_bytes(value)
# containers
elif hasattr(typ, 'fields'):
serialized_bytes = b''.join([serialize_value(getattr(value, field), subtype) for field, subtype in typ.fields.items()])
values = [getattr(value, field) for field in typ.fields.keys()]
types = list(typ.fields.values())
if is_constant_sized(typ):
return serialized_bytes
return encode_fixed_size_container(values, types)
else:
assert len(serialized_bytes) < 2**(8 * BYTES_PER_LENGTH_PREFIX)
serialized_length = len(serialized_bytes).to_bytes(BYTES_PER_LENGTH_PREFIX, 'little')
return serialized_length + serialized_bytes
return encode_variable_size_container(values, types)
else:
print(value, typ)
raise Exception("Type not recognized")
Expand Down Expand Up @@ -151,42 +198,71 @@ def mix_in_length(root, length):
return hash(root + length.to_bytes(32, 'little'))


# Note: defaults to uint64 for integer type inference due to lack of information.
# Other integer sizes are still supported, see spec.
protolambda marked this conversation as resolved.
Show resolved Hide resolved
def infer_type(value):
if hasattr(value.__class__, 'fields'):
return value.__class__
elif isinstance(value, Vector):
return [infer_type(value[0]) if len(value) > 0 else 'uint64', len(value)]
if len(value) > 0:
return [infer_type(value[0]), len(value)]
else:
# Element type does not matter too much,
# assumed to be a basic type for size-encoding purposes, vector is empty.
return ['uint64']
elif isinstance(value, list):
return [infer_type(value[0])] if len(value) > 0 else ['uint64']
if len(value) > 0:
return [infer_type(value[0])]
else:
# Element type does not matter, list-content size will be encoded regardless, list is empty.
return ['uint64']
elif isinstance(value, (bytes, str)):
return 'bytes'
elif isinstance(value, int):
return 'uint64'
elif isinstance(value, int):
return 'uint64'
protolambda marked this conversation as resolved.
Show resolved Hide resolved
else:
raise Exception("Failed to infer type")


protolambda marked this conversation as resolved.
Show resolved Hide resolved
def hash_tree_root(value, typ=None):
if typ is None:
typ = infer_type(value)
# -------------------------------------
# merkleize(pack(value))
# basic object: merkleize packed version (merkleization pads it to 32 bytes if it is not already)
if is_basic(typ):
return merkleize(pack([value], typ))
elif isinstance(typ, list) and len(typ) == 1 and is_basic(typ[0]):
return mix_in_length(merkleize(pack(value, typ[0])), len(value))
elif isinstance(typ, list) and len(typ) == 1 and not is_basic(typ[0]):
return mix_in_length(merkleize([hash_tree_root(element, typ[0]) for element in value]), len(value))
# or a vector of basic objects
elif isinstance(typ, list) and len(typ) == 2 and is_basic(typ[0]):
assert len(value) == typ[1]
return merkleize(pack(value, typ[0]))
# -------------------------------------
# mix_in_length(merkleize(pack(value)), len(value))
# if value is a list of basic objects
elif isinstance(typ, list) and len(typ) == 1 and is_basic(typ[0]):
return mix_in_length(merkleize(pack(value, typ[0])), len(value))
# (needs some extra work for non-fixed-sized bytes array)
elif typ == 'bytes':
return mix_in_length(merkleize(chunkify(coerce_to_bytes(value))), len(value))
# -------------------------------------
# merkleize([hash_tree_root(element) for element in value])
# if value is a vector of composite objects
elif isinstance(typ, list) and len(typ) == 2 and not is_basic(typ[0]):
return merkleize([hash_tree_root(element, typ[0]) for element in value])
# (needs some extra work for fixed-sized bytes array)
elif isinstance(typ, str) and typ[:5] == 'bytes' and len(typ) > 5:
assert len(value) == int(typ[5:])
return merkleize(chunkify(coerce_to_bytes(value)))
elif isinstance(typ, list) and len(typ) == 2 and not is_basic(typ[0]):
return merkleize([hash_tree_root(element, typ[0]) for element in value])
# or a container
elif hasattr(typ, 'fields'):
return merkleize([hash_tree_root(getattr(value, field), subtype) for field, subtype in typ.fields.items()])
# -------------------------------------
# mix_in_length(merkleize([hash_tree_root(element) for element in value]), len(value))
# if value is a list of composite objects
elif isinstance(typ, list) and len(typ) == 1 and not is_basic(typ[0]):
return mix_in_length(merkleize([hash_tree_root(element, typ[0]) for element in value]), len(value))
# -------------------------------------
else:
raise Exception("Type not recognized")

Expand Down