From 9317f6a5b5bf820acf6ed7ad70015592e85a5250 Mon Sep 17 00:00:00 2001 From: Vitalik Buterin Date: Thu, 8 Nov 2018 10:45:13 -0500 Subject: [PATCH 1/5] Added tree hashing algorithm --- specs/simple-serialize.md | 71 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/specs/simple-serialize.md b/specs/simple-serialize.md index 03af186219..b2708b52fe 100644 --- a/specs/simple-serialize.md +++ b/specs/simple-serialize.md @@ -449,6 +449,77 @@ assert item_index == start + LENGTH_BYTES + length return typ(**values), item_index ``` +### Tree hash + +The below algorithm is defined recursively in the case of lists and containers, and it outputs a value equal to or less than 32 bytes in size. For the final output only (ie. not intermediate outputs), if the output is less than 32 bytes, right-zero-pad it to 32 bytes. The goal is collision resistance *within* each type, not between types. + +We define `hash(x)` as `BLAKE2b-512(x)[0:32]`. + +#### uint: 8/16/24/32/64/256, bool, address, hash32 + +Return the serialization of the value. + +#### bytes, hash96 + +Return the hash of the serialization of the value. + +#### List/Vectors + +First, we define some helpers and then the Merkle tree function. The constant `CHUNKSIZE` is set to 128. 
+ +```python +# Returns the smallest power of 2 equal to or higher than x +def next_power_of_2(x): + return x if x == 1 else next_power_of_2((x+1) // 2) * 2 + +# Extends data length to a power of 2 by minimally right-zero-padding +def extend_to_power_of_2(data): + return data + b'\x00' * (next_power_of_2(len(data)) - len(data)) + +# Concatenate a list of homogeneous objects into data and pad it +def list_to_glob(lst): + if len(lst[0]) != next_power_of_2(len(lst[0])): + lst = [extend_to_power_of_2(x) for x in lst] + data = b''.join(lst) + # Pad to chunksize + data += b'\x00' * (CHUNKSIZE - (len(data) % CHUNKSIZE or CHUNKSIZE)) + return data + +# Merkle tree hash of a list of items +def merkle_hash(lst): + # Turn list into padded data + data = list_to_glob(lst) + # Store length of list (to compensate for non-bijectiveness of padding) + datalen = len(lst).to_bytes(32, 'big') + # Convert to chunks + chunkz = [data[i:i+CHUNKSIZE] for i in range(0, len(data), CHUNKSIZE)] + # Tree-hash + while len(chunkz) > 1: + if len(chunkz) % 2 == 1: + chunkz.append(b'\x00' * CHUNKSIZE) + chunkz = [hash(chunkz[i] + chunkz[i+1]) for i in range(0, len(chunkz), 2)] + # Return hash of root and length data + return hash(chunkz[0] + datalen) +``` + +To hash a list, we simply do: + +```python +return merkle_hash([tree_hash(item) for item in value]) +``` + +Where `tree_hash` is a recursive application of the tree-hashing function (returning less than 32 bytes for short single values). + + +#### Container + +Recursively tree hash the values in the container in order sorted by key, and return the hash of the concatenation of the results. 
+ +```python +return hash(b''.join([tree_hash(getattr(value, field)) for field in sorted(value.fields)])) +``` + + ## Implementations | Language | Implementation | Description | From 694098ba2159902cb1c4f3fc4c391d25086356ba Mon Sep 17 00:00:00 2001 From: vbuterin Date: Wed, 14 Nov 2018 07:41:31 -0500 Subject: [PATCH 2/5] Update simple-serialize.md --- specs/simple-serialize.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/specs/simple-serialize.md b/specs/simple-serialize.md index b2708b52fe..a8d5c7c5de 100644 --- a/specs/simple-serialize.md +++ b/specs/simple-serialize.md @@ -449,7 +449,7 @@ assert item_index == start + LENGTH_BYTES + length return typ(**values), item_index ``` -### Tree hash +### Tree_hash The below algorithm is defined recursively in the case of lists and containers, and it outputs a value equal to or less than 32 bytes in size. For the final output only (ie. not intermediate outputs), if the output is less than 32 bytes, right-zero-pad it to 32 bytes. The goal is collision resistance *within* each type, not between types. @@ -502,13 +502,13 @@ def merkle_hash(lst): return hash(chunkz[0] + datalen) ``` -To hash a list, we simply do: +To `tree_hash` a list, we simply do: ```python return merkle_hash([tree_hash(item) for item in value]) ``` -Where `tree_hash` is a recursive application of the tree-hashing function (returning less than 32 bytes for short single values). +Where the inner `tree_hash` is a recursive application of the tree-hashing function (returning less than 32 bytes for short single values). 
#### Container From 1450155b6b55fa2f5cf39cdb123047a33add43cb Mon Sep 17 00:00:00 2001 From: Danny Ryan Date: Thu, 15 Nov 2018 06:12:35 +0900 Subject: [PATCH 3/5] add one more ref to tree_hash --- specs/simple-serialize.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/simple-serialize.md b/specs/simple-serialize.md index a8d5c7c5de..1dd8597238 100644 --- a/specs/simple-serialize.md +++ b/specs/simple-serialize.md @@ -451,7 +451,7 @@ return typ(**values), item_index ### Tree_hash -The below algorithm is defined recursively in the case of lists and containers, and it outputs a value equal to or less than 32 bytes in size. For the final output only (ie. not intermediate outputs), if the output is less than 32 bytes, right-zero-pad it to 32 bytes. The goal is collision resistance *within* each type, not between types. +The below `tree_hash` algorithm is defined recursively in the case of lists and containers, and it outputs a value equal to or less than 32 bytes in size. For the final output only (ie. not intermediate outputs), if the output is less than 32 bytes, right-zero-pad it to 32 bytes. The goal is collision resistance *within* each type, not between types. We define `hash(x)` as `BLAKE2b-512(x)[0:32]`. 
From 5f612486ca5f18884f9d6ac3a5ae1d187cad40a9 Mon Sep 17 00:00:00 2001 From: vbuterin Date: Wed, 14 Nov 2018 19:34:47 -0500 Subject: [PATCH 4/5] Add the zero-item special case --- specs/simple-serialize.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/simple-serialize.md b/specs/simple-serialize.md index 1dd8597238..fa6b57f019 100644 --- a/specs/simple-serialize.md +++ b/specs/simple-serialize.md @@ -499,7 +499,7 @@ def merkle_hash(lst): chunkz.append(b'\x00' * CHUNKSIZE) chunkz = [hash(chunkz[i] + chunkz[i+1]) for i in range(0, len(chunkz), 2)] # Return hash of root and length data - return hash(chunkz[0] + datalen) + return hash((chunkz[0] if len(chunkz) > 0 else b'\x00' * 32) + datalen) ``` To `tree_hash` a list, we simply do: From f131e73c6d0bba73897d5476bec08a350ff2eb37 Mon Sep 17 00:00:00 2001 From: Danny Ryan Date: Thu, 15 Nov 2018 21:43:05 +0900 Subject: [PATCH 5/5] list_to_glob to handle empty list --- specs/simple-serialize.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/specs/simple-serialize.md b/specs/simple-serialize.md index fa6b57f019..a57350ea59 100644 --- a/specs/simple-serialize.md +++ b/specs/simple-serialize.md @@ -478,6 +478,8 @@ def extend_to_power_of_2(data): # Concatenate a list of homogeneous objects into data and pad it def list_to_glob(lst): + if len(lst) == 0: + return b'' if len(lst[0]) != next_power_of_2(len(lst[0])): lst = [extend_to_power_of_2(x) for x in lst] data = b''.join(lst)