From 2605dfba082ed57ad08951ab393483d6bf64a4f4 Mon Sep 17 00:00:00 2001 From: vbuterin Date: Mon, 17 Jun 2019 11:16:00 -0400 Subject: [PATCH 01/22] Updates to SSZ partials --- specs/light_client/merkle_proofs.md | 224 +++++++++++++++++----------- 1 file changed, 139 insertions(+), 85 deletions(-) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index f009d97375..85d859a54d 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -64,43 +64,71 @@ y_data_root len(y) We can now define a concept of a "path", a way of describing a function that takes as input an SSZ object and outputs some specific (possibly deeply nested) member. For example, `foo -> foo.x` is a path, as are `foo -> len(foo.y)` and `foo -> foo.y[5].w`. We'll describe paths as lists, which can have two representations. In "human-readable form", they are `["x"]`, `["y", "__len__"]` and `["y", 5, "w"]` respectively. In "encoded form", they are lists of `uint64` values, in these cases (assuming the fields of `foo` in order are `x` then `y`, and `w` is the first field of `y[i]`) `[0]`, `[1, 2**64-1]`, `[1, 5, 0]`. ```python -def path_to_encoded_form(obj: Any, path: List[Union[str, int]]) -> List[int]: - if len(path) == 0: - return [] - elif isinstance(path[0], "__len__"): - assert len(path) == 1 - return [LENGTH_FLAG] - elif isinstance(path[0], str) and hasattr(obj, "fields"): - return [list(obj.fields.keys()).index(path[0])] + path_to_encoded_form(getattr(obj, path[0]), path[1:]) - elif isinstance(obj, (Vector, List)): - return [path[0]] + path_to_encoded_form(obj[path[0]], path[1:]) +def item_length(typ: Type) -> int: + """ + Returns the number of bytes in a basic type, or 32 (a full hash) for compound types. + """ + if typ == bool: + return 1 + elif issubclass(typ, uint): + return typ.byte_len else: - raise Exception("Unknown type / path") -``` - -We can now define a function `get_generalized_indices(object: Any, path: List[int], root: int=1) -> List[int]` that converts an object and a path to a set of generalized indices (note that for constant-sized objects, there is only one generalized index and it only depends on the path, but for dynamically sized objects the indices may depend on the object itself too). For dynamically-sized objects, the set of indices will have more than one member because of the need to access an array's length to determine the correct generalized index for some array access. - -```python -def get_generalized_indices(obj: Any, path: List[int], root: int=1) -> List[int]: - if len(path) == 0: - return [root] - elif isinstance(obj, Vector): - items_per_chunk = (32 // len(serialize(x))) if isinstance(x, int) else 1 - new_root = root * next_power_of_2(len(obj) // items_per_chunk) + path[0] // items_per_chunk - return get_generalized_indices(obj[path[0]], path[1:], new_root) - elif isinstance(obj, List) and path[0] == LENGTH_FLAG: - return [root * 2 + 1] - elif isinstance(obj, List) and isinstance(path[0], int): - assert path[0] < len(obj) - items_per_chunk = (32 // len(serialize(x))) if isinstance(x, int) else 1 - new_root = root * 2 * next_power_of_2(len(obj) // items_per_chunk) + path[0] // items_per_chunk - return [root *2 + 1] + get_generalized_indices(obj[path[0]], path[1:], new_root) - elif hasattr(obj, "fields"): - field = list(fields.keys())[path[0]] - new_root = root * next_power_of_2(len(fields)) + path[0] - return get_generalized_indices(getattr(obj, field), path[1:], new_root) + return 32 + + +def get_elem_type(typ: Type, index: int) -> Type: + """ + Returns the type of the element of an object of the given type with the given index + or member variable name (eg. `7` for `x[7]`, `"foo"` for `x.foo`) + """ + return typ.get_fields_dict()[index] if is_container_type(typ) else typ.elem_type + + +def get_chunk_count(typ: Type) -> int: + """ + Returns the number of hashes needed to represent the top-level elements in the given type + (eg. `x.foo` or `x[7]` but not `x[7].bar` or `x.foo.baz`). In all cases except lists/vectors + of basic types, this is simply the number of top-level elements, as each element gets one + hash. For lists/vectors of basic types, it is often fewer because multiple basic elements + can be packed into one 32-byte chunk. + """ + if is_basic_type(typ): + return 1 + elif is_list_kind(typ) or is_vector_kind(typ): + return (typ.length * item_length(typ.elem_type) + 31) // 32 + else: + return len(typ.get_fields()) + + +def get_item_position(typ: Type, index: Union[int, str]) -> Tuple[int, int, int]: + """ + Returns three variables: (i) the index of the chunk in which the given element of the item is + represented, (ii) the starting byte position, (iii) the ending byte position. For example for + a 6-item list of uint64 values, index=2 will return (0, 16, 24), index=5 will return (1, 8, 16) + """ + if is_list_kind(typ) or is_vector_kind(typ): + start = index * item_length(typ.elem_type) + return start // 32, start % 32, start % 32 + item_length(typ.elem_type) + elif is_container_type(typ): + return typ.get_field_names().index(index), 0, item_length(get_elem_type(typ, index)) else: - raise Exception("Unknown type / path") + raise Exception("Only lists/vectors/containers supported") + + +def get_generalized_index(typ: Type, path: List[Union[int, str]]) -> int: + """ + Converts a path (eg. `[7, "foo", 3]` for `x[7].foo[3]`, `[12, "bar", "__len__"]` for + `len(x[12].bar)`) into the generalized index representing its position in the Merkle tree. + """ + for p in path: + assert not is_basic_type(typ) # If we descend to a basic type, the path cannot continue further + if p == '__len__': + typ, root = uint256, root * 2 + 1 if is_list_kind(typ) else None + else: + pos, _, _ = get_item_position(typ, p) + root = root * (2 if is_list_kind(typ) else 1) * next_power_of_two(get_chunk_count(typ)) + pos + typ = get_elem_type(typ, p) + return root ``` ## Merkle multiproofs @@ -116,72 +144,98 @@ x x . . . . x * . are unused nodes, * are used nodes, x are the values we are trying to prove. Notice how despite being a multiproof for 3 values, it requires only 3 auxiliary nodes, only one node more than would be required to prove a single value. Normally the efficiency gains are not quite that extreme, but the savings relative to individual Merkle proofs are still significant. As a rule of thumb, a multiproof for k nodes at the same level of an n-node tree has size `k * (n/k + log(n/k))`. -Here is code for creating and verifying a multiproof. First, a method for computing the generalized indices of the auxiliary tree nodes that a proof of a given set of generalized indices will require: +First, we provide a method for computing the generalized indices of the auxiliary tree nodes that a proof of a given set of generalized indices will require: -```python -def get_proof_indices(tree_indices: List[int]) -> List[int]: - # Get all indices touched by the proof - maximal_indices = set() - for i in tree_indices: - x = i - while x > 1: - maximal_indices.add(x ^ 1) - x //= 2 - maximal_indices = tree_indices + sorted(list(maximal_indices))[::-1] - # Get indices that cannot be recalculated from earlier indices - redundant_indices = set() - proof = [] - for index in maximal_indices: - if index not in redundant_indices: - proof.append(index) - while index > 1: - redundant_indices.add(index) - if (index ^ 1) not in redundant_indices: - break - index //= 2 - return [i for i in proof if i not in tree_indices] +``` +def get_branch_indices(tree_index: int) -> List[int]: + """ + Get the generalized indices of the sister chunks along the path from the chunk with the + given tree index to the root. + """ + o = [tree_index ^ 1] + while o[-1] > 1: + o.append((o[-1] // 2) ^ 1) + return o[:-1] + +def get_expanded_indices(indices: List[int]) -> List[int]: + """ + Get the generalized indices of all chunks in the tree needed to prove the chunks with the given + generalized indices. + """ + branches = set() + for index in indices: + branches = branches.union(set(get_branch_indices(index) + [index])) + return sorted(list([x for x in branches if x*2 not in branches or x*2+1 not in branches]))[::-1] ``` -Generating a proof is simply a matter of taking the node of the SSZ hash tree with the union of the given generalized indices for each index given by `get_proof_indices`, and outputting the list of nodes in the same order. +Generating a proof that covers paths `p1 ... pn` is simply a matter of taking the chunks in the SSZ hash tree with generalized indices `get_expanded_indices([p1 ... pn])`. -Here is the verification function: +We now provide the bulk of the proving machinery, a function that takes a `{generalized_index: chunk}` map and fills in chunks that can be inferred (inferring the parent by hashing its two children): ```python -def verify_multi_proof(root: Bytes32, indices: List[int], leaves: List[Bytes32], proof: List[Bytes32]) -> bool: - tree = {} - for index, leaf in zip(indices, leaves): - tree[index] = leaf - for index, proof_item in zip(get_proof_indices(indices), proof): - tree[index] = proof_item - index_queue = sorted(tree.keys())[:-1] - i = 0 - while i < len(index_queue): - index = index_queue[i] - if index >= 2 and index ^ 1 in tree: - tree[index // 2] = hash(tree[index - index % 2] + tree[index - index % 2 + 1]) - index_queue.append(index // 2) - i += 1 - return (indices == []) or (1 in tree and tree[1] == root) +def fill(objects: Dict[int, Bytes32]) -> Dict[int, Bytes32]: + """ + Fills in chunks that can be inferred from other chunks. For a set of chunks that constitutes + a valid proof, this includes the root (generalized index 1). + """ + objects = {k: v for k, v in objects.items()} + keys = sorted(objects.keys())[::-1] + pos = 0 + while pos < len(keys): + k = keys[pos] + if k in objects and k ^ 1 in objects and k // 2 not in objects: + objects[k // 2] = hash(objects[k & - 2] + objects[k | 1]) + keys.append(k // 2) + pos += 1 + # Completeness and consistency check + assert 1 in objects + for k in objects: + if k > 1: + assert objects[k // 2] == hash(objects[k & -2] + objects[k | 1]) + return objects ``` ## MerklePartial -We define: +We define a container that encodes an SSZ partial, and provide the methods for converting it into a `{generalized_index: chunk}` map, for which we provide a method to extract individual values. To determine the hash tree root of an object represented by an SSZ partial, simply check `decode_ssz_partial(partial)[1]`. ### `SSZMerklePartial` +```python +class SSZMerklePartial(Container): + indices: List[uint64, 2**32] + chunks: List[Bytes32, 2**32] +``` + +### `decode_ssz_partial` ```python -{ - "root": "bytes32", - "indices": ["uint64"], - "values": ["bytes32"], - "proof": ["bytes32"] -} +def decode_ssz_partial(encoded: SSZMerklePartial) -> Dict[int, Bytes32]: + """ + Decodes an encoded SSZ partial into a generalized index -> chunk map, and verify hash consistency. + """ + full_indices = get_expanded_indices(encoded.indices) + return fill({k:v for k,v in zip(full_indices, encoded.chunks)}) ``` -### Proofs for execution +### `extract_value_at_path` -We define `MerklePartial(f, arg1, arg2..., focus=0)` as being a `SSZMerklePartial` object wrapping a Merkle multiproof of the set of nodes in the hash tree of the SSZ object `arg[focus]` that is needed to authenticate the parts of the object needed to compute `f(arg1, arg2...)`. +```python +def extract_value_at_path(chunks: Dict[int, Bytes32], typ: Type, path: List[Union[int, str]]) -> Any: + """ + Provides the value of the element in the object represented by the given encoded SSZ partial at + the given path. Returns a KeyError if that path is not covered by this SSZ partial. + """ + root = 1 + for p in path: + if p == '__len__': + return deserialize_basic(chunks[root * 2 + 1][:8], uint64) + if is_list_kind(typ): + assert 0 <= p < deserialize_basic(chunks[root * 2 + 1][:8], uint64) + pos, start, end = get_item_position(typ, p) + root = root * (2 if is_list_kind(typ) else 1) * next_power_of_two(get_chunk_count(typ)) + pos + typ = get_elem_type(typ, p) + return deserialize_basic(chunks[root][start: end], typ) +``` -Ideally, any function which accepts an SSZ object should also be able to accept a `SSZMerklePartial` object as a substitute. +Here [link TBD] is a python implementation of SSZ partials that represents them as a class that can be read and written to just like the underlying objects, so you can eg. perform state transitions on SSZ partials and compute the resulting root From 7e3318318d0d2a534f11cc2d0ff671cfe211fd1f Mon Sep 17 00:00:00 2001 From: vbuterin Date: Sun, 23 Jun 2019 00:29:45 -0400 Subject: [PATCH 02/22] Updated to newer SSZ --- specs/light_client/merkle_proofs.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index 85d859a54d..b058be7ca4 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -94,7 +94,7 @@ def get_chunk_count(typ: Type) -> int: """ if is_basic_type(typ): return 1 - elif is_list_kind(typ) or is_vector_kind(typ): + elif issubclass(typ, (List, Vector, Bytes, BytesN)): return (typ.length * item_length(typ.elem_type) + 31) // 32 else: return len(typ.get_fields()) @@ -106,7 +106,7 @@ def get_item_position(typ: Type, index: Union[int, str]) -> Tuple[int, int, int] represented, (ii) the starting byte position, (iii) the ending byte position. For example for a 6-item list of uint64 values, index=2 will return (0, 16, 24), index=5 will return (1, 8, 16) """ - if is_list_kind(typ) or is_vector_kind(typ): + if issubclass(typ, (List, Vector, Bytes, BytesN)): start = index * item_length(typ.elem_type) return start // 32, start % 32, start % 32 + item_length(typ.elem_type) elif is_container_type(typ): @@ -123,10 +123,10 @@ def get_generalized_index(typ: Type, path: List[Union[int, str]]) -> int: for p in path: assert not is_basic_type(typ) # If we descend to a basic type, the path cannot continue further if p == '__len__': - typ, root = uint256, root * 2 + 1 if is_list_kind(typ) else None + typ, root = uint256, root * 2 + 1 if issubclass(typ, (List, Bytes)) else None else: pos, _, _ = get_item_position(typ, p) - root = root * (2 if is_list_kind(typ) else 1) * next_power_of_two(get_chunk_count(typ)) + pos + root = root * (2 if issubclass(typ, (List, Bytes)) else 1) * next_power_of_two(get_chunk_count(typ)) + pos typ = get_elem_type(typ, p) return root ``` @@ -230,10 +230,10 @@ def extract_value_at_path(chunks: Dict[int, Bytes32], typ: Type, path: List[Unio for p in path: if p == '__len__': return deserialize_basic(chunks[root * 2 + 1][:8], uint64) - if is_list_kind(typ): + if iissubclass(typ, (List, Bytes)): assert 0 <= p < deserialize_basic(chunks[root * 2 + 1][:8], uint64) pos, start, end = get_item_position(typ, p) - root = root * (2 if is_list_kind(typ) else 1) * next_power_of_two(get_chunk_count(typ)) + pos + root = root * (2 if issubclass(typ, (List, Bytes)) else 1) * next_power_of_two(get_chunk_count(typ)) + pos typ = get_elem_type(typ, p) return deserialize_basic(chunks[root][start: end], typ) ``` From cf7d65e8ff837abfd529fa4ab0381610c7ffd021 Mon Sep 17 00:00:00 2001 From: vbuterin Date: Tue, 30 Jul 2019 12:15:46 -0400 Subject: [PATCH 03/22] Added generalized index handling functions --- specs/light_client/merkle_proofs.md | 38 ++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index b058be7ca4..f62dc8d5c4 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -115,7 +115,7 @@ def get_item_position(typ: Type, index: Union[int, str]) -> Tuple[int, int, int] raise Exception("Only lists/vectors/containers supported") -def get_generalized_index(typ: Type, path: List[Union[int, str]]) -> int: +def get_generalized_index(typ: Type, path: List[Union[int, str]]) -> GeneralizedIndex: """ Converts a path (eg. `[7, "foo", 3]` for `x[7].foo[3]`, `[12, "bar", "__len__"]` for `len(x[12].bar)`) into the generalized index representing its position in the Merkle tree. @@ -131,6 +131,42 @@ def get_generalized_index(typ: Type, path: List[Union[int, str]]) -> int: return root ``` +### Helpers for generalized indices + +#### `concat_generalized_indices` + +```python +def concat_generalized_indices(*indices: Sequence[GeneralizedIndex]) -> GeneralizedIndex: + """ + Given generalized indices i1 for A -> B, i2 for B -> C .... i_n for Y -> Z, returns + the generalized index for A -> Z. + """ + o = GeneralizedIndex(1) + for i in indices: + o = o * get_previous_power_of_2(i) + i + return o +``` + +#### `get_generalized_index_length` + +```python +def get_generalized_index_length(index: GeneralizedIndex) -> int: + """ + Returns the length of a path represented by a generalized index. + """ + return log(index) +``` + +#### `get_generalized_index_bit` + +```python +def get_generalized_index_bit(index: GeneralizedIndex, bit: int) -> bool: + """ + Returns the i'th bit of a generalized index. + """ + return (index & (1 << bit)) > 0 +``` + ## Merkle multiproofs We define a Merkle multiproof as a minimal subset of nodes in a Merkle tree needed to fully authenticate that a set of nodes actually are part of a Merkle tree with some specified root, at a particular set of generalized indices. For example, here is the Merkle multiproof for positions 0, 1, 6 in an 8-node Merkle tree (i.e. generalized indices 8, 9, 14): From cb1a0cbd5f49eed89a0aa058a60e5e134543fffe Mon Sep 17 00:00:00 2001 From: vbuterin Date: Thu, 1 Aug 2019 07:57:34 -0400 Subject: [PATCH 04/22] Update specs/light_client/merkle_proofs.md --- specs/light_client/merkle_proofs.md | 1 + 1 file changed, 1 insertion(+) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index f62dc8d5c4..5b88fbbca8 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -120,6 +120,7 @@ def get_generalized_index(typ: Type, path: List[Union[int, str]]) -> Generalized Converts a path (eg. `[7, "foo", 3]` for `x[7].foo[3]`, `[12, "bar", "__len__"]` for `len(x[12].bar)`) into the generalized index representing its position in the Merkle tree. """ + root = 1 for p in path: assert not is_basic_type(typ) # If we descend to a basic type, the path cannot continue further if p == '__len__': From ed3749264b9ce5cbc444911da99ed289739e1d30 Mon Sep 17 00:00:00 2001 From: vbuterin Date: Thu, 1 Aug 2019 08:07:57 -0400 Subject: [PATCH 05/22] Update specs/light_client/merkle_proofs.md Co-Authored-By: Danny Ryan --- specs/light_client/merkle_proofs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index 5b88fbbca8..114947326e 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -202,7 +202,7 @@ def get_expanded_indices(indices: List[int]) -> List[int]: branches = set() for index in indices: branches = branches.union(set(get_branch_indices(index) + [index])) - return sorted(list([x for x in branches if x*2 not in branches or x*2+1 not in branches]))[::-1] + return sorted([x for x in branches if x*2 not in branches or x*2+1 not in branches])[::-1] ``` Generating a proof that covers paths `p1 ... pn` is simply a matter of taking the chunks in the SSZ hash tree with generalized indices `get_expanded_indices([p1 ... pn])`. From 92fc0f2b81a2cc8a589fee54109533d843fdc182 Mon Sep 17 00:00:00 2001 From: vbuterin Date: Thu, 1 Aug 2019 08:08:12 -0400 Subject: [PATCH 06/22] Update specs/light_client/merkle_proofs.md Co-Authored-By: Danny Ryan --- specs/light_client/merkle_proofs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index 114947326e..81650bdbf3 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -221,7 +221,7 @@ def fill(objects: Dict[int, Bytes32]) -> Dict[int, Bytes32]: while pos < len(keys): k = keys[pos] if k in objects and k ^ 1 in objects and k // 2 not in objects: - objects[k // 2] = hash(objects[k & - 2] + objects[k | 1]) + objects[k // 2] = hash(objects[k & -2] + objects[k | 1]) keys.append(k // 2) pos += 1 # Completeness and consistency check From 446ad3c392439fb916cf54cd6911d5dc5df1aab6 Mon Sep 17 00:00:00 2001 From: vbuterin Date: Thu, 1 Aug 2019 08:08:18 -0400 Subject: [PATCH 07/22] Update specs/light_client/merkle_proofs.md Co-Authored-By: Danny Ryan --- specs/light_client/merkle_proofs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index 81650bdbf3..dae2a1704e 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -267,7 +267,7 @@ def extract_value_at_path(chunks: Dict[int, Bytes32], typ: Type, path: List[Unio for p in path: if p == '__len__': return deserialize_basic(chunks[root * 2 + 1][:8], uint64) - if iissubclass(typ, (List, Bytes)): + if issubclass(typ, (List, Bytes)): assert 0 <= p < deserialize_basic(chunks[root * 2 + 1][:8], uint64) pos, start, end = get_item_position(typ, p) root = root * (2 if issubclass(typ, (List, Bytes)) else 1) * next_power_of_two(get_chunk_count(typ)) + pos From 55f5f106f175d64d48befc910025f1f9c33b39c1 Mon Sep 17 00:00:00 2001 From: vbuterin Date: Thu, 1 Aug 2019 10:56:31 -0400 Subject: [PATCH 08/22] Updated type checkers for generalized index functions. --- specs/light_client/merkle_proofs.md | 44 +++++++++++++---------------- 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index dae2a1704e..6107e459ca 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -17,12 +17,6 @@ -## Constants - -| Name | Value | -| - | - | -| `LENGTH_FLAG` | `2**64 - 1` | - ## Generalized Merkle tree index In a binary Merkle tree, we define a "generalized index" of a node as `2**depth + index`. Visually, this looks as follows: @@ -38,7 +32,8 @@ Note that the generalized index has the convenient property that the two childre ```python def merkle_tree(leaves: List[Bytes32]) -> List[Bytes32]: - o = [0] * len(leaves) + leaves + padded_length = next_power_of_2(len(leaves)) + o = [ZERO_HASH] * padded_length + leaves + [ZERO_HASH] * (padded_length - len(leaves)) for i in range(len(leaves) - 1, 0, -1): o[i] = hash(o[i * 2] + o[i * 2 + 1]) return o @@ -64,27 +59,24 @@ y_data_root len(y) We can now define a concept of a "path", a way of describing a function that takes as input an SSZ object and outputs some specific (possibly deeply nested) member. For example, `foo -> foo.x` is a path, as are `foo -> len(foo.y)` and `foo -> foo.y[5].w`. We'll describe paths as lists, which can have two representations. In "human-readable form", they are `["x"]`, `["y", "__len__"]` and `["y", 5, "w"]` respectively. In "encoded form", they are lists of `uint64` values, in these cases (assuming the fields of `foo` in order are `x` then `y`, and `w` is the first field of `y[i]`) `[0]`, `[1, 2**64-1]`, `[1, 5, 0]`. ```python -def item_length(typ: Type) -> int: +def item_length(typ: SSZType) -> int: """ Returns the number of bytes in a basic type, or 32 (a full hash) for compound types. """ - if typ == bool: - return 1 - elif issubclass(typ, uint): + if issubclass(typ, BasicValue): return typ.byte_len else: return 32 -def get_elem_type(typ: Type, index: int) -> Type: +def get_elem_type(typ: ComplexType, index: int) -> Type: """ Returns the type of the element of an object of the given type with the given index or member variable name (eg. `7` for `x[7]`, `"foo"` for `x.foo`) """ - return typ.get_fields_dict()[index] if is_container_type(typ) else typ.elem_type + return typ.get_fields()[key] if issubclass(typ, Container) else typ.elem_type - -def get_chunk_count(typ: Type) -> int: +def chunk_count(typ: SSZType) -> int: """ Returns the number of hashes needed to represent the top-level elements in the given type (eg. `x.foo` or `x[7]` but not `x[7].bar` or `x.foo.baz`). In all cases except lists/vectors @@ -92,24 +84,28 @@ def get_chunk_count(typ: Type) -> int: hash. For lists/vectors of basic types, it is often fewer because multiple basic elements can be packed into one 32-byte chunk. """ - if is_basic_type(typ): + if issubclass(typ, BasicValue): return 1 - elif issubclass(typ, (List, Vector, Bytes, BytesN)): + elif issubclass(typ, Bits): + return (typ.length + 255) // 256 + elif issubclass(typ, Elements): return (typ.length * item_length(typ.elem_type) + 31) // 32 - else: + elif issubclass(typ, Container): return len(typ.get_fields()) + else: + raise Exception(f"Type not supported: {typ}") -def get_item_position(typ: Type, index: Union[int, str]) -> Tuple[int, int, int]: +def get_item_position(typ: SSZType, index: Union[int, str]) -> Tuple[int, int, int]: """ Returns three variables: (i) the index of the chunk in which the given element of the item is represented, (ii) the starting byte position, (iii) the ending byte position. For example for a 6-item list of uint64 values, index=2 will return (0, 16, 24), index=5 will return (1, 8, 16) """ - if issubclass(typ, (List, Vector, Bytes, BytesN)): + if issubclass(typ, Elements): start = index * item_length(typ.elem_type) return start // 32, start % 32, start % 32 + item_length(typ.elem_type) - elif is_container_type(typ): + elif issubclass(typ, Container): return typ.get_field_names().index(index), 0, item_length(get_elem_type(typ, index)) else: raise Exception("Only lists/vectors/containers supported") @@ -122,12 +118,12 @@ def get_generalized_index(typ: Type, path: List[Union[int, str]]) -> Generalized """ root = 1 for p in path: - assert not is_basic_type(typ) # If we descend to a basic type, the path cannot continue further + assert not issubclass(typ, BasicValue) # If we descend to a basic type, the path cannot continue further if p == '__len__': typ, root = uint256, root * 2 + 1 if issubclass(typ, (List, Bytes)) else None else: pos, _, _ = get_item_position(typ, p) - root = root * (2 if issubclass(typ, (List, Bytes)) else 1) * next_power_of_two(get_chunk_count(typ)) + pos + root = root * (2 if issubclass(typ, (List, Bytes)) else 1) * next_power_of_two(chunk_count(typ)) + pos typ = get_elem_type(typ, p) return root ``` @@ -197,7 +193,7 @@ def get_branch_indices(tree_index: int) -> List[int]: def get_expanded_indices(indices: List[int]) -> List[int]: """ Get the generalized indices of all chunks in the tree needed to prove the chunks with the given - generalized indices. + generalized indices, including the leaves. """ branches = set() for index in indices: From 725bdf822340db5048c161b54192f457bcde1ba2 Mon Sep 17 00:00:00 2001 From: vbuterin Date: Thu, 1 Aug 2019 11:40:40 -0400 Subject: [PATCH 09/22] Update specs/light_client/merkle_proofs.md --- specs/light_client/merkle_proofs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index 6107e459ca..16cbd2908b 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -120,7 +120,7 @@ def get_generalized_index(typ: Type, path: List[Union[int, str]]) -> Generalized for p in path: assert not issubclass(typ, BasicValue) # If we descend to a basic type, the path cannot continue further if p == '__len__': - typ, root = uint256, root * 2 + 1 if issubclass(typ, (List, Bytes)) else None + typ, root = uint64, root * 2 + 1 if issubclass(typ, (List, Bytes)) else None else: pos, _, _ = get_item_position(typ, p) root = root * (2 if issubclass(typ, (List, Bytes)) else 1) * next_power_of_two(chunk_count(typ)) + pos From 1b852adef1d48e51e38d5f5c9f760345a766202d Mon Sep 17 00:00:00 2001 From: vbuterin Date: Thu, 1 Aug 2019 18:11:30 -0400 Subject: [PATCH 10/22] Simplified merkle multiproofs --- specs/light_client/merkle_proofs.md | 132 +++++++++++++--------------- 1 file changed, 62 insertions(+), 70 deletions(-) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index 16cbd2908b..8f9b14fb59 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -157,11 +157,32 @@ def get_generalized_index_length(index: GeneralizedIndex) -> int: #### `get_generalized_index_bit` ```python -def get_generalized_index_bit(index: GeneralizedIndex, bit: int) -> bool: +def get_generalized_index_bit(index: GeneralizedIndex, position: int) -> bool: """ - Returns the i'th bit of a generalized index. + Returns the given bit of a generalized index. """ - return (index & (1 << bit)) > 0 + return (index & (1 << position)) > 0 +``` + +#### `generalized_index_sibling` + +```python +def generalized_index_sibling(index: GeneralizedIndex) -> GeneralizedIndex: + return index ^ 1 +``` + +#### `generalized_index_child` + +```python +def generalized_index_child(index: GeneralizedIndex, right_side: bool) -> GeneralizedIndex: + return index * 2 + right_side +``` + +#### `generalized_index_parent` + +```python +def generalized_index_parent(index: GeneralizedIndex) -> GeneralizedIndex: + return index // 2 ``` ## Merkle multiproofs @@ -180,38 +201,57 @@ x x . . . . x * First, we provide a method for computing the generalized indices of the auxiliary tree nodes that a proof of a given set of generalized indices will require: ``` -def get_branch_indices(tree_index: int) -> List[int]: +def get_branch_indices(tree_index: GeneralizedIndex) -> List[GeneralizedIndex]: """ Get the generalized indices of the sister chunks along the path from the chunk with the given tree index to the root. """ - o = [tree_index ^ 1] + o = [generalized_index_sibling(tree_index)] while o[-1] > 1: - o.append((o[-1] // 2) ^ 1) + o.append(generalized_index_sibling(generalized_index_parent(o[-1]))) return o[:-1] -def get_expanded_indices(indices: List[int]) -> List[int]: +def get_helper_indices(indices: List[GeneralizedIndex]) -> List[GeneralizedIndex]: """ - Get the generalized indices of all chunks in the tree needed to prove the chunks with the given - generalized indices, including the leaves. + Get the generalized indices of all "extra" chunks in the tree needed to prove the chunks with the given + generalized indices. Note that the decreasing order is chosen deliberately to ensure equivalence to the + order of hashes in a regular single-item Merkle proof in the single-item case. """ - branches = set() + all_indices = set() for index in indices: - branches = branches.union(set(get_branch_indices(index) + [index])) - return sorted([x for x in branches if x*2 not in branches or x*2+1 not in branches])[::-1] + all_indices = all_indices.union(set(get_branch_indices(index) + [index])) + + return sorted([ + x for x in all_indices if not + (generalized_index_child(x, 0) in all_indices and generalized_index_child(x, 1) in all_indices) and not + (x in indices) + ])[::-1] ``` -Generating a proof that covers paths `p1 ... pn` is simply a matter of taking the chunks in the SSZ hash tree with generalized indices `get_expanded_indices([p1 ... pn])`. +Now we provide the Merkle proof verification functions. First, for single item proofs: + +```python +def verify_merkle_proof(leaf: Hash, proof: Sequence[Hash], index: GeneralizedIndex, root: Hash) -> bool: + assert len(proof) == get_generalized_index_length(index) + for i, h in enumerate(proof): + if get_generalized_index_bit(index, i): + leaf = hash(h + leaf) + else: + leaf = hash(leaf + h) + return leaf == root +``` -We now provide the bulk of the proving machinery, a function that takes a `{generalized_index: chunk}` map and fills in chunks that can be inferred (inferring the parent by hashing its two children): +Now for multi-item proofs: ```python -def fill(objects: Dict[int, Bytes32]) -> Dict[int, Bytes32]: - """ - Fills in chunks that can be inferred from other chunks. For a set of chunks that constitutes - a valid proof, this includes the root (generalized index 1). - """ - objects = {k: v for k, v in objects.items()} +def verify_merkle_multiproof(leaves: Sequence[Hash], proof: Sequence[Hash], indices: Sequence[GeneralizedIndex], root: Hash) -> bool: + assert len(leaves) == len(indices) + helper_indices = get_helper_indices(indices) + assert len(proof) == len(helper_indices) + objects = { + **{index:node for index, node in zip(indices, leaves)}, + **{index:node for index, node in zip(helper_indices, proof)} + } keys = sorted(objects.keys())[::-1] pos = 0 while pos < len(keys): @@ -220,55 +260,7 @@ def fill(objects: Dict[int, Bytes32]) -> Dict[int, Bytes32]: objects[k // 2] = hash(objects[k & -2] + objects[k | 1]) keys.append(k // 2) pos += 1 - # Completeness and consistency check - assert 1 in objects - for k in objects: - if k > 1: - assert objects[k // 2] == hash(objects[k & -2] + objects[k | 1]) - return objects -``` - -## MerklePartial - -We define a container that encodes an SSZ partial, and provide the methods for converting it into a `{generalized_index: chunk}` map, for which we provide a method to extract individual values. To determine the hash tree root of an object represented by an SSZ partial, simply check `decode_ssz_partial(partial)[1]`. - -### `SSZMerklePartial` - -```python -class SSZMerklePartial(Container): - indices: List[uint64, 2**32] - chunks: List[Bytes32, 2**32] -``` - -### `decode_ssz_partial` - -```python -def decode_ssz_partial(encoded: SSZMerklePartial) -> Dict[int, Bytes32]: - """ - Decodes an encoded SSZ partial into a generalized index -> chunk map, and verify hash consistency. - """ - full_indices = get_expanded_indices(encoded.indices) - return fill({k:v for k,v in zip(full_indices, encoded.chunks)}) -``` - -### `extract_value_at_path` - -```python -def extract_value_at_path(chunks: Dict[int, Bytes32], typ: Type, path: List[Union[int, str]]) -> Any: - """ - Provides the value of the element in the object represented by the given encoded SSZ partial at - the given path. Returns a KeyError if that path is not covered by this SSZ partial. - """ - root = 1 - for p in path: - if p == '__len__': - return deserialize_basic(chunks[root * 2 + 1][:8], uint64) - if issubclass(typ, (List, Bytes)): - assert 0 <= p < deserialize_basic(chunks[root * 2 + 1][:8], uint64) - pos, start, end = get_item_position(typ, p) - root = root * (2 if issubclass(typ, (List, Bytes)) else 1) * next_power_of_two(get_chunk_count(typ)) + pos - typ = get_elem_type(typ, p) - return deserialize_basic(chunks[root][start: end], typ) + return objects[1] == root ``` -Here [link TBD] is a python implementation of SSZ partials that represents them as a class that can be read and written to just like the underlying objects, so you can eg. perform state transitions on SSZ partials and compute the resulting root +Note that the single-item proof is a special case of a multi-item proof; a valid single-item proof verifies correctly when put into the multi-item verification function (making the natural trivial changes to input arguments, `index -> [index]` and `leaf -> [leaf]`). From 44bd00164ed272e92d8a8a47c267502e8d4ae6e8 Mon Sep 17 00:00:00 2001 From: vbuterin Date: Thu, 1 Aug 2019 18:11:45 -0400 Subject: [PATCH 11/22] Update specs/light_client/merkle_proofs.md Co-Authored-By: Danny Ryan --- specs/light_client/merkle_proofs.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index 8f9b14fb59..fcc8031a8a 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -75,7 +75,8 @@ def get_elem_type(typ: ComplexType, index: int) -> Type: or member variable name (eg. `7` for `x[7]`, `"foo"` for `x.foo`) """ return typ.get_fields()[key] if issubclass(typ, Container) else typ.elem_type - + + def chunk_count(typ: SSZType) -> int: """ Returns the number of hashes needed to represent the top-level elements in the given type From e93e7a3f1a29179bdfa8bd9f606a79927fa0c610 Mon Sep 17 00:00:00 2001 From: vbuterin Date: Thu, 1 Aug 2019 18:12:27 -0400 Subject: [PATCH 12/22] Update specs/light_client/merkle_proofs.md --- specs/light_client/merkle_proofs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index fcc8031a8a..af0ff760d0 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -69,7 +69,7 @@ def item_length(typ: SSZType) -> int: return 32 -def get_elem_type(typ: ComplexType, index: int) -> Type: +def get_elem_type(typ: ComplexType, index: Union[int, str]) -> Type: """ Returns the type of the element of an object of the given type with the given index or member variable name (eg. `7` for `x[7]`, `"foo"` for `x.foo`) From 2e7c8fa529d1385cb10d990b6b150b75bafff7c3 Mon Sep 17 00:00:00 2001 From: vbuterin Date: Thu, 1 Aug 2019 18:12:35 -0400 Subject: [PATCH 13/22] Update specs/light_client/merkle_proofs.md --- specs/light_client/merkle_proofs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index af0ff760d0..afa42f1841 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -74,7 +74,7 @@ def get_elem_type(typ: ComplexType, index: Union[int, str]) -> Type: Returns the type of the element of an object of the given type with the given index or member variable name (eg. `7` for `x[7]`, `"foo"` for `x.foo`) """ - return typ.get_fields()[key] if issubclass(typ, Container) else typ.elem_type + return typ.get_fields()[index] if issubclass(typ, Container) else typ.elem_type def chunk_count(typ: SSZType) -> int: From 0a874528a8e9ce31703554133393405b6c4ed438 Mon Sep 17 00:00:00 2001 From: vbuterin Date: Fri, 2 Aug 2019 09:43:03 -0400 Subject: [PATCH 14/22] Update specs/light_client/merkle_proofs.md Co-Authored-By: Danny Ryan --- specs/light_client/merkle_proofs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index afa42f1841..9afa967388 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -201,7 +201,7 @@ x x . . . . x * First, we provide a method for computing the generalized indices of the auxiliary tree nodes that a proof of a given set of generalized indices will require: -``` +```python def get_branch_indices(tree_index: GeneralizedIndex) -> List[GeneralizedIndex]: """ Get the generalized indices of the sister chunks along the path from the chunk with the From 845daf5b1f22d6b4e91c2713c8887f41d78750de Mon Sep 17 00:00:00 2001 From: vbuterin Date: Fri, 2 Aug 2019 09:43:24 -0400 Subject: [PATCH 15/22] Update specs/light_client/merkle_proofs.md Co-Authored-By: Diederik Loerakker --- specs/light_client/merkle_proofs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index 9afa967388..2a1103ca25 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -100,7 +100,7 @@ def chunk_count(typ: SSZType) -> int: def get_item_position(typ: SSZType, index: Union[int, str]) -> Tuple[int, int, int]: """ Returns three variables: (i) the index of the chunk in which the given element of the item is - represented, (ii) the starting byte position, (iii) the ending byte position. For example for + represented, (ii) the starting byte position within the chunk, (iii) the ending byte position within the chunk. For example for a 6-item list of uint64 values, index=2 will return (0, 16, 24), index=5 will return (1, 8, 16) """ if issubclass(typ, Elements): From 59307d1380de871ac1d031c48c781aae0c0c60b1 Mon Sep 17 00:00:00 2001 From: vbuterin Date: Fri, 2 Aug 2019 09:43:42 -0400 Subject: [PATCH 16/22] Update specs/light_client/merkle_proofs.md Co-Authored-By: Diederik Loerakker --- specs/light_client/merkle_proofs.md | 1 + 1 file changed, 1 insertion(+) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index 2a1103ca25..bf6cd7a61e 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -85,6 +85,7 @@ def chunk_count(typ: SSZType) -> int: hash. For lists/vectors of basic types, it is often fewer because multiple basic elements can be packed into one 32-byte chunk. """ + # typ.length describes the limit for list types, or the length for vector types. if issubclass(typ, BasicValue): return 1 elif issubclass(typ, Bits): From c6cdec8217b41f2cfa20f4f117fbaac47a4c89ab Mon Sep 17 00:00:00 2001 From: vbuterin Date: Fri, 2 Aug 2019 09:45:26 -0400 Subject: [PATCH 17/22] Fixed get generalized indices --- specs/light_client/merkle_proofs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index bf6cd7a61e..038990709e 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -142,7 +142,7 @@ def concat_generalized_indices(*indices: Sequence[GeneralizedIndex]) -> Generali """ o = GeneralizedIndex(1) for i in indices: - o = o * get_previous_power_of_2(i) + i + o = o * get_previous_power_of_2(i) + (i - get_previous_power_of_2(i)) return o ``` From c8d128aa59c5dc3c9ae419ce30b20a028444af8f Mon Sep 17 00:00:00 2001 From: vbuterin Date: Fri, 2 Aug 2019 09:46:59 -0400 Subject: [PATCH 18/22] Update specs/light_client/merkle_proofs.md Co-Authored-By: Diederik Loerakker --- specs/light_client/merkle_proofs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index 038990709e..2e00806ee0 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -259,7 +259,7 @@ def verify_merkle_multiproof(leaves: Sequence[Hash], proof: Sequence[Hash], indi while pos < len(keys): k = keys[pos] if k in objects and k ^ 1 in objects and k // 2 not in objects: - objects[k // 2] = hash(objects[k & -2] + objects[k | 1]) + objects[k // 2] = hash(objects[(k | 1) ^ 1] + objects[k | 1]) keys.append(k // 2) pos += 1 return objects[1] == root From 6138edc5bebd99c87759161e277c4d57823ad9a0 Mon Sep 17 00:00:00 2001 From: vbuterin Date: Fri, 2 Aug 2019 09:49:35 -0400 Subject: [PATCH 19/22] log -> log2 --- specs/light_client/merkle_proofs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index 2e00806ee0..469f347ac6 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -153,7 +153,7 @@ def get_generalized_index_length(index: GeneralizedIndex) -> int: """ Returns the length of a path represented by a generalized index. """ - return log(index) + return log2(index) ``` #### `get_generalized_index_bit` From 5237ac4954382e3b9f5a17dd9bf9fd6df0637876 Mon Sep 17 00:00:00 2001 From: vbuterin Date: Fri, 2 Aug 2019 09:57:32 -0400 Subject: [PATCH 20/22] Update specs/light_client/merkle_proofs.md --- specs/light_client/merkle_proofs.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index 469f347ac6..e0be4f070e 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -132,6 +132,8 @@ def get_generalized_index(typ: Type, path: List[Union[int, str]]) -> Generalized ### Helpers for generalized indices +_Usage note: functions outside this section should manipulate generalized indices using only functions inside this section. This is to make it easier for developers to implement generalized indices with underlying representations other than bigints._ + #### `concat_generalized_indices` ```python From c7af2496ef0f6cf113b52508e6b352378ea14746 Mon Sep 17 00:00:00 2001 From: vbuterin Date: Wed, 14 Aug 2019 23:44:19 +0200 Subject: [PATCH 21/22] Update specs/light_client/merkle_proofs.md Co-Authored-By: Diederik Loerakker --- specs/light_client/merkle_proofs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index e0be4f070e..698dce4e62 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -229,7 +229,7 @@ def get_helper_indices(indices: List[GeneralizedIndex]) -> List[GeneralizedIndex x for x in all_indices if not (generalized_index_child(x, 0) in all_indices and generalized_index_child(x, 1) in all_indices) and not (x in indices) - ])[::-1] + ], reverse=True) ``` Now we provide the Merkle proof verification functions. First, for single item proofs: From 72103e9deb0aeec8ead093913265b920c3452cbe Mon Sep 17 00:00:00 2001 From: vbuterin Date: Wed, 14 Aug 2019 23:44:26 +0200 Subject: [PATCH 22/22] Update specs/light_client/merkle_proofs.md Co-Authored-By: Diederik Loerakker --- specs/light_client/merkle_proofs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index 698dce4e62..21115dd275 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -256,7 +256,7 @@ def verify_merkle_multiproof(leaves: Sequence[Hash], proof: Sequence[Hash], indi **{index:node for index, node in zip(indices, leaves)}, **{index:node for index, node in zip(helper_indices, proof)} } - keys = sorted(objects.keys())[::-1] + keys = sorted(objects.keys(), reverse=True) pos = 0 while pos < len(keys): k = keys[pos]