From 3371dcc23b1f412e4b93466436424ffaee96a8ae Mon Sep 17 00:00:00 2001 From: Vitalik Buterin Date: Wed, 13 Mar 2019 02:54:27 -0500 Subject: [PATCH 1/9] Added light client related files --- specs/light_client/merkle_proofs.md | 134 ++++++++++++++++++++++ specs/light_client/sync_protocol.md | 169 ++++++++++++++++++++++++++++ 2 files changed, 303 insertions(+) create mode 100644 specs/light_client/merkle_proofs.md create mode 100644 specs/light_client/sync_protocol.md diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md new file mode 100644 index 0000000000..cf4dad2e38 --- /dev/null +++ b/specs/light_client/merkle_proofs.md @@ -0,0 +1,134 @@ +### Generalized Merkle tree index + +In a binary Merkle tree, we define a "generalized index" of a node as `2**depth + index`. Visually, this looks as follows: + +``` + 1 + 2 3 +4 5 6 7 + ... +``` + +Note that the generalized index has the convenient property that the two children of node `k` are `2k` and `2k+1`, and also that it equals the position of a node in the linear representation of the Merkle tree that's computed by this function: + +```python +def merkle_tree(leaves): + o = [0] * len(leaves) + leaves + for i in range(len(leaves)-1, 0, -1): + o[i] = hash(o[i*2] + o[i*2+1]) + return o +``` + +We will define Merkle proofs in terms of generalized indices. + +### SSZ object to index + +We can describe the hash tree of any SSZ object, rooted in `hash_tree_root(object)`, as a binary Merkle tree whose depth may vary. For example, an object `{x: bytes32, y: List[uint64]}` would look as follows: + +``` + root + / \ + x y_root + / \ +y_data_root len(y) + / \ + /\ /\ + ....... +``` + +We can now define a concept of a "path", a way of describing a function that takes as input an SSZ object and outputs some specific (possibly deeply nested) member. For example, `foo -> foo.x` is a path, as are `foo -> len(foo.y)` and `foo -> foo[5]`. 
We'll describe paths as lists: in these three cases they are `["x"]`, `["y", "len"]` and `["y", 5]` respectively. We can now define a function `get_generalized_indices(object: Any, path: List[str OR int], root=1: int) -> int` that converts an object and a path to a set of generalized indices (note that for constant-sized objects, there is only one generalized index and it only depends on the path, but for dynamically sized objects the indices may depend on the object itself too). For dynamically-sized objects, the set of indices will have more than one member because of the need to access an array's length to determine the correct generalized index for some array access. + +```python +def get_generalized_indices(obj: Any, path: List[str or int], root=1) -> List[int]: + if len(path) == 0: + return [root] + elif isinstance(obj, StaticList): + items_per_chunk = (32 // len(serialize(x))) if isinstance(x, int) else 1 + new_root = root * next_power_of_2(len(obj) // items_per_chunk) + path[0] // items_per_chunk + return get_generalized_indices(obj[path[0]], path[1:], new_root) + elif isinstance(obj, DynamicList) and path[0] == "len": + return [root * 2 + 1] + elif isinstance(obj, DynamicList) and isinstance(path[0], int): + assert path[0] < len(obj) + items_per_chunk = (32 // len(serialize(x))) if isinstance(x, int) else 1 + new_root = root * 2 * next_power_of_2(len(obj) // items_per_chunk) + path[0] // items_per_chunk + return [root *2 + 1] + get_generalized_indices(obj[path[0]], path[1:], new_root) + elif hasattr(obj, "fields"): + index = list(fields.keys()).index(path[0]) + new_root = root * next_power_of_2(len(fields)) + index + return get_generalized_indices(getattr(obj, path[0]), path[1:], new_root) + else: + raise Exception("Unknown type / path") +``` + +### Merkle multiproofs + +We define a Merkle multiproof as a minimal subset of nodes in a Merkle tree needed to fully authenticate that a set of nodes actually are part of a Merkle tree with some specified root, at 
a particular set of generalized indices. For example, here is the Merkle multiproof for positions 0, 1, 6 in an 8-leaf Merkle tree (i.e. generalized indices 8, 9, 14): + +``` + . + . . + . * * . +x x . . . . x * +``` + +. are unused nodes, * are used nodes, x are the values we are trying to prove. Notice how, despite being a multiproof for 3 values, it requires only 3 auxiliary nodes: exactly as many as would be required to prove a single value (`log2(8) = 3` sibling nodes). Normally the efficiency gains are not quite that extreme, but the savings relative to individual Merkle proofs are still significant. As a rule of thumb, a multiproof for k nodes at the same level of an n-leaf tree has size at most roughly `k * log2(n/k)` (for `k = 1` this is the usual `log2(n)` of a single-branch proof). + +Here is code for creating and verifying a multiproof. First, a helper: + +```python +def log2(x): + return 0 if x == 1 else 1 + log2(x//2) +``` + +Next, a method for computing the generalized indices of the auxiliary tree nodes that a proof of a given set of generalized indices will require: + +```python +def get_proof_indices(tree_indices: List[int]) -> List[int]: + # Get all indices touched by the proof + maximal_indices = set({}) + for i in tree_indices: + x = i + while x > 1: + maximal_indices.add(x ^ 1) + x //= 2 + maximal_indices = tree_indices + sorted(list(maximal_indices))[::-1] + # Get indices that cannot be recalculated from earlier indices + redundant_indices = set({}) + proof = [] + for index in maximal_indices: + if index not in redundant_indices: + proof.append(index) + while index > 1: + redundant_indices.add(index) + if (index ^ 1) not in redundant_indices: + break + index //= 2 + return [i for i in proof if i not in tree_indices] +```` + +Generating a proof is simply a matter of taking the node of the SSZ hash tree with the union of the given generalized indices for each index given by `get_proof_indices`, and outputting the list of nodes in the same order. 
+ +```python +def verify_multi_proof(root, indices, leaves, proof): + tree = {} + for index, leaf in zip(indices, leaves): + tree[index] = leaf + for index, proofitem in zip(get_proof_indices(indices), proof): + tree[index] = proofitem + indexqueue = sorted(tree.keys())[:-1] + i = 0 + while i < len(indexqueue): + index = indexqueue[i] + if index >= 2 and index^1 in tree: + tree[index//2] = hash(tree[index - index%2] + tree[index - index%2 + 1]) + indexqueue.append(index//2) + i += 1 + return (indices == []) or (1 in tree and tree[1] == root) +``` + +#### Proofs for execution + +We define `MerklePartial(f, arg1, arg2...)` as being a list of Merkle multiproofs of the sets of nodes in the hash trees of the SSZ objects that are needed to authenticate the values needed to compute some function `f(arg1, arg2...)`. An individual Merkle multiproof is given as a dynamic sized list of `bytes32` values, a `MerklePartial` is a fixed-size list of objects `{proof: ["bytes32"], value: "bytes32"}`, one for each `arg` to `f` (if some `arg` is a base type, then the multiproof is empty). + +Ideally, any function which accepts an SSZ object should also be able to accept a `MerklePartial` object as a substitute. diff --git a/specs/light_client/sync_protocol.md b/specs/light_client/sync_protocol.md new file mode 100644 index 0000000000..2a70dbb314 --- /dev/null +++ b/specs/light_client/sync_protocol.md @@ -0,0 +1,169 @@ +# Beacon chain light client syncing + +One of the design goals of the eth2 beacon chain is light-client friendlines, both to allow low-resource clients (mobile phones, IoT, etc) to maintain access to the blockchain in a reasonably safe way, but also to facilitate the development of "bridges" between the eth2 beacon chain and other chains. + +### Preliminaries + +We define an "expansion" of an object as an object where a field in an object that is meant to represent the `hash_tree_root` of another object is replaced by the object. 
Note that defining expansions is not a consensus-layer-change; it is merely a "re-interpretation" of the object. Particularly, the `hash_tree_root` of an expansion of an object is identical to that of the original object, and we can define expansions where, given a complete history, it is always possible to compute the expansion of any object in the history. The opposite of an expansion is a "summary" (eg. `BeaconBlockHeader` is a summary of `BeaconBlock`). + +We define two expansions: + +* `ExtendedBeaconBlock`, which is identical to a `BeaconBlock` except `state_root` is replaced with the corresponding `state: ExtendedBeaconState` +* `ExtendedBeaconState`, which is identical to a `BeaconState` except `latest_active_index_roots: List[Bytes32]` is replaced by `latest_active_indices: List[List[ValidatorIndex]]`, where `BeaconState.latest_active_index_roots[i] = hash_tree_root(ExtendedBeaconState.latest_active_indices[i])` + +Note that there is now a new way to compute `get_active_validator_indices`: + +```python +def get_active_validator_indices(state: BeaconState, epoch: Epoch) -> List[ValidatorIndex]: + return state.latest_active_indices[epoch % LATEST_ACTIVE_INDEX_ROOTS_LENGTH] +``` + +Note that it takes `state` instead of `state.validator_registry` as an argument. This does not affect its use in `get_shuffled_committee`, because `get_shuffled_committee` has access to the full `state` as one of its arguments. + +A `MerklePartial(f, *args)` is an object that contains a minimal Merkle proof needed to compute `f(*args)`. A `MerklePartial` can be used in place of a regular SSZ object, though a computation would return an error if it attempts to access part of the object that is not contained in the proof. 
+ +We add a data type `PeriodData` and four helpers: + +```python +{ + 'validator_count': 'uint64', + 'seed': 'bytes32', + 'committee': [Validator] +} +``` + +```python +def get_earlier_start_epoch(slot: Slot) -> int: + return slot - slot % PERSISTENT_COMMITTEE_PERIOD - PERSISTENT_COMMITTEE_PERIOD * 2 +def get_later_start_epoch(slot: Slot) -> int: + return slot - slot % PERSISTENT_COMMITTEE_PERIOD - PERSISTENT_COMMITTEE_PERIOD + +def get_earlier_period_data(block: ExtendedBeaconBlock, shard_id: Shard) -> PeriodData: + period_start = get_earlier_start_epoch(header.slot) + validator_count = len(get_active_validator_indices(state, period_start)) + committee_count = validator_count // (SHARD_COUNT * TARGET_COMMITTEE_SIZE) + 1 + indices = get_shuffled_committee(block.state, shard_id, period_start, 0, committee_count) + return PeriodData( + validator_count, + generate_seed(block.state, period_start), + [block.state.validator_registry[i] for i in indices] + ) + +def get_later_period_data(block: ExtendedBeaconBlock, shard_id: Shard) -> PeriodData: + period_start = get_later_start_epoch(header.slot) + validator_count = len(get_active_validator_indices(state, period_start)) + committee_count = validator_count // (SHARD_COUNT * TARGET_COMMITTEE_SIZE) + 1 + indices = get_shuffled_committee(block.state, shard_id, period_start, 0, committee_count) + return PeriodData( + validator_count, + generate_seed(block.state, period_start), + [block.state.validator_registry[i] for i in indices] + ) +``` + +### Light client state + +A light client will keep track of: + +* A random `shard_id` in `[0...SHARD_COUNT-1]` (selected once and retained forever) +* A block header that they consider to be finalized (`finalized_header`) and do not expect to revert. +* `later_period_data = get_maximal_later_committee(finalized_header, shard_id)` +* `earlier_period_data = get_maximal_earlier_committee(finalized_header, shard_id)` + +We use the struct `validator_memory` to keep track of these variables. 
+ +### Updating the shuffled committee + +If a client's `validator_memory.finalized_header` changes so that `header.slot // PERSISTENT_COMMITTEE_PERIOD` increases, then the client can ask the network for a `new_committee_proof = MerklePartial(get_maximal_later_committee, validator_memory.finalized_header, shard_id)`. It can then compute: + +```python +earlier_period_data = later_period_data +later_period_data = get_later_period_data(new_committee_proof, finalized_header, shard_id) +``` + +The maximum size of a proof is `128 * ((22-7) * 32 + 110) = 75520` bytes for validator records and `(22-7) * 32 + 128 * 8 = 1504` for the active index proof (much smaller because the relevant active indices are all beside each other in the Merkle tree). This needs to be done once per `PERSISTENT_COMMITTEE_PERIOD` epochs (2048 epochs / 9 days), or ~38 bytes per epoch. + +### Computing the current committee + +Here is a helper to compute the committee at a slot given the maximal earlier and later committees: + +```python +def compute_committee(header: BeaconBlockHeader, + validator_memory: ValidatorMemory): + + earlier_validator_count = validator_memory.earlier_period_data.validator_count + later_validator_count = validator_memory.later_period_data.validator_count + earlier_committee = validator_memory.earlier_period_data.committee + later_committee = validator_memory.later_period_data.committee + earlier_start_epoch = get_earlier_start_epoch(header.slot) + later_start_epoch = get_later_start_epoch(header.slot) + epoch = slot_to_epoch(header.slot) + + actual_committee_count = max( + earlier_validator_count // (SHARD_COUNT * TARGET_COMMITTEE_SIZE), + later_validator_count // (SHARD_COUNT * TARGET_COMMITTEE_SIZE), + ) + 1 + + def get_offset(count, end:bool): + return get_split_offset(count, + SHARD_COUNT * committee_count, + validator_memory.shard_id * committee_count + (1 if end else 0)) + + actual_earlier_committee = maximal_earlier_committee[ + 0:get_offset(earlier_validator_count, 
True) - get_offset(earlier_validator_count, False) + ] + actual_later_committee = maximal_later_committee[ + 0:get_offset(later_validator_count, True) - get_offset(later_validator_count, False) + ] + def get_switchover_epoch(index): + return ( + bytes_to_int(hash(validator_memory.earlier_period_data.seed + bytes3(index))[0:8]) % + PERSISTENT_COMMITTEE_PERIOD + ) + # Take not-yet-cycled-out validators from earlier committee and already-cycled-in validators from + # later committee; return a sorted list of the union of the two, deduplicated + return sorted(list(set( + [i for i in earlier_committee if epoch % PERSISTENT_COMMITTEE_PERIOD < get_switchover_epoch(i)] + + [i for i in later_committee if epoch % PERSISTENT_COMMITTEE_PERIOD >= get_switchover_epoch(i)] + ))) + +``` + +Note that this method makes use of the fact that the committee for any given shard always starts and ends at the same validator index independently of the committee count. This is because the validator set (of size `n`) is split into `SHARD_COUNT * committee_count` slices, and the first slice belonging to shard `i` has slice index `committee_count * i`, so the start of that slice is `n * committee_count * i // (SHARD_COUNT * committee_count) = n * i // SHARD_COUNT`, using the slightly nontrivial algebraic identity `(x * a) // (a * b) == x // b`. 
+ +### Verifying blocks + +If a client wants to update its `finalized_header` it asks the network for a `BlockValidityProof`, which is simply: + +```python +{ + 'header': BlockHeader, + 'shard_aggregate_signature': 'bytes96', + 'shard_bitfield': 'bytes', + 'shard_parent_block': ShardBlock +} +``` + +The verification procedure is as follows: + +```python +def verify_block_validity_proof(proof: BlockValidityProof, validator_memory: ValidatorMemory) -> bool: + assert proof.shard_parent_block.beacon_chain_ref == hash_tree_root(proof.header) + committee = compute_committee(proof.header, validator_memory) + # Verify that strictly more than 50% of the committee balance supports the block + support_balance = sum([c.high_balance for i, c in enumerate(committee) if get_bitfield_bit(proof.shard_bitfield, i) is True]) + total_balance = sum([c.high_balance for c in committee]) + assert support_balance * 2 > total_balance + # Verify shard attestations + group_public_key = bls_aggregate_pubkeys([ + v.pubkey for i, v in enumerate(committee) if + get_bitfield_bit(proof.shard_bitfield, i) is True + ]) + assert bls_verify( + pubkey=group_public_key, + message_hash=hash_tree_root(proof.shard_parent_block), + signature=proof.shard_aggregate_signature, + domain=get_domain(state, slot_to_epoch(proof.shard_parent_block.slot), DOMAIN_SHARD_ATTESTER) + ) +``` +The size of this proof is only 200 (header) + 96 (signature) + 16 (bitfield) + 352 (shard block) = 664 bytes. It can be reduced further by replacing `ShardBlock` with `MerklePartial(lambda x: x.beacon_chain_ref, ShardBlock)`, which would cut off ~220 bytes. 
From 154eec0d027468d1a228f7f76e233eec9c4320ff Mon Sep 17 00:00:00 2001 From: Vitalik Buterin Date: Wed, 13 Mar 2019 03:04:16 -0500 Subject: [PATCH 2/9] Added links to light client docs in the readme --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index c5c88daf9e..e37539e3ba 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,8 @@ Accompanying documents can be found in [specs](specs) and include * [BLS signature verification](specs/bls_signature.md) * [General test format](specs/test-format.md) * [Honest validator implementation doc](specs/validator/0_beacon-chain-validator.md) +* [Merkle proof formats](specs/light_client/merkle_proofs.md) +* [Light client syncing protocol](specs/light_client/sync_protocol.md) ## Design goals The following are the broad design goals for Ethereum 2.0: From b65601afdae18b35c9aad8dfa25c1c677f757ec0 Mon Sep 17 00:00:00 2001 From: Vitalik Buterin Date: Thu, 14 Mar 2019 08:29:03 -0500 Subject: [PATCH 3/9] Updated Merkle proof file --- specs/light_client/merkle_proofs.md | 63 +++++++++++++++++++++++++---- 1 file changed, 56 insertions(+), 7 deletions(-) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index cf4dad2e38..f529411186 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -1,3 +1,9 @@ +### Constants + +| Name | Value | +| - | - | +| `LENGTH_FLAG` | `2**64 - 1` | + ### Generalized Merkle tree index In a binary Merkle tree, we define a "generalized index" of a node as `2**depth + index`. Visually, this looks as follows: @@ -36,17 +42,34 @@ y_data_root len(y) ....... ``` -We can now define a concept of a "path", a way of describing a function that takes as input an SSZ object and outputs some specific (possibly deeply nested) member. For example, `foo -> foo.x` is a path, as are `foo -> len(foo.y)` and `foo -> foo[5]`. 
We'll describe paths as lists: in these three cases they are `["x"]`, `["y", "len"]` and `["y", 5]` respectively. We can now define a function `get_generalized_indices(object: Any, path: List[str OR int], root=1: int) -> int` that converts an object and a path to a set of generalized indices (note that for constant-sized objects, there is only one generalized index and it only depends on the path, but for dynamically sized objects the indices may depend on the object itself too). For dynamically-sized objects, the set of indices will have more than one member because of the need to access an array's length to determine the correct generalized index for some array access. +We can now define a concept of a "path", a way of describing a function that takes as input an SSZ object and outputs some specific (possibly deeply nested) member. For example, `foo -> foo.x` is a path, as are `foo -> len(foo.y)` and `foo -> foo.y[5].w`. We'll describe paths as lists, which can have two representations. In "human-readable form", they are `["x"]`, `["y", "__len__"]` and `["y", 5, "w"]` respectively. In "encoded form", they are lists of `uint64` values, in these cases (assuming the fields of `foo` in order are `x` then `y`, and `w` is the first field of `y[i]`) `[0]`, `[1, 2**64-1]`, `[1, 5, 0]`. 
```python -def get_generalized_indices(obj: Any, path: List[str or int], root=1) -> List[int]: +def path_to_encoded_form(obj: Any, path: List[str or int]) -> List[int]: + if len(path) == 0: + return [] + if path[0] == "__len__": + assert len(path) == 1 + return [LENGTH_FLAG] + elif isinstance(path[0], str) and hasattr(obj, "fields"): + return [list(obj.fields.keys()).index(path[0])] + path_to_encoded_form(getattr(obj, path[0]), path[1:]) + elif isinstance(obj, (StaticList, DynamicList)): + return [path[0]] + path_to_encoded_form(obj[path[0]], path[1:]) + else: + raise Exception("Unknown type / path") +``` + +We can now define a function `get_generalized_indices(object: Any, path: List[int], root: int=1) -> List[int]` that converts an object and a path to a set of generalized indices (note that for constant-sized objects, there is only one generalized index and it only depends on the path, but for dynamically sized objects the indices may depend on the object itself too). For dynamically-sized objects, the set of indices will have more than one member because of the need to access an array's length to determine the correct generalized index for some array access. 
+ +```python +def get_generalized_indices(obj: Any, path: List[int], root=1) -> List[int]: if len(path) == 0: return [root] elif isinstance(obj, StaticList): items_per_chunk = (32 // len(serialize(x))) if isinstance(x, int) else 1 new_root = root * next_power_of_2(len(obj) // items_per_chunk) + path[0] // items_per_chunk return get_generalized_indices(obj[path[0]], path[1:], new_root) - elif isinstance(obj, DynamicList) and path[0] == "len": + elif isinstance(obj, DynamicList) and path[0] == LENGTH_FLAG: return [root * 2 + 1] elif isinstance(obj, DynamicList) and isinstance(path[0], int): assert path[0] < len(obj) @@ -54,9 +77,9 @@ def get_generalized_indices(obj: Any, path: List[str or int], root=1) -> List[in new_root = root * 2 * next_power_of_2(len(obj) // items_per_chunk) + path[0] // items_per_chunk return [root *2 + 1] + get_generalized_indices(obj[path[0]], path[1:], new_root) elif hasattr(obj, "fields"): - index = list(fields.keys()).index(path[0]) - new_root = root * next_power_of_2(len(fields)) + index - return get_generalized_indices(getattr(obj, path[0]), path[1:], new_root) + field = list(fields.keys())[path[0]] + new_root = root * next_power_of_2(len(fields)) + path[0] + return get_generalized_indices(getattr(obj, field), path[1:], new_root) else: raise Exception("Unknown type / path") ``` @@ -109,6 +132,8 @@ def get_proof_indices(tree_indices: List[int]) -> List[int]: Generating a proof is simply a matter of taking the node of the SSZ hash tree with the union of the given generalized indices for each index given by `get_proof_indices`, and outputting the list of nodes in the same order. 
+Here is the verification function: + ```python def verify_multi_proof(root, indices, leaves, proof): tree = {} @@ -127,8 +152,32 @@ def verify_multi_proof(root, indices, leaves, proof): return (indices == []) or (1 in tree and tree[1] == root) ``` +### MerklePartial + +We define: + +#### `MerklePartialLeaf` + +```python +{ + "path": ["uint64"], + "value": "bytes32" +} +``` + +#### `MerklePartial` + + +```python +{ + "root": "bytes32", + "values": [MerklePartialLeaf], + "proof": ["bytes32"] +} +``` + #### Proofs for execution -We define `MerklePartial(f, arg1, arg2...)` as being a list of Merkle multiproofs of the sets of nodes in the hash trees of the SSZ objects that are needed to authenticate the values needed to compute some function `f(arg1, arg2...)`. An individual Merkle multiproof is given as a dynamic sized list of `bytes32` values, a `MerklePartial` is a fixed-size list of objects `{proof: ["bytes32"], value: "bytes32"}`, one for each `arg` to `f` (if some `arg` is a base type, then the multiproof is empty). +We define `MerklePartial(f, arg1, arg2..., focus=0)` as being a `MerklePartial` object wrapping a Merkle multiproof of the set of nodes in the hash tree of the SSZ object `arg[focus]` that is needed to authenticate the parts of the object needed to compute `f(arg1, arg2...)`. Ideally, any function which accepts an SSZ object should also be able to accept a `MerklePartial` object as a substitute. From 22be21223b90160f4f772146d821b34848ee3572 Mon Sep 17 00:00:00 2001 From: Justin Date: Fri, 15 Mar 2019 11:24:59 +0000 Subject: [PATCH 4/9] Update merkle_proofs.md --- specs/light_client/merkle_proofs.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index f529411186..311a4aa5ca 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -1,3 +1,5 @@ +**NOTICE**: This document is a work-in-progress for researchers and implementers. 
+ ### Constants | Name | Value | From b566722b52ad2cd9f92a0b48953e3b784a04853d Mon Sep 17 00:00:00 2001 From: Justin Date: Fri, 15 Mar 2019 11:25:15 +0000 Subject: [PATCH 5/9] Update sync_protocol.md --- specs/light_client/sync_protocol.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/specs/light_client/sync_protocol.md b/specs/light_client/sync_protocol.md index 2a70dbb314..b84d55dcfb 100644 --- a/specs/light_client/sync_protocol.md +++ b/specs/light_client/sync_protocol.md @@ -1,3 +1,5 @@ +**NOTICE**: This document is a work-in-progress for researchers and implementers. + # Beacon chain light client syncing One of the design goals of the eth2 beacon chain is light-client friendlines, both to allow low-resource clients (mobile phones, IoT, etc) to maintain access to the blockchain in a reasonably safe way, but also to facilitate the development of "bridges" between the eth2 beacon chain and other chains. From 1967a8939d54601e56946705453f304118042bb5 Mon Sep 17 00:00:00 2001 From: vbuterin Date: Sun, 17 Mar 2019 06:25:56 -0500 Subject: [PATCH 6/9] Fixed some variable names --- specs/light_client/sync_protocol.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/specs/light_client/sync_protocol.md b/specs/light_client/sync_protocol.md index b84d55dcfb..2a420abcb1 100644 --- a/specs/light_client/sync_protocol.md +++ b/specs/light_client/sync_protocol.md @@ -69,14 +69,14 @@ A light client will keep track of: * A random `shard_id` in `[0...SHARD_COUNT-1]` (selected once and retained forever) * A block header that they consider to be finalized (`finalized_header`) and do not expect to revert. 
-* `later_period_data = get_maximal_later_committee(finalized_header, shard_id)` -* `earlier_period_data = get_maximal_earlier_committee(finalized_header, shard_id)` +* `later_period_data = get_later_period_data(finalized_header, shard_id)` +* `earlier_period_data = get_earlier_period_data(finalized_header, shard_id)` We use the struct `validator_memory` to keep track of these variables. ### Updating the shuffled committee -If a client's `validator_memory.finalized_header` changes so that `header.slot // PERSISTENT_COMMITTEE_PERIOD` increases, then the client can ask the network for a `new_committee_proof = MerklePartial(get_maximal_later_committee, validator_memory.finalized_header, shard_id)`. It can then compute: +If a client's `validator_memory.finalized_header` changes so that `header.slot // PERSISTENT_COMMITTEE_PERIOD` increases, then the client can ask the network for a `new_committee_proof = MerklePartial(get_later_period_data, validator_memory.finalized_header, shard_id)`. It can then compute: ```python earlier_period_data = later_period_data @@ -95,13 +95,13 @@ def compute_committee(header: BeaconBlockHeader, earlier_validator_count = validator_memory.earlier_period_data.validator_count later_validator_count = validator_memory.later_period_data.validator_count - earlier_committee = validator_memory.earlier_period_data.committee - later_committee = validator_memory.later_period_data.committee + maximal_earlier_committee = validator_memory.earlier_period_data.committee + maximal_later_committee = validator_memory.later_period_data.committee earlier_start_epoch = get_earlier_start_epoch(header.slot) later_start_epoch = get_later_start_epoch(header.slot) epoch = slot_to_epoch(header.slot) - actual_committee_count = max( + committee_count = max( earlier_validator_count // (SHARD_COUNT * TARGET_COMMITTEE_SIZE), later_validator_count // (SHARD_COUNT * TARGET_COMMITTEE_SIZE), ) + 1 From ced6208d55d26d63f532d4bb031869740b2a111c Mon Sep 17 00:00:00 2001 From: 
vbuterin Date: Wed, 20 Mar 2019 11:49:28 -0500 Subject: [PATCH 7/9] Edits * shuffled committee -> period committee * Reduced code redundancy --- specs/light_client/sync_protocol.md | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/specs/light_client/sync_protocol.md b/specs/light_client/sync_protocol.md index 2a420abcb1..a8cdd50c2d 100644 --- a/specs/light_client/sync_protocol.md +++ b/specs/light_client/sync_protocol.md @@ -37,25 +37,15 @@ We add a data type `PeriodData` and four helpers: ```python def get_earlier_start_epoch(slot: Slot) -> int: return slot - slot % PERSISTENT_COMMITTEE_PERIOD - PERSISTENT_COMMITTEE_PERIOD * 2 + def get_later_start_epoch(slot: Slot) -> int: return slot - slot % PERSISTENT_COMMITTEE_PERIOD - PERSISTENT_COMMITTEE_PERIOD -def get_earlier_period_data(block: ExtendedBeaconBlock, shard_id: Shard) -> PeriodData: - period_start = get_earlier_start_epoch(header.slot) - validator_count = len(get_active_validator_indices(state, period_start)) - committee_count = validator_count // (SHARD_COUNT * TARGET_COMMITTEE_SIZE) + 1 - indices = get_shuffled_committee(block.state, shard_id, period_start, 0, committee_count) - return PeriodData( - validator_count, - generate_seed(block.state, period_start), - [block.state.validator_registry[i] for i in indices] - ) - -def get_later_period_data(block: ExtendedBeaconBlock, shard_id: Shard) -> PeriodData: - period_start = get_later_start_epoch(header.slot) +def get_period_data(block: ExtendedBeaconBlock, shard_id: Shard, later: bool) -> PeriodData: + period_start = get_later_start_epoch(header.slot) if later else get_earlier_start_epoch(header.slot) validator_count = len(get_active_validator_indices(state, period_start)) committee_count = validator_count // (SHARD_COUNT * TARGET_COMMITTEE_SIZE) + 1 - indices = get_shuffled_committee(block.state, shard_id, period_start, 0, committee_count) + indices = get_period_committee(block.state, shard_id, period_start, 0, 
committee_count) return PeriodData( validator_count, generate_seed(block.state, period_start), @@ -69,18 +59,18 @@ A light client will keep track of: * A random `shard_id` in `[0...SHARD_COUNT-1]` (selected once and retained forever) * A block header that they consider to be finalized (`finalized_header`) and do not expect to revert. -* `later_period_data = get_later_period_data(finalized_header, shard_id)` -* `earlier_period_data = get_earlier_period_data(finalized_header, shard_id)` +* `later_period_data = get_period_data(finalized_header, shard_id, later=True)` +* `earlier_period_data = get_period_data(finalized_header, shard_id, later=False)` We use the struct `validator_memory` to keep track of these variables. ### Updating the shuffled committee -If a client's `validator_memory.finalized_header` changes so that `header.slot // PERSISTENT_COMMITTEE_PERIOD` increases, then the client can ask the network for a `new_committee_proof = MerklePartial(get_later_period_data, validator_memory.finalized_header, shard_id)`. It can then compute: +If a client's `validator_memory.finalized_header` changes so that `header.slot // PERSISTENT_COMMITTEE_PERIOD` increases, then the client can ask the network for a `new_committee_proof = MerklePartial(get_period_data, validator_memory.finalized_header, shard_id, later=True)`. It can then compute: ```python earlier_period_data = later_period_data -later_period_data = get_later_period_data(new_committee_proof, finalized_header, shard_id) +later_period_data = get_period_data(new_committee_proof, finalized_header, shard_id, later=True) ``` The maximum size of a proof is `128 * ((22-7) * 32 + 110) = 75520` bytes for validator records and `(22-7) * 32 + 128 * 8 = 1504` for the active index proof (much smaller because the relevant active indices are all beside each other in the Merkle tree). This needs to be done once per `PERSISTENT_COMMITTEE_PERIOD` epochs (2048 epochs / 9 days), or ~38 bytes per epoch. 
From b91dfd6244f7196827a311d879aa29ad7eb83f05 Mon Sep 17 00:00:00 2001 From: Vitalik Buterin Date: Tue, 26 Mar 2019 12:56:35 -0500 Subject: [PATCH 8/9] Add merkle_parial_from_paths --- specs/light_client/merkle_proofs.md | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index 311a4aa5ca..2e92488cb7 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -158,26 +158,32 @@ def verify_multi_proof(root, indices, leaves, proof): We define: -#### `MerklePartialLeaf` - -```python -{ - "path": ["uint64"], - "value": "bytes32" -} -``` - #### `MerklePartial` ```python { "root": "bytes32", - "values": [MerklePartialLeaf], + "indices": ["uint64"], + "values": ["bytes32"], "proof": ["bytes32"] } ``` +#### `merkle_partial_from_paths` + +```python +def merkle_partial_from_paths(obj, paths): + indices = set() + for path in paths: + indices = indices.union(get_generalized_indices(obj, path)) + return MerklePartial( + root=hash_tree_root(obj), + indices=indices, + values= mk_multi_proof + ) +``` + #### Proofs for execution We define `MerklePartial(f, arg1, arg2..., focus=0)` as being a `MerklePartial` object wrapping a Merkle multiproof of the set of nodes in the hash tree of the SSZ object `arg[focus]` that is needed to authenticate the parts of the object needed to compute `f(arg1, arg2...)`. 
From bee740e834a04509c24a741d8f2e5abaaeb28e27 Mon Sep 17 00:00:00 2001 From: vbuterin Date: Wed, 3 Apr 2019 01:06:18 -0500 Subject: [PATCH 9/9] Removed merkle partial from paths for now --- specs/light_client/merkle_proofs.md | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/specs/light_client/merkle_proofs.md b/specs/light_client/merkle_proofs.md index 2e92488cb7..285445ca85 100644 --- a/specs/light_client/merkle_proofs.md +++ b/specs/light_client/merkle_proofs.md @@ -170,20 +170,6 @@ We define: } ``` -#### `merkle_partial_from_paths` - -```python -def merkle_partial_from_paths(obj, paths): - indices = set() - for path in paths: - indices = indices.union(get_generalized_indices(obj, path)) - return MerklePartial( - root=hash_tree_root(obj), - indices=indices, - values= mk_multi_proof - ) -``` - #### Proofs for execution We define `MerklePartial(f, arg1, arg2..., focus=0)` as being a `MerklePartial` object wrapping a Merkle multiproof of the set of nodes in the hash tree of the SSZ object `arg[focus]` that is needed to authenticate the parts of the object needed to compute `f(arg1, arg2...)`.