From d6a37ecfcb561aa93d1d9e0c11f507343d8c1095 Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Fri, 10 Nov 2023 03:38:02 +0800 Subject: [PATCH 01/25] Copied from Danny's ethresearch post --- specs/peerdas/das-core.md | 174 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 174 insertions(+) create mode 100644 specs/peerdas/das-core.md diff --git a/specs/peerdas/das-core.md b/specs/peerdas/das-core.md new file mode 100644 index 0000000000..c6c3811a12 --- /dev/null +++ b/specs/peerdas/das-core.md @@ -0,0 +1,174 @@ +# Peer Data Availability Sampling -- Core + +**Notice**: This document is a work-in-progress for researchers and implementers. + +## Table of contents + + + + + +- [Custom types](#custom-types) +- [Configuration](#configuration) + - [Data size](#data-size) + - [Custody setting](#custody-setting) + - [Helper functions](#helper-functions) + - [`cycle`](#cycle) + - [`get_custody_lines`](#get_custody_lines) + - [Honest peer guide](#honest-peer-guide) +- [Custody](#custody) + - [1. Custody](#1-custody) + - [`CUSTODY_REQUIREMENT`](#custody_requirement) + - [Public, deterministic selection](#public-deterministic-selection) + - [2. Peer discovery](#2-peer-discovery) + - [3. Row/Column gossip](#3-rowcolumn-gossip) + - [Parameters](#parameters) + - [Reconstruction and cross-seeding](#reconstruction-and-cross-seeding) + - [4. Peer sampling](#4-peer-sampling) + - [5. Peer scoring](#5-peer-scoring) + - [6. DAS providers](#6-das-providers) + - [7. A note on fork choice](#7-a-note-on-fork-choice) + + + + +## Custom types + +| Name | SSZ equivalent | Description | +| - | - | - | +| `SampleIndex` | `uint64` | A sample index, corresponding to chunk of extended data | + +## Configuration + +### Data size + +| Name | Value | Description | +| - | - | - | +| `NUMBER_OF_ROWS` | `uint64(2**4)` (= 32) | Number of rows in the 2D data array | +| `NUMBER_OF_COLUMNS` | `uint64(2**4)` (= 32) | Number of columns in the 2D data array | +| `DATA_PER_BLOB` | `FIELD_ELEMENTS_PER_BLOB * BYTES_PER_FIELD_ELEMENT` | Bytes | +| `DATA_PER_SLOT` | `MAX_BLOBS_PER_BLOCK * BLOB_SIZE * 4` | Bytes. Including the extension. | +| `DATA_PER_ROW` | `DATA_PER_SLOT / NUMBER_OF_ROWS` | | +| `DATA_PER_COLUMN` | `DATA_PER_SLOT / NUMBER_OF_COLUMNS` | | +| `DATA_PER_SAMPLE` | `DATA_PER_SLOT / (NUMBER_OF_COLUMNS * NUMBER_OF_ROWS)` | + +### Custody setting + +| Name | Value | Description | +| - | - | - | +| `SAMPLES_PER_SLOT` | `70` | +| `CUSTODY_REQUIREMENT` | `2` | | + +### Helper functions + +#### `cycle` +```python +def cycle(seq: Sequence[Any], start: int) -> Any: + while True: + yield seq[start] + start = (start + 1) % len(seq) +``` + +#### `get_custody_lines` + +```python +def get_custody_lines(node_id: int, epoch: int, custody_size: int, line_type: LineType) -> list[int]: + bound = NUMBER_OF_ROWS if line_type else NUMBER_OF_COLUMNS + all_items = list(range(bound)) + line_index = (node_id + epoch) % bound + iterator = cycle(all_items, line_index) + return [next(iterator) for _ in range(custody_size)] +``` + +#### Honest peer guide + +## Custody + + +#### 1. Custody + +##### `CUSTODY_REQUIREMENT` + +Each node downloads and custodies a minimum of `CUSTODY_REQUIREMENT` rows and `CUSTODY_REQUIREMENT` columns per slot. The particular rows and columns that the node is required to custody are selected pseudo-randomly (more on this below). + +A node *may* choose to custody and serve more than the minimum honesty requirement. 
Such a node explicitly advertises a number greater than `CUSTODY_REQUIREMENT` via the peer discovery mechanism -- for example, in their ENR (e.g. `custody_lines: 8` if the node custodies `8` rows and `8` columns each slot) -- up to a maximum of `max(NUMBER_OF_ROWS, NUMBER_OF_COLUMNS)` (i.e. a super-full node). + +A node stores the custodied rows/columns for the duration of the pruning period and responds to peer requests for samples on those rows/columns. + +##### Public, deterministic selection + +The particular rows and columns that a node custodies are selected pseudo-randomly as a function of the node-id, epoch, and custody size (sample function interface: `get_custody_lines(config: Config, node_id: int, epoch: int, custody_size: int, line_type: LineType) -> list[int]` and column variant) -- importantly this function can be run by any party as the inputs are all public. + +*Note*: `line_type` could be `LineType.ROW` or `LineType.COLUMN`. + +*Note*: increasing the `custody_size` parameter for a given `node_id` and `epoch` extends the returned list (rather than being an entirely new shuffle) such that if `custody_size` is unknown, the default `CUSTODY_REQUIREMENT` will be correct for a subset of the node's custody. + +*Note*: Even though this function accepts `epoch` as an input, the function can be tuned to remain stable for many epochs depending on network/subnet stability requirements. There is a trade-off between rigidity of the network and the depth to which a subnet can be utilized for recovery. To ensure subnets can be utilized for recovery, staggered rotation needs to happen likely on the order of the prune period. + +#### 2. Peer discovery + +At each slot, a node needs to be able to readily sample from *any* set of rows and columns. To this end, a node should find and maintain a set of diverse and reliable peers that can regularly satisfy their sampling demands. + +A node runs a background peer discovery process, maintaining at least `NUMBER_OF_PEERS` of various custody distributions (both custody_size and row/column assignments). The combination of advertised `custody_size` size and public node-id make this readily, publicly accessible. + +`NUMBER_OF_PEERS` should be tuned upward in the event of failed sampling. + +*Note*: while high-capacity and super-full nodes are high value with respect to satisfying sampling requirements, a node should maintain a distribution across node capacities as to not centralize the p2p graph too much (in the extreme becomes hub/spoke) and to distribute sampling load better across all nodes. + +*Note*: A DHT-based peer discovery mechanism is expected to be utilized in the above. The beacon-chain network currently utilizes discv5 in a similar method as described for finding peers of particular distributions of attestation subnets. Additional peer discovery methods are valuable to integrate (e.g. latent peer discovery via libp2p gossipsub) to add a defense in breadth against one of the discovery methods being attacked. + +#### 3. Row/Column gossip + +##### Parameters + +There are both `NUMBER_OF_ROWS` row and `NUMBER_OF_COLUMNS` column gossip topics. + +1. For each column -- `row_x` for `x` from `0` to `NUMBER_OF_COLUMNS` (non-inclusive). +2. For each row -- `column_y` for `y` from `0` to `NUMBER_OF_ROWS` (non-inclusive). + +To custody a particular row or column, a node joins the respective gossip subnet. Verifiable samples from their respective row/column are gossiped on the assigned subnet. 
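+
+As a non-normative illustration, a node's gossip topic set follows directly from its custody assignment. The sketch below assumes the `row_x`/`column_y` topic naming above and the draft `get_custody_lines` interface; `get_custody_topics` is a hypothetical helper, not part of this spec.
+
+```python
+def get_custody_topics(node_id: int, epoch: int, custody_size: int) -> list[str]:
+    # Rows and columns are assigned independently with the same custody size.
+    rows = get_custody_lines(node_id, epoch, custody_size, LineType.ROW)
+    columns = get_custody_lines(node_id, epoch, custody_size, LineType.COLUMN)
+    return [f"row_{x}" for x in rows] + [f"column_{y}" for y in columns]
+```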
+ + +##### Reconstruction and cross-seeding + +In the event a node does *not* receive all samples for a given row/column but does receive enough to reconstruct (e.g. 50%+, a function of coding rate), the node should reconstruct locally and send the reconstructed samples on the subnet. + +Additionally, the node should send (cross-seed) any samples missing from a given row/column they are assigned to that they have obtained via an alternative method (ancillary gossip or reconstruction). E.g., if node reconstructs `row_x` and is also participating in the `column_y` subnet in which the `(x, y)` sample was missing, send the reconstructed sample to `column_y`. + +*Note*: A node is always maintaining a matrix view of the rows and columns they are following, able to cross-reference and cross-seed in either direction. + +*Note*: There are timing considerations to analyze -- at what point does a node consider samples missing and chooses to reconstruct and cross-seed. + +*Note*: There may be anti-DoS and quality-of-service considerations around how to send samples and consider samples -- is each individual sample a message or are they sent in aggregate forms. + +#### 4. Peer sampling + +At each slot, a node makes (locally randomly determined) `SAMPLES_PER_SLOT` queries for samples from their peers. A node utilizes `get_custody_lines(..., line_type=LineType.ROW)`/`get_custody_lines(..., line_type=LineType.COLUMN)` to determine which peer(s) to request from. If a node has enough good/honest peers across all rows and columns, this has a high chance of success. + +Upon sampling, the node sends an `DO_YOU_HAVE` packet for all samples to all peers who are determined to custody this sample according to their `get_custody_lines` results. All peers answer first with a bitfield of the samples that they have. + +Upon receiving a sample, a node will pass on the sample to any node which did not previously have this sample, known by `DO_YOU_HAVE` response (but was supposed to have it according to its `get_custody_lines` results). + +#### 5. Peer scoring + +Due to the deterministic custody functions, a node knows exactly what a peer should be able to respond to. In the event that a peer does not respond to samples of their custodied rows/columns, a node may downscore or disconnect from a peer. + +*Note*: a peer might not respond to requests either because they are dishonest (don't actually custody the data), because of bandwidth saturation (local throttling), or because they were, themselves, not able to get all the samples. In the first two cases, the peer is not of consistent DAS value and a node can/should seek to optimize for better peers. In the latter, the node can make local determinations based on repeated `DO_YOU_HAVE` queries to that peer and other peers to assess the value/honesty of the peer. + +#### 6. DAS providers + +A DAS provider is a consistently-available-for-DAS-queries, super-full (or high capacity) node. To the p2p, these look just like other nodes but with high advertised capacity, and they should generally be able to be latently found via normal discovery. + +They can also be found out-of-band and configured into a node to connect to directly and prioritize. E.g., some L2 DAO might support 10 super-full nodes as a public good, and nodes could choose to add some set of these to their local configuration to bolster their DAS quality of service. 
+ +Such direct peering utilizes a feature supported out of the box today on all nodes and can complement (and reduce attackability) of alternative peer discovery mechanisms. + +#### 7. A note on fork choice + +The fork choice rule (essentially a DA filter) is *orthogonal to a given DAS design*, other than the efficiency of particular design impacting it. + +In any DAS design, there are probably a few degrees of freedom around timing, acceptability of short-term re-orgs, etc. + +For example, the fork choice rule might require validators to do successful DAS on slot N to be able to include block of slot N in it's fork choice. That's the tightest DA filter. But trailing filters are also probably acceptable, knowing that there might be some failures/short re-orgs but that it doesn't hurt the aggregate security. E.g. The rule could be -- DAS must be completed for slot N-1 for a child block in N to be included in the fork choice. + +Such trailing techniques and their analyiss will be valuable for any DAS construction. The question is — can you relax how quickly you need to do DA and in the worst case not confirm unavailable data via attestations/finality, and what impact does it have on short-term re-orgs and fast confirmation rules. From 93dddd15f25fa0d2c4e911556f807774e610aa98 Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Tue, 14 Nov 2023 12:04:42 +0300 Subject: [PATCH 02/25] wip wip Add `TARGET_NUMBER_OF_PEERS` Add networking spec draft fix simplification Rename `DoYouHave` to `GetCustodyStatus` Add DataLineSidecar design Apply suggestions from code review Co-authored-by: dankrad Co-authored-by: danny Revamp after reviews and discussion Remove `CustodyStatus` minor fix Change`DataColumn` to `List[DataCell, MAX_BLOBS_PER_BLOCK]` Move folder Replace `DataColumnByRootAndIndex` with `DataColumnSidecarByRoot` message. Add extended data description Remove `DataRow` Apply suggestions from @jacobkaufmann code review Co-authored-by: Jacob Kaufmann Represent matrix in `BLSFieldElement` form Add `assert time >= store.time` to `on_tick` Revert the spec. Only handle it in tests Remove extra tick cleanup leftover Add randomized block cases Specify RPC byRoot blocks-sidecars elegibility fix typo Update specs/phase0/p2p-interface.md Co-authored-by: Mikhail Kalinin Update specs/deneb/p2p-interface.md Co-authored-by: Mikhail Kalinin add failed on_block condition rephrase Update specs/phase0/p2p-interface.md Co-authored-by: Mikhail Kalinin apply suggestion Update specs/deneb/p2p-interface.md Co-authored-by: danny Update specs/deneb/p2p-interface.md Co-authored-by: danny remove the last consider from on_block to state_transition simplify and add a new rule Update specs/phase0/p2p-interface.md Co-authored-by: Mikhail Kalinin Update specs/deneb/p2p-interface.md Co-authored-by: Mikhail Kalinin Update specs/deneb/p2p-interface.md Co-authored-by: danny remove gossip failure rules Apply suggestions from code review bump version to v1.4.0-beta.5 Move `blob_sidecar_{subnet_id}` to `Blob subnets` section Misc minor fix Add linter support Add column subnet validation. Split `verify_column_sidecar` into two functions Fix `get_data_column_sidecars` by using `compute_samples_and_proofs` Apply suggestions from code review Co-authored-by: danny Do not assign row custody Apply suggestions from code review Co-authored-by: danny Revamp reconstruction section Use depth as the primary preset for inclusion proof. 
Fix `get_data_column_sidecars` and add tests for merkle proof Change `SAMPLES_PER_SLOT` to 8 and add tests (requirement TBD) Apply PR feedback from @ppopth and @jtraglia Fix `get_data_column_sidecars` Co-authored-by: Pop Chunhapanya Apply suggestions from code review Co-authored-by: Pop Chunhapanya Apply suggestions from code review Co-authored-by: fradamt <104826920+fradamt@users.noreply.github.com> Co-authored-by: Jacob Kaufmann Fix `get_data_column_sidecars` and `get_custody_lines` Apply suggestions from code review Co-authored-by: Jacob Kaufmann Enhance tests fix typo Co-authored-by: fradamt <104826920+fradamt@users.noreply.github.com> Remove `epoch` from `get_custody_lines` fix fix --- configs/mainnet.yaml | 7 +- configs/minimal.yaml | 7 +- presets/mainnet/eip7594.yaml | 4 + presets/minimal/eip7594.yaml | 4 + pysetup/spec_builders/eip7594.py | 10 +- specs/_features/eip7594/das-core.md | 241 ++++++++++++++++++ specs/_features/eip7594/p2p-interface.md | 180 +++++++++++++ .../polynomial-commitments-sampling.md | 2 +- specs/deneb/validator.md | 22 +- specs/peerdas/das-core.md | 174 ------------- .../test/eip7594/merkle_proof/__init__.py | 0 .../merkle_proof/test_single_merkle_proof.py | 74 ++++++ .../unittests/test_config_invariants.py | 17 ++ .../test/eip7594/unittests/test_custody.py | 41 +++ .../test/eip7594/unittests/test_networking.py | 19 ++ .../test/eip7594/unittests/test_security.py | 24 ++ .../core/pyspec/eth2spec/test/helpers/das.py | 0 17 files changed, 634 insertions(+), 192 deletions(-) create mode 100644 specs/_features/eip7594/das-core.md create mode 100644 specs/_features/eip7594/p2p-interface.md delete mode 100644 specs/peerdas/das-core.md create mode 100644 tests/core/pyspec/eth2spec/test/eip7594/merkle_proof/__init__.py create mode 100644 tests/core/pyspec/eth2spec/test/eip7594/merkle_proof/test_single_merkle_proof.py create mode 100644 tests/core/pyspec/eth2spec/test/eip7594/unittests/test_config_invariants.py create mode 100644 tests/core/pyspec/eth2spec/test/eip7594/unittests/test_custody.py create mode 100644 tests/core/pyspec/eth2spec/test/eip7594/unittests/test_networking.py create mode 100644 tests/core/pyspec/eth2spec/test/eip7594/unittests/test_security.py create mode 100644 tests/core/pyspec/eth2spec/test/helpers/das.py diff --git a/configs/mainnet.yaml b/configs/mainnet.yaml index d31634958a..fa664e122a 100644 --- a/configs/mainnet.yaml +++ b/configs/mainnet.yaml @@ -59,6 +59,9 @@ EIP7002_FORK_EPOCH: 18446744073709551615 # WHISK WHISK_FORK_VERSION: 0x06000000 # temporary stub WHISK_FORK_EPOCH: 18446744073709551615 +# EIP7594 +EIP7594_FORK_VERSION: 0x06000001 +EIP7594_FORK_EPOCH: 18446744073709551615 # Time parameters @@ -154,7 +157,3 @@ BLOB_SIDECAR_SUBNET_COUNT: 6 WHISK_EPOCHS_PER_SHUFFLING_PHASE: 256 # `Epoch(2)` WHISK_PROPOSER_SELECTION_GAP: 2 - -# EIP7594 -EIP7594_FORK_VERSION: 0x06000001 -EIP7594_FORK_EPOCH: 18446744073709551615 diff --git a/configs/minimal.yaml b/configs/minimal.yaml index 6b2da84fdb..9ac4249a5d 100644 --- a/configs/minimal.yaml +++ b/configs/minimal.yaml @@ -58,6 +58,9 @@ EIP7002_FORK_EPOCH: 18446744073709551615 # WHISK WHISK_FORK_VERSION: 0x06000001 WHISK_FORK_EPOCH: 18446744073709551615 +# EIP7594 +EIP7594_FORK_VERSION: 0x06000001 +EIP7594_FORK_EPOCH: 18446744073709551615 # Time parameters @@ -153,7 +156,3 @@ BLOB_SIDECAR_SUBNET_COUNT: 6 # Whisk WHISK_EPOCHS_PER_SHUFFLING_PHASE: 4 WHISK_PROPOSER_SELECTION_GAP: 1 - -# EIP7594 -EIP7594_FORK_VERSION: 0x06000001 -EIP7594_FORK_EPOCH: 18446744073709551615 diff --git 
a/presets/mainnet/eip7594.yaml b/presets/mainnet/eip7594.yaml index c5bc54e525..ab7b8b936e 100644 --- a/presets/mainnet/eip7594.yaml +++ b/presets/mainnet/eip7594.yaml @@ -4,3 +4,7 @@ # --------------------------------------------------------------- # `uint64(2**6)` (= 64) FIELD_ELEMENTS_PER_CELL: 64 +# uint64(floorlog2(get_generalized_index(BeaconBlockBody, 'blob_kzg_commitments')) +KZG_COMMITMENTS_INCLUSION_PROOF_DEPTH: 4 +# `uint64((FIELD_ELEMENTS_PER_BLOB * 2) // FIELD_ELEMENTS_PER_CELL)` (= 128) +NUMBER_OF_COLUMNS: 128 diff --git a/presets/minimal/eip7594.yaml b/presets/minimal/eip7594.yaml index 0435332859..cf814c9db5 100644 --- a/presets/minimal/eip7594.yaml +++ b/presets/minimal/eip7594.yaml @@ -4,3 +4,7 @@ # --------------------------------------------------------------- # `uint64(2**6)` (= 64) FIELD_ELEMENTS_PER_CELL: 64 +# uint64(floorlog2(get_generalized_index(BeaconBlockBody, 'blob_kzg_commitments')) +KZG_COMMITMENTS_INCLUSION_PROOF_DEPTH: 4 +# `uint64((FIELD_ELEMENTS_PER_BLOB * 2) // FIELD_ELEMENTS_PER_CELL)` (= 128) +NUMBER_OF_COLUMNS: 128 diff --git a/pysetup/spec_builders/eip7594.py b/pysetup/spec_builders/eip7594.py index 606ce895e7..20d2a6d20d 100644 --- a/pysetup/spec_builders/eip7594.py +++ b/pysetup/spec_builders/eip7594.py @@ -14,7 +14,15 @@ def imports(cls, preset_name: str): ''' @classmethod - def hardcoded_custom_type_dep_constants(cls, spec_object) -> Dict[str, str]: + def hardcoded_custom_type_dep_constants(cls, spec_object) -> str: return { 'FIELD_ELEMENTS_PER_CELL': spec_object.preset_vars['FIELD_ELEMENTS_PER_CELL'].value, + 'NUMBER_OF_COLUMNS': spec_object.preset_vars['NUMBER_OF_COLUMNS'].value, + 'FIELD_ELEMENTS_PER_CELL': spec_object.preset_vars['FIELD_ELEMENTS_PER_CELL'].value, + } + + @classmethod + def hardcoded_func_dep_presets(cls, spec_object) -> Dict[str, str]: + return { + 'KZG_COMMITMENTS_INCLUSION_PROOF_DEPTH': spec_object.preset_vars['KZG_COMMITMENTS_INCLUSION_PROOF_DEPTH'].value, } diff --git a/specs/_features/eip7594/das-core.md b/specs/_features/eip7594/das-core.md new file mode 100644 index 0000000000..d191dd90b1 --- /dev/null +++ b/specs/_features/eip7594/das-core.md @@ -0,0 +1,241 @@ +# EIP-7594 -- Data Availability Sampling Core + +**Notice**: This document is a work-in-progress for researchers and implementers. 
+ +## Table of contents + + + + + +- [Custom types](#custom-types) +- [Configuration](#configuration) + - [Data size](#data-size) + - [Custody setting](#custody-setting) + - [Helper functions](#helper-functions) + - [`get_custody_lines`](#get_custody_lines) + - [`compute_extended_data`](#compute_extended_data) + - [`compute_extended_matrix`](#compute_extended_matrix) + - [`compute_samples_and_proofs`](#compute_samples_and_proofs) + - [`get_data_column_sidecars`](#get_data_column_sidecars) +- [Custody](#custody) + - [Custody requirement](#custody-requirement) + - [Public, deterministic selection](#public-deterministic-selection) +- [Peer discovery](#peer-discovery) +- [Extended data](#extended-data) +- [Column gossip](#column-gossip) + - [Parameters](#parameters) + - [Reconstruction and cross-seeding](#reconstruction-and-cross-seeding) +- [Peer sampling](#peer-sampling) +- [Peer scoring](#peer-scoring) +- [DAS providers](#das-providers) +- [A note on fork choice](#a-note-on-fork-choice) +- [FAQs](#faqs) + - [Row (blob) custody](#row-blob-custody) + - [Subnet stability](#subnet-stability) + + + + +## Custom types + +We define the following Python custom types for type hinting and readability: + +| Name | SSZ equivalent | Description | +| - | - | - | +| `DataCell` | `Vector[BLSFieldElement, FIELD_ELEMENTS_PER_CELL]` | The data unit of a cell in the extended data matrix | +| `DataColumn` | `List[DataCell, MAX_BLOBS_PER_BLOCK]` | The data of each column in EIP7594 | +| `ExtendedMatrix` | `List[DataCell, MAX_BLOBS_PER_BLOCK * NUMBER_OF_COLUMNS]` | The full data with blobs and one-dimensional erasure coding extension | +| `FlatExtendedMatrix` | `List[BLSFieldElement, MAX_BLOBS_PER_BLOCK * FIELD_ELEMENTS_PER_BLOB * NUMBER_OF_COLUMNS]` | The flattened format of `ExtendedMatrix` | +| `LineIndex` | `uint64` | The index of the rows or columns in `FlatExtendedMatrix` matrix | + +## Configuration + +### Data size + +| Name | Value | Description | +| - | - | - | +| `FIELD_ELEMENTS_PER_CELL` | `uint64(2**6)` (= 64) | Elements per `DataCell` | +| `NUMBER_OF_COLUMNS` | `uint64((FIELD_ELEMENTS_PER_BLOB * 2) // FIELD_ELEMENTS_PER_CELL)` (= 128) | Number of columns in the extended data matrix. | + +### Custody setting + +| Name | Value | Description | +| - | - | - | +| `SAMPLES_PER_SLOT` | `8` | Number of random samples a node queries per slot | +| `CUSTODY_REQUIREMENT` | `2` | Minimum number of columns an honest node custodies and serves samples from | +| `TARGET_NUMBER_OF_PEERS` | `70` | Suggested minimum peer count | + +### Helper functions + +#### `get_custody_lines` + +```python +def get_custody_lines(node_id: NodeID, custody_size: uint64) -> Sequence[LineIndex]: + assert custody_size <= NUMBER_OF_COLUMNS + all_items = list(range(NUMBER_OF_COLUMNS)) + line_index = node_id % NUMBER_OF_COLUMNS + return [LineIndex(all_items[(line_index + i) % len(all_items)]) for i in range(custody_size)] +``` + +#### `compute_extended_data` + +```python +def compute_extended_data(data: Sequence[BLSFieldElement]) -> Sequence[BLSFieldElement]: + # TODO + # pylint: disable=unused-argument + ... 
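+    # Intended behavior (assumption, per the one-dimensional erasure-coding design
+    # described under "Extended data"): extend `data` with parity so the result is
+    # twice as long and any half of the extended data suffices to recover the original.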
+``` + +#### `compute_extended_matrix` + +```python +def compute_extended_matrix(blobs: Sequence[Blob]) -> FlatExtendedMatrix: + matrix = [compute_extended_data(blob) for blob in blobs] + return FlatExtendedMatrix(matrix) +``` + +#### `compute_samples_and_proofs` + +```python +def compute_samples_and_proofs(blob: Blob) -> Tuple[ + Vector[DataCell, NUMBER_OF_COLUMNS], + Vector[KZGProof, NUMBER_OF_COLUMNS]]: + """ + Defined in polynomial-commitments-sampling.md + """ + # pylint: disable=unused-argument + ... +``` + +#### `get_data_column_sidecars` + +```python +def get_data_column_sidecars(signed_block: SignedBeaconBlock, + blobs: Sequence[Blob]) -> Sequence[DataColumnSidecar]: + signed_block_header = compute_signed_block_header(signed_block) + block = signed_block.message + kzg_commitments_inclusion_proof = compute_merkle_proof( + block.body, + get_generalized_index(BeaconBlockBody, 'blob_kzg_commitments'), + ) + cells_and_proofs = [compute_samples_and_proofs(blob) for blob in blobs] + blob_count = len(blobs) + cells = [cells_and_proofs[i][0] for i in range(blob_count)] + proofs = [cells_and_proofs[i][1] for i in range(blob_count)] + sidecars = [] + for column_index in range(NUMBER_OF_COLUMNS): + column = DataColumn([cells[row_index][column_index] + for row_index in range(blob_count)]) + kzg_proof_of_column = [proofs[row_index][column_index] + for row_index in range(blob_count)] + sidecars.append(DataColumnSidecar( + index=column_index, + column=column, + kzg_commitments=block.body.blob_kzg_commitments, + kzg_proofs=kzg_proof_of_column, + signed_block_header=signed_block_header, + kzg_commitments_inclusion_proof=kzg_commitments_inclusion_proof, + )) + return sidecars +``` + +## Custody + +### Custody requirement + +Each node downloads and custodies a minimum of `CUSTODY_REQUIREMENT` columns per slot. The particular columns that the node is required to custody are selected pseudo-randomly (more on this below). + +A node *may* choose to custody and serve more than the minimum honesty requirement. Such a node explicitly advertises a number greater than `CUSTODY_REQUIREMENT` via the peer discovery mechanism -- for example, in their ENR (e.g. `custody_lines: 8` if the node custodies `8` columns each slot) -- up to a `NUMBER_OF_COLUMNS` (i.e. a super-full node). + +A node stores the custodied columns for the duration of the pruning period and responds to peer requests for samples on those columns. + +### Public, deterministic selection + +The particular columns that a node custodies are selected pseudo-randomly as a function (`get_custody_lines`) of the node-id and custody size -- importantly this function can be run by any party as the inputs are all public. + +*Note*: increasing the `custody_size` parameter for a given `node_id` extends the returned list (rather than being an entirely new shuffle) such that if `custody_size` is unknown, the default `CUSTODY_REQUIREMENT` will be correct for a subset of the node's custody. + +## Peer discovery + +At each slot, a node needs to be able to readily sample from *any* set of columns. To this end, a node should find and maintain a set of diverse and reliable peers that can regularly satisfy their sampling demands. + +A node runs a background peer discovery process, maintaining at least `TARGET_NUMBER_OF_PEERS` of various custody distributions (both custody_size and column assignments). The combination of advertised `custody_size` size and public node-id make this readily and publicly accessible. 
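+
+As a non-normative illustration of why this is readily accessible, a node can index its discovered peers by the columns they are expected to custody using only public inputs. The sketch below assumes a local `peer_custody_sizes` map from peer node-id to advertised `custody_size`; it is not part of this specification.
+
+```python
+def get_column_to_peers_map(peer_custody_sizes: Dict[NodeID, uint64]) -> Dict[LineIndex, List[NodeID]]:
+    # Route sampling requests for any column to the peers expected to custody it.
+    column_to_peers: Dict[LineIndex, List[NodeID]] = {
+        LineIndex(index): [] for index in range(NUMBER_OF_COLUMNS)
+    }
+    for node_id, custody_size in peer_custody_sizes.items():
+        for column_index in get_custody_lines(node_id, custody_size):
+            column_to_peers[column_index].append(node_id)
+    return column_to_peers
+```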
+ +`TARGET_NUMBER_OF_PEERS` should be tuned upward in the event of failed sampling. + +*Note*: while high-capacity and super-full nodes are high value with respect to satisfying sampling requirements, a node should maintain a distribution across node capacities as to not centralize the p2p graph too much (in the extreme becomes hub/spoke) and to distribute sampling load better across all nodes. + +*Note*: A DHT-based peer discovery mechanism is expected to be utilized in the above. The beacon-chain network currently utilizes discv5 in a similar method as described for finding peers of particular distributions of attestation subnets. Additional peer discovery methods are valuable to integrate (e.g., latent peer discovery via libp2p gossipsub) to add a defense in breadth against one of the discovery methods being attacked. + +## Extended data + +In this construction, we extend the blobs using a one-dimensional erasure coding extension. The matrix comprises maximum `MAX_BLOBS_PER_BLOCK` rows and fixed `NUMBER_OF_COLUMNS` columns, with each row containing a `Blob` and its corresponding extension. + +## Column gossip + +### Parameters + +For each column -- use `data_column_sidecar_{subnet_id}` subnets, where each column index maps to the `subnet_id`. The sidecars can be computed with `get_data_column_sidecars(signed_block: SignedBeaconBlock, blobs: Sequence[Blob])` helper. + +To custody a particular column, a node joins the respective gossip subnet. Verifiable samples from their respective column are gossiped on the assigned subnet. + +### Reconstruction and cross-seeding + +If the node obtains 50%+ of all the columns, they can reconstruct the full data matrix via `recover_samples_impl` helper. + +If a node fails to sample a peer or fails to get a column on the column subnet, a node can utilize the Req/Resp message to query the missing column from other peers. + +Once the node obtain the column, the node should send the missing columns to the column subnets. + +*Note*: A node always maintains a matrix view of the rows and columns they are following, able to cross-reference and cross-seed in either direction. + +*Note*: There are timing considerations to analyze -- at what point does a node consider samples missing and choose to reconstruct and cross-seed. + +*Note*: There may be anti-DoS and quality-of-service considerations around how to send samples and consider samples -- is each individual sample a message or are they sent in aggregate forms. + +## Peer sampling + +At each slot, a node makes (locally randomly determined) `SAMPLES_PER_SLOT` queries for samples from their peers via `DataColumnSidecarByRoot` request. A node utilizes `get_custody_lines` helper to determine which peer(s) to request from. If a node has enough good/honest peers across all rows and columns, this has a high chance of success. + +## Peer scoring + +Due to the deterministic custody functions, a node knows exactly what a peer should be able to respond to. In the event that a peer does not respond to samples of their custodied rows/columns, a node may downscore or disconnect from a peer. + +## DAS providers + +A DAS provider is a consistently-available-for-DAS-queries, super-full (or high capacity) node. To the p2p, these look just like other nodes but with high advertised capacity, and they should generally be able to be latently found via normal discovery. + +DAS providers can also be found out-of-band and configured into a node to connect to directly and prioritize. 
Nodes can add some set of these to their local configuration for persistent connection to bolster their DAS quality of service. + +Such direct peering utilizes a feature supported out of the box today on all nodes and can complement (and reduce attackability and increase quality-of-service) alternative peer discovery mechanisms. + +## A note on fork choice + +*Fork choice spec TBD, but it will just be a replacement of `is_data_available()` call in Deneb with column sampling instead of full download. Note the `is_data_available(slot_N)` will likely do a `-1` follow distance so that you just need to check the availability of slot `N-1` for slot `N` (starting with the block proposer of `N`).* + +The fork choice rule (essentially a DA filter) is *orthogonal to a given DAS design*, other than the efficiency of a particular design impacting it. + +In any DAS design, there are probably a few degrees of freedom around timing, acceptability of short-term re-orgs, etc. + +For example, the fork choice rule might require validators to do successful DAS on slot N to be able to include block of slot `N` in its fork choice. That's the tightest DA filter. But trailing filters are also probably acceptable, knowing that there might be some failures/short re-orgs but that they don't hurt the aggregate security. For example, the rule could be — DAS must be completed for slot N-1 for a child block in N to be included in the fork choice. + +Such trailing techniques and their analysis will be valuable for any DAS construction. The question is — can you relax how quickly you need to do DA and in the worst case not confirm unavailable data via attestations/finality, and what impact does it have on short-term re-orgs and fast confirmation rules. + +## FAQs + +### Row (blob) custody + +In the one-dimension construction, a node samples the peers by requesting the whole `DataColumn`. In reconstruction, a node can reconstruct all the blobs by 50% of the columns. Note that nodes can still download the row via `blob_sidecar_{subnet_id}` subnets. + +The potential benefits of having row custody could include: + +1. Allow for more "natural" distribution of data to consumers -- e.g., roll-ups -- but honestly, they won't know a priori which row their blob is going to be included in in the block, so they would either need to listen to all rows or download a particular row after seeing the block. The former looks just like listening to column [0, N) and the latter is req/resp instead of gossiping. +2. Help with some sort of distributed reconstruction. Those with full rows can compute extensions and seed missing samples to the network. This would either need to be able to send individual points on the gossip or would need some sort of req/resp faculty, potentially similar to an `IHAVEPOINTBITFIELD` and `IWANTSAMPLE`. + +However, for simplicity, we don't assign row custody assignments to nodes in the current design. + + +### Subnet stability + +To start with a simple, stable backbone, for now, we don't shuffle the subnet assignments via the deterministic custody selection helper `get_custody_lines`. However, staggered rotation likely needs to happen on the order of the pruning period to ensure subnets can be utilized for recovery. For example, introducing an `epoch` argument allows the function to maintain stability over many epochs. 
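+
+A minimal, non-normative sketch of what such an epoch-aware variant could look like is given below. `CUSTODY_ROTATION_PERIOD` is a hypothetical constant (roughly the pruning period, in epochs) and is not part of this specification.
+
+```python
+def get_custody_lines_with_rotation(node_id: NodeID, epoch: Epoch, custody_size: uint64) -> Sequence[LineIndex]:
+    # Same assignment as `get_custody_lines`, except the starting column advances
+    # once per rotation period, keeping assignments stable within a period.
+    assert custody_size <= NUMBER_OF_COLUMNS
+    line_index = (node_id + epoch // CUSTODY_ROTATION_PERIOD) % NUMBER_OF_COLUMNS
+    return [LineIndex((line_index + i) % NUMBER_OF_COLUMNS) for i in range(custody_size)]
+```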
diff --git a/specs/_features/eip7594/p2p-interface.md b/specs/_features/eip7594/p2p-interface.md new file mode 100644 index 0000000000..4d55a69ef8 --- /dev/null +++ b/specs/_features/eip7594/p2p-interface.md @@ -0,0 +1,180 @@ +# EIP-7594 -- Networking + +**Notice**: This document is a work-in-progress for researchers and implementers. + +## Table of contents + + + + + +- [Modifications in EIP-7594](#modifications-in-eip-7594) + - [Preset](#preset) + - [Configuration](#configuration) + - [Containers](#containers) + - [`DataColumnSidecar`](#datacolumnsidecar) + - [`DataColumnIdentifier`](#datacolumnidentifier) + - [Helpers](#helpers) + - [`verify_data_column_sidecar_kzg_proof`](#verify_data_column_sidecar_kzg_proof) + - [`verify_data_column_sidecar_inclusion_proof`](#verify_data_column_sidecar_inclusion_proof) + - [`compute_subnet_for_data_column_sidecar`](#compute_subnet_for_data_column_sidecar) + - [The gossip domain: gossipsub](#the-gossip-domain-gossipsub) + - [Topics and messages](#topics-and-messages) + - [Samples subnets](#samples-subnets) + - [`data_column_sidecar_{subnet_id}`](#data_column_sidecar_subnet_id) + - [The Req/Resp domain](#the-reqresp-domain) + - [Messages](#messages) + - [DataColumnSidecarByRoot v1](#datacolumnsidecarbyroot-v1) + + + + +## Modifications in EIP-7594 + +### Preset + +| Name | Value | Description | +|------------------------------------------|-----------------------------------|---------------------------------------------------------------------| +| `KZG_COMMITMENTS_INCLUSION_PROOF_DEPTH` | `uint64(floorlog2(get_generalized_index(BeaconBlockBody, 'blob_kzg_commitments')))` (= 4) | Merkle proof index for `blob_kzg_commitments` | + +### Configuration + +| Name | Value | Description | +|------------------------------------------|-----------------------------------|---------------------------------------------------------------------| +| `DATA_COLUMN_SIDECAR_SUBNET_COUNT` | `32` | The number of data column sidecar subnets used in the gossipsub protocol. | + +### Containers + +#### `DataColumnSidecar` + +```python +class DataColumnSidecar(Container): + index: LineIndex # Index of column in extended matrix + column: DataColumn + kzg_commitments: List[KZGCommitment, MAX_BLOB_COMMITMENTS_PER_BLOCK] + kzg_proofs: List[KZGProof, MAX_BLOB_COMMITMENTS_PER_BLOCK] + signed_block_header: SignedBeaconBlockHeader + kzg_commitments_inclusion_proof: Vector[Bytes32, KZG_COMMITMENTS_INCLUSION_PROOF_DEPTH] +``` + +#### `DataColumnIdentifier` + +```python +class DataColumnIdentifier(Container): + block_root: Root + index: LineIndex +``` + +### Helpers + +##### `verify_data_column_sidecar_kzg_proof` + +```python +def verify_data_column_sidecar_kzg_proof(sidecar: DataColumnSidecar) -> bool: + """ + Verify if the proofs are correct + """ + row_ids = [LineIndex(i) for i in range(len(sidecar.column))] + assert len(sidecar.column) == len(sidecar.kzg_commitments) == len(sidecar.kzg_proofs) + + # KZG batch verifies that the cells match the corresponding commitments and proofs + return verify_cell_proof_batch( + row_commitments=sidecar.kzg_commitments, + row_ids=row_ids, # all rows + column_ids=[sidecar.index], + datas=sidecar.column, + proofs=sidecar.kzg_proofs, + ) +``` + +##### `verify_data_column_sidecar_inclusion_proof` + +```python +def verify_data_column_sidecar_inclusion_proof(sidecar: DataColumnSidecar) -> bool: + """ + Verify if the given KZG commitments included in the given beacon block. 
+ """ + gindex = get_subtree_index(get_generalized_index(BeaconBlockBody, 'blob_kzg_commitments')) + return is_valid_merkle_branch( + leaf=hash_tree_root(sidecar.kzg_commitments), + branch=sidecar.kzg_commitments_inclusion_proof, + depth=KZG_COMMITMENTS_INCLUSION_PROOF_DEPTH, + index=gindex, + root=sidecar.signed_block_header.message.body_root, + ) +``` + +##### `compute_subnet_for_data_column_sidecar` + +```python +def compute_subnet_for_data_column_sidecar(column_index: LineIndex) -> SubnetID: + return SubnetID(column_index % DATA_COLUMN_SIDECAR_SUBNET_COUNT) +``` + + +### The gossip domain: gossipsub + +Some gossip meshes are upgraded in the EIP-7594 fork to support upgraded types. + +#### Topics and messages + +##### Samples subnets + +###### `data_column_sidecar_{subnet_id}` + +This topic is used to propagate column sidecars, where each column maps to some `subnet_id`. + +The *type* of the payload of this topic is `DataColumnSidecar`. + +The following validations MUST pass before forwarding the `sidecar: DataColumnSidecar` on the network, assuming the alias `block_header = sidecar.signed_block_header.message`: + +- _[REJECT]_ The sidecar's index is consistent with `NUMBER_OF_COLUMNS` -- i.e. `sidecar.index < NUMBER_OF_COLUMNS`. +- _[REJECT]_ The sidecar is for the correct subnet -- i.e. `compute_subnet_for_data_column_sidecar(sidecar.index) == subnet_id`. +- _[IGNORE]_ The sidecar is not from a future slot (with a `MAXIMUM_GOSSIP_CLOCK_DISPARITY` allowance) -- i.e. validate that `block_header.slot <= current_slot` (a client MAY queue future sidecars for processing at the appropriate slot). +- _[IGNORE]_ The sidecar is from a slot greater than the latest finalized slot -- i.e. validate that `block_header.slot > compute_start_slot_at_epoch(state.finalized_checkpoint.epoch)` +- _[REJECT]_ The proposer signature of `sidecar.signed_block_header`, is valid with respect to the `block_header.proposer_index` pubkey. +- _[IGNORE]_ The sidecar's block's parent (defined by `block_header.parent_root`) has been seen (via both gossip and non-gossip sources) (a client MAY queue sidecars for processing once the parent block is retrieved). +- _[REJECT]_ The sidecar's block's parent (defined by `block_header.parent_root`) passes validation. +- _[REJECT]_ The sidecar is from a higher slot than the sidecar's block's parent (defined by `block_header.parent_root`). +- _[REJECT]_ The current finalized_checkpoint is an ancestor of the sidecar's block -- i.e. `get_checkpoint_block(store, block_header.parent_root, store.finalized_checkpoint.epoch) == store.finalized_checkpoint.root`. +- _[REJECT]_ The sidecar's `kzg_commitments` field inclusion proof is valid as verified by `verify_data_column_sidecar_inclusion_proof(sidecar)`. +- _[REJECT]_ The sidecar's column data is valid as verified by `verify_data_column_sidecar_kzg_proof(sidecar)`. +- _[IGNORE]_ The sidecar is the first sidecar for the tuple `(block_header.slot, block_header.proposer_index, sidecar.index)` with valid header signature, sidecar inclusion proof, and kzg proof. +- _[REJECT]_ The sidecar is proposed by the expected `proposer_index` for the block's slot in the context of the current shuffling (defined by `block_header.parent_root`/`block_header.slot`). + If the `proposer_index` cannot immediately be verified against the expected shuffling, the sidecar MAY be queued for later processing while proposers for the block's branch are calculated -- in such a case _do not_ `REJECT`, instead `IGNORE` this message. 
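+
+As a non-normative aid, the purely structural and cryptographic subset of the checks above can be sketched as follows; the slot, finality, parent, and proposer checks require fork-choice and clock context and are omitted. `is_structurally_valid_data_column_sidecar` is an illustrative helper, not part of this specification.
+
+```python
+def is_structurally_valid_data_column_sidecar(sidecar: DataColumnSidecar, subnet_id: SubnetID) -> bool:
+    # The index is within bounds and maps to the topic's subnet.
+    if sidecar.index >= NUMBER_OF_COLUMNS:
+        return False
+    if compute_subnet_for_data_column_sidecar(sidecar.index) != subnet_id:
+        return False
+    # The commitments are included in the block body and the column data matches them.
+    if not verify_data_column_sidecar_inclusion_proof(sidecar):
+        return False
+    return verify_data_column_sidecar_kzg_proof(sidecar)
+```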
+ +*Note:* In the `verify_data_column_sidecar_inclusion_proof(sidecar)` check, for all the sidecars of the same block, it verifies against the same set of `kzg_commitments` of the given beacon beacon. Client can choose to cache the result of the arguments tuple `(sidecar.kzg_commitments, sidecar.kzg_commitments_inclusion_proof, sidecar.signed_block_header)`. + +### The Req/Resp domain + +#### Messages + +##### DataColumnSidecarByRoot v1 + +**Protocol ID:** `/eth2/beacon_chain/req/data_column_sidecar_by_root/1/` + +*[New in Deneb:EIP4844]* + +The `` field is calculated as `context = compute_fork_digest(fork_version, genesis_validators_root)`: + +[1]: # (eth2spec: skip) + +| `fork_version` | Chunk SSZ type | +|--------------------------|-------------------------------| +| `EIP7594_FORK_VERSION` | `eip7594.DataColumnSidecar` | + +Request Content: + +``` +( + DataColumnIdentifier +) +``` + +Response Content: + +``` +( + DataColumnSidecar +) +``` diff --git a/specs/_features/eip7594/polynomial-commitments-sampling.md b/specs/_features/eip7594/polynomial-commitments-sampling.md index b356792e34..e09888a31b 100644 --- a/specs/_features/eip7594/polynomial-commitments-sampling.md +++ b/specs/_features/eip7594/polynomial-commitments-sampling.md @@ -1,4 +1,4 @@ -# Deneb -- Polynomial Commitments +# EIP-7594 -- Polynomial Commitments ## Table of contents diff --git a/specs/deneb/validator.md b/specs/deneb/validator.md index 3e2c91f817..900da37a1f 100644 --- a/specs/deneb/validator.md +++ b/specs/deneb/validator.md @@ -63,6 +63,19 @@ class GetPayloadResponse(object): blobs_bundle: BlobsBundle # [New in Deneb:EIP4844] ``` +```python +def compute_signed_block_header(signed_block: SignedBeaconBlock) -> SignedBeaconBlockHeader: + block = signed_block.message + block_header = BeaconBlockHeader( + slot=block.slot, + proposer_index=block.proposer_index, + parent_root=block.parent_root, + state_root=block.state_root, + body_root=hash_tree_root(block.body), + ) + return SignedBeaconBlockHeader(message=block_header, signature=signed_block.signature) +``` + ## Protocol ### `ExecutionEngine` @@ -147,14 +160,7 @@ def get_blob_sidecars(signed_block: SignedBeaconBlock, blobs: Sequence[Blob], blob_kzg_proofs: Sequence[KZGProof]) -> Sequence[BlobSidecar]: block = signed_block.message - block_header = BeaconBlockHeader( - slot=block.slot, - proposer_index=block.proposer_index, - parent_root=block.parent_root, - state_root=block.state_root, - body_root=hash_tree_root(block.body), - ) - signed_block_header = SignedBeaconBlockHeader(message=block_header, signature=signed_block.signature) + signed_block_header = compute_signed_block_header(signed_block) return [ BlobSidecar( index=index, diff --git a/specs/peerdas/das-core.md b/specs/peerdas/das-core.md deleted file mode 100644 index c6c3811a12..0000000000 --- a/specs/peerdas/das-core.md +++ /dev/null @@ -1,174 +0,0 @@ -# Peer Data Availability Sampling -- Core - -**Notice**: This document is a work-in-progress for researchers and implementers. - -## Table of contents - - - - - -- [Custom types](#custom-types) -- [Configuration](#configuration) - - [Data size](#data-size) - - [Custody setting](#custody-setting) - - [Helper functions](#helper-functions) - - [`cycle`](#cycle) - - [`get_custody_lines`](#get_custody_lines) - - [Honest peer guide](#honest-peer-guide) -- [Custody](#custody) - - [1. Custody](#1-custody) - - [`CUSTODY_REQUIREMENT`](#custody_requirement) - - [Public, deterministic selection](#public-deterministic-selection) - - [2. 
Peer discovery](#2-peer-discovery) - - [3. Row/Column gossip](#3-rowcolumn-gossip) - - [Parameters](#parameters) - - [Reconstruction and cross-seeding](#reconstruction-and-cross-seeding) - - [4. Peer sampling](#4-peer-sampling) - - [5. Peer scoring](#5-peer-scoring) - - [6. DAS providers](#6-das-providers) - - [7. A note on fork choice](#7-a-note-on-fork-choice) - - - - -## Custom types - -| Name | SSZ equivalent | Description | -| - | - | - | -| `SampleIndex` | `uint64` | A sample index, corresponding to chunk of extended data | - -## Configuration - -### Data size - -| Name | Value | Description | -| - | - | - | -| `NUMBER_OF_ROWS` | `uint64(2**4)` (= 32) | Number of rows in the 2D data array | -| `NUMBER_OF_COLUMNS` | `uint64(2**4)` (= 32) | Number of columns in the 2D data array | -| `DATA_PER_BLOB` | `FIELD_ELEMENTS_PER_BLOB * BYTES_PER_FIELD_ELEMENT` | Bytes | -| `DATA_PER_SLOT` | `MAX_BLOBS_PER_BLOCK * BLOB_SIZE * 4` | Bytes. Including the extension. | -| `DATA_PER_ROW` | `DATA_PER_SLOT / NUMBER_OF_ROWS` | | -| `DATA_PER_COLUMN` | `DATA_PER_SLOT / NUMBER_OF_COLUMNS` | | -| `DATA_PER_SAMPLE` | `DATA_PER_SLOT / (NUMBER_OF_COLUMNS * NUMBER_OF_ROWS)` | - -### Custody setting - -| Name | Value | Description | -| - | - | - | -| `SAMPLES_PER_SLOT` | `70` | -| `CUSTODY_REQUIREMENT` | `2` | | - -### Helper functions - -#### `cycle` -```python -def cycle(seq: Sequence[Any], start: int) -> Any: - while True: - yield seq[start] - start = (start + 1) % len(seq) -``` - -#### `get_custody_lines` - -```python -def get_custody_lines(node_id: int, epoch: int, custody_size: int, line_type: LineType) -> list[int]: - bound = NUMBER_OF_ROWS if line_type else NUMBER_OF_COLUMNS - all_items = list(range(bound)) - line_index = (node_id + epoch) % bound - iterator = cycle(all_items, line_index) - return [next(iterator) for _ in range(custody_size)] -``` - -#### Honest peer guide - -## Custody - - -#### 1. Custody - -##### `CUSTODY_REQUIREMENT` - -Each node downloads and custodies a minimum of `CUSTODY_REQUIREMENT` rows and `CUSTODY_REQUIREMENT` columns per slot. The particular rows and columns that the node is required to custody are selected pseudo-randomly (more on this below). - -A node *may* choose to custody and serve more than the minimum honesty requirement. Such a node explicitly advertises a number greater than `CUSTODY_REQUIREMENT` via the peer discovery mechanism -- for example, in their ENR (e.g. `custody_lines: 8` if the node custodies `8` rows and `8` columns each slot) -- up to a maximum of `max(NUMBER_OF_ROWS, NUMBER_OF_COLUMNS)` (i.e. a super-full node). - -A node stores the custodied rows/columns for the duration of the pruning period and responds to peer requests for samples on those rows/columns. - -##### Public, deterministic selection - -The particular rows and columns that a node custodies are selected pseudo-randomly as a function of the node-id, epoch, and custody size (sample function interface: `get_custody_lines(config: Config, node_id: int, epoch: int, custody_size: int, line_type: LineType) -> list[int]` and column variant) -- importantly this function can be run by any party as the inputs are all public. - -*Note*: `line_type` could be `LineType.ROW` or `LineType.COLUMN`. - -*Note*: increasing the `custody_size` parameter for a given `node_id` and `epoch` extends the returned list (rather than being an entirely new shuffle) such that if `custody_size` is unknown, the default `CUSTODY_REQUIREMENT` will be correct for a subset of the node's custody. 
- -*Note*: Even though this function accepts `epoch` as an input, the function can be tuned to remain stable for many epochs depending on network/subnet stability requirements. There is a trade-off between rigidity of the network and the depth to which a subnet can be utilized for recovery. To ensure subnets can be utilized for recovery, staggered rotation needs to happen likely on the order of the prune period. - -#### 2. Peer discovery - -At each slot, a node needs to be able to readily sample from *any* set of rows and columns. To this end, a node should find and maintain a set of diverse and reliable peers that can regularly satisfy their sampling demands. - -A node runs a background peer discovery process, maintaining at least `NUMBER_OF_PEERS` of various custody distributions (both custody_size and row/column assignments). The combination of advertised `custody_size` size and public node-id make this readily, publicly accessible. - -`NUMBER_OF_PEERS` should be tuned upward in the event of failed sampling. - -*Note*: while high-capacity and super-full nodes are high value with respect to satisfying sampling requirements, a node should maintain a distribution across node capacities as to not centralize the p2p graph too much (in the extreme becomes hub/spoke) and to distribute sampling load better across all nodes. - -*Note*: A DHT-based peer discovery mechanism is expected to be utilized in the above. The beacon-chain network currently utilizes discv5 in a similar method as described for finding peers of particular distributions of attestation subnets. Additional peer discovery methods are valuable to integrate (e.g. latent peer discovery via libp2p gossipsub) to add a defense in breadth against one of the discovery methods being attacked. - -#### 3. Row/Column gossip - -##### Parameters - -There are both `NUMBER_OF_ROWS` row and `NUMBER_OF_COLUMNS` column gossip topics. - -1. For each column -- `row_x` for `x` from `0` to `NUMBER_OF_COLUMNS` (non-inclusive). -2. For each row -- `column_y` for `y` from `0` to `NUMBER_OF_ROWS` (non-inclusive). - -To custody a particular row or column, a node joins the respective gossip subnet. Verifiable samples from their respective row/column are gossiped on the assigned subnet. - - -##### Reconstruction and cross-seeding - -In the event a node does *not* receive all samples for a given row/column but does receive enough to reconstruct (e.g. 50%+, a function of coding rate), the node should reconstruct locally and send the reconstructed samples on the subnet. - -Additionally, the node should send (cross-seed) any samples missing from a given row/column they are assigned to that they have obtained via an alternative method (ancillary gossip or reconstruction). E.g., if node reconstructs `row_x` and is also participating in the `column_y` subnet in which the `(x, y)` sample was missing, send the reconstructed sample to `column_y`. - -*Note*: A node is always maintaining a matrix view of the rows and columns they are following, able to cross-reference and cross-seed in either direction. - -*Note*: There are timing considerations to analyze -- at what point does a node consider samples missing and chooses to reconstruct and cross-seed. - -*Note*: There may be anti-DoS and quality-of-service considerations around how to send samples and consider samples -- is each individual sample a message or are they sent in aggregate forms. - -#### 4. 
Peer sampling - -At each slot, a node makes (locally randomly determined) `SAMPLES_PER_SLOT` queries for samples from their peers. A node utilizes `get_custody_lines(..., line_type=LineType.ROW)`/`get_custody_lines(..., line_type=LineType.COLUMN)` to determine which peer(s) to request from. If a node has enough good/honest peers across all rows and columns, this has a high chance of success. - -Upon sampling, the node sends an `DO_YOU_HAVE` packet for all samples to all peers who are determined to custody this sample according to their `get_custody_lines` results. All peers answer first with a bitfield of the samples that they have. - -Upon receiving a sample, a node will pass on the sample to any node which did not previously have this sample, known by `DO_YOU_HAVE` response (but was supposed to have it according to its `get_custody_lines` results). - -#### 5. Peer scoring - -Due to the deterministic custody functions, a node knows exactly what a peer should be able to respond to. In the event that a peer does not respond to samples of their custodied rows/columns, a node may downscore or disconnect from a peer. - -*Note*: a peer might not respond to requests either because they are dishonest (don't actually custody the data), because of bandwidth saturation (local throttling), or because they were, themselves, not able to get all the samples. In the first two cases, the peer is not of consistent DAS value and a node can/should seek to optimize for better peers. In the latter, the node can make local determinations based on repeated `DO_YOU_HAVE` queries to that peer and other peers to assess the value/honesty of the peer. - -#### 6. DAS providers - -A DAS provider is a consistently-available-for-DAS-queries, super-full (or high capacity) node. To the p2p, these look just like other nodes but with high advertised capacity, and they should generally be able to be latently found via normal discovery. - -They can also be found out-of-band and configured into a node to connect to directly and prioritize. E.g., some L2 DAO might support 10 super-full nodes as a public good, and nodes could choose to add some set of these to their local configuration to bolster their DAS quality of service. - -Such direct peering utilizes a feature supported out of the box today on all nodes and can complement (and reduce attackability) of alternative peer discovery mechanisms. - -#### 7. A note on fork choice - -The fork choice rule (essentially a DA filter) is *orthogonal to a given DAS design*, other than the efficiency of particular design impacting it. - -In any DAS design, there are probably a few degrees of freedom around timing, acceptability of short-term re-orgs, etc. - -For example, the fork choice rule might require validators to do successful DAS on slot N to be able to include block of slot N in it's fork choice. That's the tightest DA filter. But trailing filters are also probably acceptable, knowing that there might be some failures/short re-orgs but that it doesn't hurt the aggregate security. E.g. The rule could be -- DAS must be completed for slot N-1 for a child block in N to be included in the fork choice. - -Such trailing techniques and their analyiss will be valuable for any DAS construction. The question is — can you relax how quickly you need to do DA and in the worst case not confirm unavailable data via attestations/finality, and what impact does it have on short-term re-orgs and fast confirmation rules. 
diff --git a/tests/core/pyspec/eth2spec/test/eip7594/merkle_proof/__init__.py b/tests/core/pyspec/eth2spec/test/eip7594/merkle_proof/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/core/pyspec/eth2spec/test/eip7594/merkle_proof/test_single_merkle_proof.py b/tests/core/pyspec/eth2spec/test/eip7594/merkle_proof/test_single_merkle_proof.py new file mode 100644 index 0000000000..222f59b775 --- /dev/null +++ b/tests/core/pyspec/eth2spec/test/eip7594/merkle_proof/test_single_merkle_proof.py @@ -0,0 +1,74 @@ +import random + +from eth2spec.test.context import ( + spec_state_test, + with_eip7594_and_later, + with_test_suite_name, +) +from eth2spec.test.helpers.block import ( + build_empty_block_for_next_slot, + sign_block, +) +from eth2spec.test.helpers.execution_payload import ( + compute_el_block_hash, +) +from eth2spec.test.helpers.sharding import ( + get_sample_opaque_tx, +) +from eth2spec.debug.random_value import ( + RandomizationMode, + get_random_ssz_object, +) + + +def _run_blob_kzg_commitments_merkle_proof_test(spec, state, rng=None): + opaque_tx, blobs, blob_kzg_commitments, proofs = get_sample_opaque_tx(spec, blob_count=1) + if rng is None: + block = build_empty_block_for_next_slot(spec, state) + else: + block = get_random_ssz_object( + rng, + spec.BeaconBlock, + max_bytes_length=2000, + max_list_length=2000, + mode=RandomizationMode, + chaos=True, + ) + block.body.blob_kzg_commitments = blob_kzg_commitments + block.body.execution_payload.transactions = [opaque_tx] + block.body.execution_payload.block_hash = compute_el_block_hash(spec, block.body.execution_payload) + signed_block = sign_block(spec, state, block, proposer_index=0) + column_sidcars = spec.get_data_column_sidecars(signed_block, blobs) + column_sidcar = column_sidcars[0] + + yield "object", block.body + kzg_commitments_inclusion_proof = column_sidcar.kzg_commitments_inclusion_proof + gindex = spec.get_generalized_index(spec.BeaconBlockBody, 'blob_kzg_commitments') + yield "proof", { + "leaf": "0x" + column_sidcar.kzg_commitments.hash_tree_root().hex(), + "leaf_index": gindex, + "branch": ['0x' + root.hex() for root in kzg_commitments_inclusion_proof] + } + assert spec.is_valid_merkle_branch( + leaf=column_sidcar.kzg_commitments.hash_tree_root(), + branch=column_sidcar.kzg_commitments_inclusion_proof, + depth=spec.floorlog2(gindex), + index=spec.get_subtree_index(gindex), + root=column_sidcar.signed_block_header.message.body_root, + ) + assert spec.verify_data_column_sidecar_inclusion_proof(column_sidcar) + + +@with_test_suite_name("BeaconBlockBody") +@with_eip7594_and_later +@spec_state_test +def test_blob_kzg_commitments_merkle_proof__basic(spec, state): + yield from _run_blob_kzg_commitments_merkle_proof_test(spec, state) + + +@with_test_suite_name("BeaconBlockBody") +@with_eip7594_and_later +@spec_state_test +def test_blob_kzg_commitments_merkle_proof__random_block_1(spec, state): + rng = random.Random(1111) + yield from _run_blob_kzg_commitments_merkle_proof_test(spec, state, rng=rng) diff --git a/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_config_invariants.py b/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_config_invariants.py new file mode 100644 index 0000000000..712e9892c0 --- /dev/null +++ b/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_config_invariants.py @@ -0,0 +1,17 @@ +from eth2spec.test.context import ( + spec_test, + single_phase, + with_eip7594_and_later, +) + + +@with_eip7594_and_later +@spec_test +@single_phase +def test_invariants(spec): + 
assert spec.FIELD_ELEMENTS_PER_BLOB % spec.FIELD_ELEMENTS_PER_CELL == 0 + assert spec.FIELD_ELEMENTS_PER_BLOB * 2 % spec.NUMBER_OF_COLUMNS == 0 + assert spec.SAMPLES_PER_SLOT <= spec.NUMBER_OF_COLUMNS + assert spec.CUSTODY_REQUIREMENT <= spec.NUMBER_OF_COLUMNS + assert spec.DATA_COLUMN_SIDECAR_SUBNET_COUNT <= spec.NUMBER_OF_COLUMNS + assert spec.NUMBER_OF_COLUMNS % spec.DATA_COLUMN_SIDECAR_SUBNET_COUNT == 0 diff --git a/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_custody.py b/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_custody.py new file mode 100644 index 0000000000..d629b3a066 --- /dev/null +++ b/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_custody.py @@ -0,0 +1,41 @@ +from eth2spec.test.context import ( + expect_assertion_error, + spec_test, + single_phase, + with_eip7594_and_later, +) + + +@with_eip7594_and_later +@spec_test +@single_phase +def test_get_custody_lines_peers_within_number_of_columns(spec): + peer_count = 10 + custody_size = spec.CUSTODY_REQUIREMENT + assert spec.NUMBER_OF_COLUMNS > peer_count + assignments = [spec.get_custody_lines(node_id, custody_size) for node_id in range(peer_count)] + + for assignment in assignments: + assert len(assignment) == custody_size + + +@with_eip7594_and_later +@spec_test +@single_phase +def test_get_custody_lines_peers_more_than_number_of_columns(spec): + peer_count = 200 + custody_size = spec.CUSTODY_REQUIREMENT + assert spec.NUMBER_OF_COLUMNS < peer_count + assignments = [spec.get_custody_lines(node_id, custody_size) for node_id in range(peer_count)] + + for assignment in assignments: + assert len(assignment) == custody_size + + +@with_eip7594_and_later +@spec_test +@single_phase +def test_get_custody_lines_custody_size_more_than_number_of_columns(spec): + node_id = 1 + custody_size = spec.NUMBER_OF_COLUMNS + 1 + expect_assertion_error(lambda: spec.get_custody_lines(node_id, custody_size)) diff --git a/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_networking.py b/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_networking.py new file mode 100644 index 0000000000..972539f5cf --- /dev/null +++ b/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_networking.py @@ -0,0 +1,19 @@ +from eth2spec.test.context import ( + spec_test, + single_phase, + with_eip7594_and_later, +) + + +@with_eip7594_and_later +@spec_test +@single_phase +def test_compute_subnet_for_data_column_sidecar(spec): + subnet_results = [] + for column_index in range(spec.DATA_COLUMN_SIDECAR_SUBNET_COUNT): + subnet_results.append(spec.compute_subnet_for_data_column_sidecar(column_index)) + # no duplicates + assert len(subnet_results) == len(set(subnet_results)) + # next one should be duplicate + next_subnet = spec.compute_subnet_for_data_column_sidecar(spec.DATA_COLUMN_SIDECAR_SUBNET_COUNT) + assert next_subnet == subnet_results[0] diff --git a/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_security.py b/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_security.py new file mode 100644 index 0000000000..07669dafc9 --- /dev/null +++ b/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_security.py @@ -0,0 +1,24 @@ +from eth2spec.test.context import ( + MAINNET, + spec_test, + single_phase, + with_eip7594_and_later, + with_phases, +) + + +@with_eip7594_and_later +@spec_test +@single_phase +@with_phases([MAINNET]) +def test_sampling_config(spec): + probability_of_unavailable = 2 ** (-int(spec.SAMPLES_PER_SLOT)) + # TODO: What is the security requirement? 
+ security_requirement = 0.01 + assert probability_of_unavailable <= security_requirement + + column_size_in_bytes = spec.FIELD_ELEMENTS_PER_CELL * spec.BYTES_PER_FIELD_ELEMENT * spec.MAX_BLOBS_PER_BLOCK + bytes_per_slot = column_size_in_bytes * spec.SAMPLES_PER_SLOT + # TODO: What is the bandwidth requirement? + bandwidth_requirement = 10000 # bytes/s + assert bytes_per_slot // spec.config.SECONDS_PER_SLOT < bandwidth_requirement diff --git a/tests/core/pyspec/eth2spec/test/helpers/das.py b/tests/core/pyspec/eth2spec/test/helpers/das.py new file mode 100644 index 0000000000..e69de29bb2 From 504b4f90367dbb3db7c27f06f0c860e09dc1fb26 Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Fri, 12 Jan 2024 00:36:06 +0800 Subject: [PATCH 03/25] Migrating to latest crypto APIs --- pysetup/helpers.py | 5 ++--- pysetup/md_doc_paths.py | 17 ++++++++++++++++- pysetup/spec_builders/eip7594.py | 1 - specs/_features/eip7594/das-core.md | 22 +++------------------- 4 files changed, 21 insertions(+), 24 deletions(-) diff --git a/pysetup/helpers.py b/pysetup/helpers.py index b2fe00f75e..c4e36200be 100644 --- a/pysetup/helpers.py +++ b/pysetup/helpers.py @@ -203,8 +203,7 @@ def dependency_order_class_objects(objects: Dict[str, str], custom_types: Dict[s for item in [dep, key] + key_list[key_list.index(dep)+1:]: objects[item] = objects.pop(item) - -def combine_ssz_objects(old_objects: Dict[str, str], new_objects: Dict[str, str], custom_types) -> Dict[str, str]: +def combine_ssz_objects(old_objects: Dict[str, str], new_objects: Dict[str, str]) -> Dict[str, str]: """ Takes in old spec and new spec ssz objects, combines them, and returns the newer versions of the objects in dependency order. @@ -226,7 +225,7 @@ def combine_spec_objects(spec0: SpecObject, spec1: SpecObject) -> SpecObject: config_vars = combine_dicts(spec0.config_vars, spec1.config_vars) ssz_dep_constants = combine_dicts(spec0.ssz_dep_constants, spec1.ssz_dep_constants) func_dep_presets = combine_dicts(spec0.func_dep_presets, spec1.func_dep_presets) - ssz_objects = combine_ssz_objects(spec0.ssz_objects, spec1.ssz_objects, custom_types) + ssz_objects = combine_ssz_objects(spec0.ssz_objects, spec1.ssz_objects) dataclasses = combine_dicts(spec0.dataclasses, spec1.dataclasses) return SpecObject( functions=functions, diff --git a/pysetup/md_doc_paths.py b/pysetup/md_doc_paths.py index c93c0c0321..17c3c26fce 100644 --- a/pysetup/md_doc_paths.py +++ b/pysetup/md_doc_paths.py @@ -35,6 +35,11 @@ BELLATRIX: "sync/optimistic.md" } +DEFAULT_ORDER = ( + "beacon-chain", + "polynomial-commitments", +) + def is_post_fork(a, b) -> bool: """ @@ -62,15 +67,25 @@ def get_fork_directory(fork): raise FileNotFoundError(f"No directory found for fork: {fork}") +def sort_key(s): + for index, key in enumerate(DEFAULT_ORDER): + if key in s: + return (index, s) + return (len(DEFAULT_ORDER), s) + + def get_md_doc_paths(spec_fork: str) -> str: md_doc_paths = "" for fork in ALL_FORKS: if is_post_fork(spec_fork, fork): # Append all files in fork directory recursively - for root, dirs, files in os.walk(get_fork_directory(fork)): + for root, _, files in os.walk(get_fork_directory(fork)): + filepaths = [] for filename in files: filepath = os.path.join(root, filename) + filepaths.append(filepath) + for filepath in sorted(filepaths, key=sort_key): if filepath.endswith('.md') and filepath not in IGNORE_SPEC_FILES: md_doc_paths += filepath + "\n" # Append extra files if any diff --git a/pysetup/spec_builders/eip7594.py b/pysetup/spec_builders/eip7594.py index 20d2a6d20d..4ad02fc07e 100644 
--- a/pysetup/spec_builders/eip7594.py +++ b/pysetup/spec_builders/eip7594.py @@ -18,7 +18,6 @@ def hardcoded_custom_type_dep_constants(cls, spec_object) -> str: return { 'FIELD_ELEMENTS_PER_CELL': spec_object.preset_vars['FIELD_ELEMENTS_PER_CELL'].value, 'NUMBER_OF_COLUMNS': spec_object.preset_vars['NUMBER_OF_COLUMNS'].value, - 'FIELD_ELEMENTS_PER_CELL': spec_object.preset_vars['FIELD_ELEMENTS_PER_CELL'].value, } @classmethod diff --git a/specs/_features/eip7594/das-core.md b/specs/_features/eip7594/das-core.md index d191dd90b1..635fa35f17 100644 --- a/specs/_features/eip7594/das-core.md +++ b/specs/_features/eip7594/das-core.md @@ -16,7 +16,6 @@ - [`get_custody_lines`](#get_custody_lines) - [`compute_extended_data`](#compute_extended_data) - [`compute_extended_matrix`](#compute_extended_matrix) - - [`compute_samples_and_proofs`](#compute_samples_and_proofs) - [`get_data_column_sidecars`](#get_data_column_sidecars) - [Custody](#custody) - [Custody requirement](#custody-requirement) @@ -43,9 +42,8 @@ We define the following Python custom types for type hinting and readability: | Name | SSZ equivalent | Description | | - | - | - | -| `DataCell` | `Vector[BLSFieldElement, FIELD_ELEMENTS_PER_CELL]` | The data unit of a cell in the extended data matrix | -| `DataColumn` | `List[DataCell, MAX_BLOBS_PER_BLOCK]` | The data of each column in EIP7594 | -| `ExtendedMatrix` | `List[DataCell, MAX_BLOBS_PER_BLOCK * NUMBER_OF_COLUMNS]` | The full data with blobs and one-dimensional erasure coding extension | +| `DataColumn` | `List[Cell, MAX_BLOBS_PER_BLOCK]` | The data of each column in EIP7594 | +| `ExtendedMatrix` | `List[Cell, MAX_BLOBS_PER_BLOCK * NUMBER_OF_COLUMNS]` | The full data with blobs and one-dimensional erasure coding extension | | `FlatExtendedMatrix` | `List[BLSFieldElement, MAX_BLOBS_PER_BLOCK * FIELD_ELEMENTS_PER_BLOB * NUMBER_OF_COLUMNS]` | The flattened format of `ExtendedMatrix` | | `LineIndex` | `uint64` | The index of the rows or columns in `FlatExtendedMatrix` matrix | @@ -55,7 +53,6 @@ We define the following Python custom types for type hinting and readability: | Name | Value | Description | | - | - | - | -| `FIELD_ELEMENTS_PER_CELL` | `uint64(2**6)` (= 64) | Elements per `DataCell` | | `NUMBER_OF_COLUMNS` | `uint64((FIELD_ELEMENTS_PER_BLOB * 2) // FIELD_ELEMENTS_PER_CELL)` (= 128) | Number of columns in the extended data matrix. | ### Custody setting @@ -95,19 +92,6 @@ def compute_extended_matrix(blobs: Sequence[Blob]) -> FlatExtendedMatrix: return FlatExtendedMatrix(matrix) ``` -#### `compute_samples_and_proofs` - -```python -def compute_samples_and_proofs(blob: Blob) -> Tuple[ - Vector[DataCell, NUMBER_OF_COLUMNS], - Vector[KZGProof, NUMBER_OF_COLUMNS]]: - """ - Defined in polynomial-commitments-sampling.md - """ - # pylint: disable=unused-argument - ... 
-``` - #### `get_data_column_sidecars` ```python @@ -119,7 +103,7 @@ def get_data_column_sidecars(signed_block: SignedBeaconBlock, block.body, get_generalized_index(BeaconBlockBody, 'blob_kzg_commitments'), ) - cells_and_proofs = [compute_samples_and_proofs(blob) for blob in blobs] + cells_and_proofs = [compute_cells_and_proofs(blob) for blob in blobs] blob_count = len(blobs) cells = [cells_and_proofs[i][0] for i in range(blob_count)] proofs = [cells_and_proofs[i][1] for i in range(blob_count)] From 2cc7c8798d7ec13880dee51c8c63312828e41568 Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Fri, 19 Jan 2024 18:09:30 +0800 Subject: [PATCH 04/25] Fix conflict --- .../pyspec/eth2spec/test/eip7594/unittests/test_security.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_security.py b/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_security.py index 07669dafc9..dd85a673e5 100644 --- a/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_security.py +++ b/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_security.py @@ -1,10 +1,12 @@ from eth2spec.test.context import ( - MAINNET, spec_test, single_phase, with_eip7594_and_later, with_phases, ) +from eth2spec.test.helpers.constants import ( + MAINNET, +) @with_eip7594_and_later From 665e6faff7b052ef8c64584e062fcc0c86fc8794 Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Fri, 19 Jan 2024 18:51:36 +0800 Subject: [PATCH 05/25] Add `RowIndex`, `ColumnIndex` custom types in crypto doc --- specs/_features/eip7594/das-core.md | 8 +++----- specs/_features/eip7594/p2p-interface.md | 20 +++++++++---------- .../polynomial-commitments-sampling.md | 12 ++++++----- .../test_polynomial_commitments.py | 4 ++-- 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/specs/_features/eip7594/das-core.md b/specs/_features/eip7594/das-core.md index 635fa35f17..dfe9833b48 100644 --- a/specs/_features/eip7594/das-core.md +++ b/specs/_features/eip7594/das-core.md @@ -45,7 +45,6 @@ We define the following Python custom types for type hinting and readability: | `DataColumn` | `List[Cell, MAX_BLOBS_PER_BLOCK]` | The data of each column in EIP7594 | | `ExtendedMatrix` | `List[Cell, MAX_BLOBS_PER_BLOCK * NUMBER_OF_COLUMNS]` | The full data with blobs and one-dimensional erasure coding extension | | `FlatExtendedMatrix` | `List[BLSFieldElement, MAX_BLOBS_PER_BLOCK * FIELD_ELEMENTS_PER_BLOB * NUMBER_OF_COLUMNS]` | The flattened format of `ExtendedMatrix` | -| `LineIndex` | `uint64` | The index of the rows or columns in `FlatExtendedMatrix` matrix | ## Configuration @@ -68,11 +67,10 @@ We define the following Python custom types for type hinting and readability: #### `get_custody_lines` ```python -def get_custody_lines(node_id: NodeID, custody_size: uint64) -> Sequence[LineIndex]: +def get_custody_lines(node_id: NodeID, custody_size: uint64) -> Sequence[ColumnIndex]: assert custody_size <= NUMBER_OF_COLUMNS - all_items = list(range(NUMBER_OF_COLUMNS)) - line_index = node_id % NUMBER_OF_COLUMNS - return [LineIndex(all_items[(line_index + i) % len(all_items)]) for i in range(custody_size)] + column_index = node_id % NUMBER_OF_COLUMNS + return [ColumnIndex((column_index + i) % NUMBER_OF_COLUMNS) for i in range(custody_size)] ``` #### `compute_extended_data` diff --git a/specs/_features/eip7594/p2p-interface.md b/specs/_features/eip7594/p2p-interface.md index 4d55a69ef8..fd39516bd3 100644 --- a/specs/_features/eip7594/p2p-interface.md +++ b/specs/_features/eip7594/p2p-interface.md 
@@ -13,7 +13,7 @@ - [Configuration](#configuration) - [Containers](#containers) - [`DataColumnSidecar`](#datacolumnsidecar) - - [`DataColumnIdentifier`](#datacolumnidentifier) + - [`DataColumnIndexentifier`](#dataColumnIndexentifier) - [Helpers](#helpers) - [`verify_data_column_sidecar_kzg_proof`](#verify_data_column_sidecar_kzg_proof) - [`verify_data_column_sidecar_inclusion_proof`](#verify_data_column_sidecar_inclusion_proof) @@ -49,7 +49,7 @@ ```python class DataColumnSidecar(Container): - index: LineIndex # Index of column in extended matrix + index: ColumnIndex # Index of column in extended matrix column: DataColumn kzg_commitments: List[KZGCommitment, MAX_BLOB_COMMITMENTS_PER_BLOCK] kzg_proofs: List[KZGProof, MAX_BLOB_COMMITMENTS_PER_BLOCK] @@ -57,12 +57,12 @@ class DataColumnSidecar(Container): kzg_commitments_inclusion_proof: Vector[Bytes32, KZG_COMMITMENTS_INCLUSION_PROOF_DEPTH] ``` -#### `DataColumnIdentifier` +#### `DataColumnIndexentifier` ```python -class DataColumnIdentifier(Container): +class DataColumnIndexentifier(Container): block_root: Root - index: LineIndex + index: ColumnIndex ``` ### Helpers @@ -74,14 +74,14 @@ def verify_data_column_sidecar_kzg_proof(sidecar: DataColumnSidecar) -> bool: """ Verify if the proofs are correct """ - row_ids = [LineIndex(i) for i in range(len(sidecar.column))] + row_ids = [RowIndex(i) for i in range(len(sidecar.column))] assert len(sidecar.column) == len(sidecar.kzg_commitments) == len(sidecar.kzg_proofs) # KZG batch verifies that the cells match the corresponding commitments and proofs return verify_cell_proof_batch( row_commitments=sidecar.kzg_commitments, - row_ids=row_ids, # all rows - column_ids=[sidecar.index], + row_indices=row_ids, # all rows + column_indices=[sidecar.index], datas=sidecar.column, proofs=sidecar.kzg_proofs, ) @@ -107,7 +107,7 @@ def verify_data_column_sidecar_inclusion_proof(sidecar: DataColumnSidecar) -> bo ##### `compute_subnet_for_data_column_sidecar` ```python -def compute_subnet_for_data_column_sidecar(column_index: LineIndex) -> SubnetID: +def compute_subnet_for_data_column_sidecar(column_index: ColumnIndex) -> SubnetID: return SubnetID(column_index % DATA_COLUMN_SIDECAR_SUBNET_COUNT) ``` @@ -167,7 +167,7 @@ Request Content: ``` ( - DataColumnIdentifier + DataColumnIndexentifier ) ``` diff --git a/specs/_features/eip7594/polynomial-commitments-sampling.md b/specs/_features/eip7594/polynomial-commitments-sampling.md index e09888a31b..e1cb2e85be 100644 --- a/specs/_features/eip7594/polynomial-commitments-sampling.md +++ b/specs/_features/eip7594/polynomial-commitments-sampling.md @@ -60,6 +60,8 @@ Public functions MUST accept raw bytes as input and perform the required cryptog | `PolynomialCoeff` | `List[BLSFieldElement, 2 * FIELD_ELEMENTS_PER_BLOB]` | A polynomial in coefficient form | | `Cell` | `Vector[BLSFieldElement, FIELD_ELEMENTS_PER_CELL]` | The unit of blob data that can come with their own KZG proofs | | `CellID` | `uint64` | Cell identifier | +| `RowIndex` | `uint64` | Row identifier | +| `ColumnIndex` | `uint64` | Column identifier | ## Constants @@ -415,8 +417,8 @@ def verify_cell_proof(commitment: KZGCommitment, ```python def verify_cell_proof_batch(row_commitments: Sequence[KZGCommitment], - row_ids: Sequence[int], - column_ids: Sequence[int], + row_indices: Sequence[RowIndex], + column_indices: Sequence[ColumnIndex], cells: Sequence[Cell], proofs: Sequence[KZGProof]) -> bool: """ @@ -432,11 +434,11 @@ def verify_cell_proof_batch(row_commitments: Sequence[KZGCommitment], """ # Get commitments 
via row IDs - commitments = [row_commitments[row_id] for row_id in row_ids] + commitments = [row_commitments[row_index] for row_index in row_indices] return all( - verify_kzg_proof_multi_impl(commitment, coset_for_cell(column_id), cell, proof) - for commitment, column_id, cell, proof in zip(commitments, column_ids, cells, proofs) + verify_kzg_proof_multi_impl(commitment, coset_for_cell(column_index), cell, proof) + for commitment, column_index, cell, proof in zip(commitments, column_indices, cells, proofs) ) ``` diff --git a/tests/core/pyspec/eth2spec/test/eip7594/unittests/polynomial_commitments/test_polynomial_commitments.py b/tests/core/pyspec/eth2spec/test/eip7594/unittests/polynomial_commitments/test_polynomial_commitments.py index d3e848772a..9affe5dfa4 100644 --- a/tests/core/pyspec/eth2spec/test/eip7594/unittests/polynomial_commitments/test_polynomial_commitments.py +++ b/tests/core/pyspec/eth2spec/test/eip7594/unittests/polynomial_commitments/test_polynomial_commitments.py @@ -50,8 +50,8 @@ def test_verify_cell_proof_batch(spec): assert spec.verify_cell_proof_batch( row_commitments=[commitment], - row_ids=[0], - column_ids=[0, 1], + row_indices=[0], + column_indices=[0, 1], cells=cells[0:1], proofs=proofs, ) From 9553d5490a174ea66c37a9a2bdd41ffb83c2ce6e Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Fri, 19 Jan 2024 22:30:53 +0800 Subject: [PATCH 06/25] fix typo --- specs/_features/eip7594/p2p-interface.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/specs/_features/eip7594/p2p-interface.md b/specs/_features/eip7594/p2p-interface.md index fd39516bd3..ad9a7eb548 100644 --- a/specs/_features/eip7594/p2p-interface.md +++ b/specs/_features/eip7594/p2p-interface.md @@ -13,7 +13,7 @@ - [Configuration](#configuration) - [Containers](#containers) - [`DataColumnSidecar`](#datacolumnsidecar) - - [`DataColumnIndexentifier`](#dataColumnIndexentifier) + - [`DataColumnIdentifier`](#datacolumnidentifier) - [Helpers](#helpers) - [`verify_data_column_sidecar_kzg_proof`](#verify_data_column_sidecar_kzg_proof) - [`verify_data_column_sidecar_inclusion_proof`](#verify_data_column_sidecar_inclusion_proof) @@ -57,10 +57,10 @@ class DataColumnSidecar(Container): kzg_commitments_inclusion_proof: Vector[Bytes32, KZG_COMMITMENTS_INCLUSION_PROOF_DEPTH] ``` -#### `DataColumnIndexentifier` +#### `DataColumnIdentifier` ```python -class DataColumnIndexentifier(Container): +class DataColumnIdentifier(Container): block_root: Root index: ColumnIndex ``` @@ -82,7 +82,7 @@ def verify_data_column_sidecar_kzg_proof(sidecar: DataColumnSidecar) -> bool: row_commitments=sidecar.kzg_commitments, row_indices=row_ids, # all rows column_indices=[sidecar.index], - datas=sidecar.column, + cells=sidecar.column, proofs=sidecar.kzg_proofs, ) ``` @@ -167,7 +167,7 @@ Request Content: ``` ( - DataColumnIndexentifier + DataColumnIdentifier ) ``` From a72ece83484c6650ae62788cacd243bbabb8ee40 Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Fri, 19 Jan 2024 22:37:02 +0800 Subject: [PATCH 07/25] Apply suggestions from code review Co-authored-by: g11tech --- specs/_features/eip7594/das-core.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/specs/_features/eip7594/das-core.md b/specs/_features/eip7594/das-core.md index dfe9833b48..109ffb3a00 100644 --- a/specs/_features/eip7594/das-core.md +++ b/specs/_features/eip7594/das-core.md @@ -43,7 +43,7 @@ We define the following Python custom types for type hinting and readability: | Name | SSZ equivalent | Description | | - | - | - | 
| `DataColumn` | `List[Cell, MAX_BLOBS_PER_BLOCK]` | The data of each column in EIP7594 | -| `ExtendedMatrix` | `List[Cell, MAX_BLOBS_PER_BLOCK * NUMBER_OF_COLUMNS]` | The full data with blobs and one-dimensional erasure coding extension | +| `ExtendedMatrix` | `List[Cell, MAX_BLOBS_PER_BLOCK * NUMBER_OF_COLUMNS]` | The full data of one-dimensional erasure coding extended blobs (in row major format) | | `FlatExtendedMatrix` | `List[BLSFieldElement, MAX_BLOBS_PER_BLOCK * FIELD_ELEMENTS_PER_BLOB * NUMBER_OF_COLUMNS]` | The flattened format of `ExtendedMatrix` | ## Configuration @@ -58,7 +58,7 @@ We define the following Python custom types for type hinting and readability: | Name | Value | Description | | - | - | - | -| `SAMPLES_PER_SLOT` | `8` | Number of random samples a node queries per slot | +| `SAMPLES_PER_SLOT` | `8` | Number of `DataColumn` random samples a node queries per slot | | `CUSTODY_REQUIREMENT` | `2` | Minimum number of columns an honest node custodies and serves samples from | | `TARGET_NUMBER_OF_PEERS` | `70` | Suggested minimum peer count | From 65be5b055631c1ce566d1293c3d7e354f3525367 Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Sat, 20 Jan 2024 00:24:53 +0800 Subject: [PATCH 08/25] Make `CUSTODY_REQUIREMENT` unit be subnets; move some depended helpers to `das-core.md` --- configs/mainnet.yaml | 3 ++ configs/minimal.yaml | 4 +- specs/_features/eip7594/das-core.md | 52 ++++++++++++++----- specs/_features/eip7594/p2p-interface.md | 19 ------- .../unittests/test_config_invariants.py | 6 +-- .../test/eip7594/unittests/test_custody.py | 39 +++++++++----- .../test/eip7594/unittests/test_networking.py | 4 +- 7 files changed, 76 insertions(+), 51 deletions(-) diff --git a/configs/mainnet.yaml b/configs/mainnet.yaml index fa664e122a..3b1c14e4be 100644 --- a/configs/mainnet.yaml +++ b/configs/mainnet.yaml @@ -157,3 +157,6 @@ BLOB_SIDECAR_SUBNET_COUNT: 6 WHISK_EPOCHS_PER_SHUFFLING_PHASE: 256 # `Epoch(2)` WHISK_PROPOSER_SELECTION_GAP: 2 + +# EIP7594 +DATA_COLUMN_SIDECAR_SUBNET_COUNT: 32 diff --git a/configs/minimal.yaml b/configs/minimal.yaml index 9ac4249a5d..d6d4a09426 100644 --- a/configs/minimal.yaml +++ b/configs/minimal.yaml @@ -62,7 +62,6 @@ WHISK_FORK_EPOCH: 18446744073709551615 EIP7594_FORK_VERSION: 0x06000001 EIP7594_FORK_EPOCH: 18446744073709551615 - # Time parameters # --------------------------------------------------------------- # [customized] Faster for testing purposes @@ -156,3 +155,6 @@ BLOB_SIDECAR_SUBNET_COUNT: 6 # Whisk WHISK_EPOCHS_PER_SHUFFLING_PHASE: 4 WHISK_PROPOSER_SELECTION_GAP: 1 + +# EIP7594 +DATA_COLUMN_SIDECAR_SUBNET_COUNT: 32 diff --git a/specs/_features/eip7594/das-core.md b/specs/_features/eip7594/das-core.md index 109ffb3a00..a7aa745eef 100644 --- a/specs/_features/eip7594/das-core.md +++ b/specs/_features/eip7594/das-core.md @@ -13,7 +13,7 @@ - [Data size](#data-size) - [Custody setting](#custody-setting) - [Helper functions](#helper-functions) - - [`get_custody_lines`](#get_custody_lines) + - [`get_custody_columns`](#get_custody_columns) - [`compute_extended_data`](#compute_extended_data) - [`compute_extended_matrix`](#compute_extended_matrix) - [`get_data_column_sidecars`](#get_data_column_sidecars) @@ -54,23 +54,51 @@ We define the following Python custom types for type hinting and readability: | - | - | - | | `NUMBER_OF_COLUMNS` | `uint64((FIELD_ELEMENTS_PER_BLOB * 2) // FIELD_ELEMENTS_PER_CELL)` (= 128) | Number of columns in the extended data matrix. 
| +### Networking + +| Name | Value | Description | +|------------------------------------|------------------|---------------------------------------------------------------------| +| `DATA_COLUMN_SIDECAR_SUBNET_COUNT` | `32` | The number of data column sidecar subnets used in the gossipsub protocol. | + ### Custody setting | Name | Value | Description | | - | - | - | | `SAMPLES_PER_SLOT` | `8` | Number of `DataColumn` random samples a node queries per slot | -| `CUSTODY_REQUIREMENT` | `2` | Minimum number of columns an honest node custodies and serves samples from | +| `CUSTODY_REQUIREMENT` | `1` | Minimum number of subnets an honest node custodies and serves samples from | | `TARGET_NUMBER_OF_PEERS` | `70` | Suggested minimum peer count | +### Containers + +#### `DataColumnSidecar` + +```python +class DataColumnSidecar(Container): + index: ColumnIndex # Index of column in extended matrix + column: DataColumn + kzg_commitments: List[KZGCommitment, MAX_BLOB_COMMITMENTS_PER_BLOCK] + kzg_proofs: List[KZGProof, MAX_BLOB_COMMITMENTS_PER_BLOCK] + signed_block_header: SignedBeaconBlockHeader + kzg_commitments_inclusion_proof: Vector[Bytes32, KZG_COMMITMENTS_INCLUSION_PROOF_DEPTH] +``` + ### Helper functions -#### `get_custody_lines` +#### `get_custody_columns` ```python -def get_custody_lines(node_id: NodeID, custody_size: uint64) -> Sequence[ColumnIndex]: - assert custody_size <= NUMBER_OF_COLUMNS - column_index = node_id % NUMBER_OF_COLUMNS - return [ColumnIndex((column_index + i) % NUMBER_OF_COLUMNS) for i in range(custody_size)] +def get_custody_columns(node_id: NodeID, custody_subnet_count: uint64) -> Sequence[ColumnIndex]: + assert custody_subnet_count <= DATA_COLUMN_SIDECAR_SUBNET_COUNT + subnet_ids = [ + bytes_to_uint64(hash(uint_to_bytes(uint64(node_id + i)))[0:8]) % DATA_COLUMN_SIDECAR_SUBNET_COUNT + for i in range(custody_subnet_count) + ] + columns_per_subnet = NUMBER_OF_COLUMNS // DATA_COLUMN_SIDECAR_SUBNET_COUNT + return [ + ColumnIndex(subnet_id + (i * columns_per_subnet)) + for i in range(columns_per_subnet) + for subnet_id in subnet_ids + ] ``` #### `compute_extended_data` @@ -126,15 +154,15 @@ def get_data_column_sidecars(signed_block: SignedBeaconBlock, ### Custody requirement -Each node downloads and custodies a minimum of `CUSTODY_REQUIREMENT` columns per slot. The particular columns that the node is required to custody are selected pseudo-randomly (more on this below). +Each node downloads and custodies a minimum of `CUSTODY_REQUIREMENT` subnets per slot. The particular columns that the node is required to custody are selected pseudo-randomly (more on this below). -A node *may* choose to custody and serve more than the minimum honesty requirement. Such a node explicitly advertises a number greater than `CUSTODY_REQUIREMENT` via the peer discovery mechanism -- for example, in their ENR (e.g. `custody_lines: 8` if the node custodies `8` columns each slot) -- up to a `NUMBER_OF_COLUMNS` (i.e. a super-full node). +A node *may* choose to custody and serve more than the minimum honesty requirement. Such a node explicitly advertises a number greater than `CUSTODY_REQUIREMENT` via the peer discovery mechanism -- for example, in their ENR (e.g. `custody_lines: 4` if the node custodies `4` subnets each slot) -- up to a `DATA_COLUMN_SIDECAR_SUBNET_COUNT` (i.e. a super-full node). A node stores the custodied columns for the duration of the pruning period and responds to peer requests for samples on those columns. 
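*Editor's illustration (not part of the patch)*: to make the custody assignment concrete, the sketch below derives the custody columns for an example node ID and checks the basic properties implied by `get_custody_columns` as defined in this patch (output size and column bounds). It assumes the compiled EIP-7594 pyspec is importable as `eth2spec.eip7594.mainnet`; the node ID value is arbitrary.

```python
# Illustrative sketch, not spec code. Assumes the compiled pyspec module
# `eth2spec.eip7594.mainnet` exists and exposes the helpers defined above.
from eth2spec.eip7594 import mainnet as spec

node_id = 0x12345678  # arbitrary example node ID
custody_subnet_count = spec.CUSTODY_REQUIREMENT

columns = spec.get_custody_columns(node_id, custody_subnet_count)
columns_per_subnet = spec.NUMBER_OF_COLUMNS // spec.config.DATA_COLUMN_SIDECAR_SUBNET_COUNT

# One batch of columns per custodied subnet, all indices within the extended matrix.
assert len(columns) == custody_subnet_count * columns_per_subnet
assert all(0 <= index < spec.NUMBER_OF_COLUMNS for index in columns)
```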
### Public, deterministic selection -The particular columns that a node custodies are selected pseudo-randomly as a function (`get_custody_lines`) of the node-id and custody size -- importantly this function can be run by any party as the inputs are all public. +The particular columns that a node custodies are selected pseudo-randomly as a function (`get_custody_columns`) of the node-id and custody size -- importantly this function can be run by any party as the inputs are all public. *Note*: increasing the `custody_size` parameter for a given `node_id` extends the returned list (rather than being an entirely new shuffle) such that if `custody_size` is unknown, the default `CUSTODY_REQUIREMENT` will be correct for a subset of the node's custody. @@ -178,7 +206,7 @@ Once the node obtain the column, the node should send the missing columns to the ## Peer sampling -At each slot, a node makes (locally randomly determined) `SAMPLES_PER_SLOT` queries for samples from their peers via `DataColumnSidecarByRoot` request. A node utilizes `get_custody_lines` helper to determine which peer(s) to request from. If a node has enough good/honest peers across all rows and columns, this has a high chance of success. +At each slot, a node makes (locally randomly determined) `SAMPLES_PER_SLOT` queries for samples from their peers via `DataColumnSidecarByRoot` request. A node utilizes `get_custody_columns` helper to determine which peer(s) to request from. If a node has enough good/honest peers across all rows and columns, this has a high chance of success. ## Peer scoring @@ -220,4 +248,4 @@ However, for simplicity, we don't assign row custody assignments to nodes in the ### Subnet stability -To start with a simple, stable backbone, for now, we don't shuffle the subnet assignments via the deterministic custody selection helper `get_custody_lines`. However, staggered rotation likely needs to happen on the order of the pruning period to ensure subnets can be utilized for recovery. For example, introducing an `epoch` argument allows the function to maintain stability over many epochs. +To start with a simple, stable backbone, for now, we don't shuffle the subnet assignments via the deterministic custody selection helper `get_custody_columns`. However, staggered rotation likely needs to happen on the order of the pruning period to ensure subnets can be utilized for recovery. For example, introducing an `epoch` argument allows the function to maintain stability over many epochs. diff --git a/specs/_features/eip7594/p2p-interface.md b/specs/_features/eip7594/p2p-interface.md index ad9a7eb548..b50e4d5a06 100644 --- a/specs/_features/eip7594/p2p-interface.md +++ b/specs/_features/eip7594/p2p-interface.md @@ -37,26 +37,8 @@ |------------------------------------------|-----------------------------------|---------------------------------------------------------------------| | `KZG_COMMITMENTS_INCLUSION_PROOF_DEPTH` | `uint64(floorlog2(get_generalized_index(BeaconBlockBody, 'blob_kzg_commitments')))` (= 4) | Merkle proof index for `blob_kzg_commitments` | -### Configuration - -| Name | Value | Description | -|------------------------------------------|-----------------------------------|---------------------------------------------------------------------| -| `DATA_COLUMN_SIDECAR_SUBNET_COUNT` | `32` | The number of data column sidecar subnets used in the gossipsub protocol. 
| - ### Containers -#### `DataColumnSidecar` - -```python -class DataColumnSidecar(Container): - index: ColumnIndex # Index of column in extended matrix - column: DataColumn - kzg_commitments: List[KZGCommitment, MAX_BLOB_COMMITMENTS_PER_BLOCK] - kzg_proofs: List[KZGProof, MAX_BLOB_COMMITMENTS_PER_BLOCK] - signed_block_header: SignedBeaconBlockHeader - kzg_commitments_inclusion_proof: Vector[Bytes32, KZG_COMMITMENTS_INCLUSION_PROOF_DEPTH] -``` - #### `DataColumnIdentifier` ```python @@ -111,7 +93,6 @@ def compute_subnet_for_data_column_sidecar(column_index: ColumnIndex) -> SubnetI return SubnetID(column_index % DATA_COLUMN_SIDECAR_SUBNET_COUNT) ``` - ### The gossip domain: gossipsub Some gossip meshes are upgraded in the EIP-7594 fork to support upgraded types. diff --git a/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_config_invariants.py b/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_config_invariants.py index 712e9892c0..5f709a22ac 100644 --- a/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_config_invariants.py +++ b/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_config_invariants.py @@ -12,6 +12,6 @@ def test_invariants(spec): assert spec.FIELD_ELEMENTS_PER_BLOB % spec.FIELD_ELEMENTS_PER_CELL == 0 assert spec.FIELD_ELEMENTS_PER_BLOB * 2 % spec.NUMBER_OF_COLUMNS == 0 assert spec.SAMPLES_PER_SLOT <= spec.NUMBER_OF_COLUMNS - assert spec.CUSTODY_REQUIREMENT <= spec.NUMBER_OF_COLUMNS - assert spec.DATA_COLUMN_SIDECAR_SUBNET_COUNT <= spec.NUMBER_OF_COLUMNS - assert spec.NUMBER_OF_COLUMNS % spec.DATA_COLUMN_SIDECAR_SUBNET_COUNT == 0 + assert spec.CUSTODY_REQUIREMENT <= spec.config.DATA_COLUMN_SIDECAR_SUBNET_COUNT + assert spec.config.DATA_COLUMN_SIDECAR_SUBNET_COUNT <= spec.NUMBER_OF_COLUMNS + assert spec.NUMBER_OF_COLUMNS % spec.config.DATA_COLUMN_SIDECAR_SUBNET_COUNT == 0 diff --git a/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_custody.py b/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_custody.py index d629b3a066..9c9bcb2a18 100644 --- a/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_custody.py +++ b/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_custody.py @@ -6,36 +6,47 @@ ) +def run_get_custody_columns(spec, peer_count, custody_subnet_count): + assignments = [spec.get_custody_columns(node_id, custody_subnet_count) for node_id in range(peer_count)] + + subnet_per_column = spec.NUMBER_OF_COLUMNS // spec.config.DATA_COLUMN_SIDECAR_SUBNET_COUNT + for assignment in assignments: + assert len(assignment) == custody_subnet_count * subnet_per_column + + @with_eip7594_and_later @spec_test @single_phase -def test_get_custody_lines_peers_within_number_of_columns(spec): +def test_get_custody_columns_peers_within_number_of_columns(spec): peer_count = 10 - custody_size = spec.CUSTODY_REQUIREMENT + custody_subnet_count = spec.CUSTODY_REQUIREMENT assert spec.NUMBER_OF_COLUMNS > peer_count - assignments = [spec.get_custody_lines(node_id, custody_size) for node_id in range(peer_count)] - - for assignment in assignments: - assert len(assignment) == custody_size + run_get_custody_columns(spec, peer_count, custody_subnet_count) @with_eip7594_and_later @spec_test @single_phase -def test_get_custody_lines_peers_more_than_number_of_columns(spec): +def test_get_custody_columns_peers_more_than_number_of_columns(spec): peer_count = 200 - custody_size = spec.CUSTODY_REQUIREMENT + custody_subnet_count = spec.CUSTODY_REQUIREMENT assert spec.NUMBER_OF_COLUMNS < peer_count - assignments = [spec.get_custody_lines(node_id, custody_size) for node_id in 
range(peer_count)] + run_get_custody_columns(spec, peer_count, custody_subnet_count) - for assignment in assignments: - assert len(assignment) == custody_size + +@with_eip7594_and_later +@spec_test +@single_phase +def test_get_custody_columns_maximum_subnets(spec): + peer_count = 10 + custody_subnet_count = spec.config.DATA_COLUMN_SIDECAR_SUBNET_COUNT + run_get_custody_columns(spec, peer_count, custody_subnet_count) @with_eip7594_and_later @spec_test @single_phase -def test_get_custody_lines_custody_size_more_than_number_of_columns(spec): +def test_get_custody_columns_custody_size_more_than_number_of_columns(spec): node_id = 1 - custody_size = spec.NUMBER_OF_COLUMNS + 1 - expect_assertion_error(lambda: spec.get_custody_lines(node_id, custody_size)) + custody_subnet_count = spec.config.DATA_COLUMN_SIDECAR_SUBNET_COUNT + 1 + expect_assertion_error(lambda: spec.get_custody_columns(node_id, custody_subnet_count)) diff --git a/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_networking.py b/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_networking.py index 972539f5cf..2ab52be6c5 100644 --- a/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_networking.py +++ b/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_networking.py @@ -10,10 +10,10 @@ @single_phase def test_compute_subnet_for_data_column_sidecar(spec): subnet_results = [] - for column_index in range(spec.DATA_COLUMN_SIDECAR_SUBNET_COUNT): + for column_index in range(spec.config.DATA_COLUMN_SIDECAR_SUBNET_COUNT): subnet_results.append(spec.compute_subnet_for_data_column_sidecar(column_index)) # no duplicates assert len(subnet_results) == len(set(subnet_results)) # next one should be duplicate - next_subnet = spec.compute_subnet_for_data_column_sidecar(spec.DATA_COLUMN_SIDECAR_SUBNET_COUNT) + next_subnet = spec.compute_subnet_for_data_column_sidecar(spec.config.DATA_COLUMN_SIDECAR_SUBNET_COUNT) assert next_subnet == subnet_results[0] From 55db86127dbfb5bedbc57c32af19341ddfc3c964 Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Sat, 20 Jan 2024 14:20:27 +0800 Subject: [PATCH 09/25] Apply suggestions from code review Co-authored-by: Justin Traglia <95511699+jtraglia@users.noreply.github.com> --- specs/_features/eip7594/das-core.md | 3 +-- specs/_features/eip7594/p2p-interface.md | 22 +++++++++++----------- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/specs/_features/eip7594/das-core.md b/specs/_features/eip7594/das-core.md index 109ffb3a00..121d91279c 100644 --- a/specs/_features/eip7594/das-core.md +++ b/specs/_features/eip7594/das-core.md @@ -42,7 +42,7 @@ We define the following Python custom types for type hinting and readability: | Name | SSZ equivalent | Description | | - | - | - | -| `DataColumn` | `List[Cell, MAX_BLOBS_PER_BLOCK]` | The data of each column in EIP7594 | +| `DataColumn` | `List[Cell, MAX_BLOBS_PER_BLOCK]` | The data of each column in EIP-7594 | | `ExtendedMatrix` | `List[Cell, MAX_BLOBS_PER_BLOCK * NUMBER_OF_COLUMNS]` | The full data of one-dimensional erasure coding extended blobs (in row major format) | | `FlatExtendedMatrix` | `List[BLSFieldElement, MAX_BLOBS_PER_BLOCK * FIELD_ELEMENTS_PER_BLOB * NUMBER_OF_COLUMNS]` | The flattened format of `ExtendedMatrix` | @@ -217,7 +217,6 @@ The potential benefits of having row custody could include: However, for simplicity, we don't assign row custody assignments to nodes in the current design. 
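*Editor's illustration (not part of the patch)*: the subnet/column arithmetic exercised by the tests above can be checked with plain numbers. The sketch below uses the mainnet values from these patches (`NUMBER_OF_COLUMNS = 128`, `DATA_COLUMN_SIDECAR_SUBNET_COUNT = 32`) and the column-to-subnet mapping from `compute_subnet_for_data_column_sidecar` (column index modulo subnet count); it is a toy illustration, not spec code.

```python
# Toy check of the subnet/column arithmetic, using the mainnet values above.
NUMBER_OF_COLUMNS = 128
DATA_COLUMN_SIDECAR_SUBNET_COUNT = 32
COLUMNS_PER_SUBNET = NUMBER_OF_COLUMNS // DATA_COLUMN_SIDECAR_SUBNET_COUNT  # = 4


def columns_of_subnet(subnet_id: int) -> list:
    # Columns whose sidecars land on `data_column_sidecar_{subnet_id}`,
    # per compute_subnet_for_data_column_sidecar(column) = column % subnet count.
    return [
        column for column in range(NUMBER_OF_COLUMNS)
        if column % DATA_COLUMN_SIDECAR_SUBNET_COUNT == subnet_id
    ]


# Every subnet carries the same number of columns, and together they cover all columns.
assert all(
    len(columns_of_subnet(s)) == COLUMNS_PER_SUBNET
    for s in range(DATA_COLUMN_SIDECAR_SUBNET_COUNT)
)
all_columns = sorted(
    column for s in range(DATA_COLUMN_SIDECAR_SUBNET_COUNT) for column in columns_of_subnet(s)
)
assert all_columns == list(range(NUMBER_OF_COLUMNS))
```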
- ### Subnet stability To start with a simple, stable backbone, for now, we don't shuffle the subnet assignments via the deterministic custody selection helper `get_custody_lines`. However, staggered rotation likely needs to happen on the order of the pruning period to ensure subnets can be utilized for recovery. For example, introducing an `epoch` argument allows the function to maintain stability over many epochs. diff --git a/specs/_features/eip7594/p2p-interface.md b/specs/_features/eip7594/p2p-interface.md index ad9a7eb548..73804f79c3 100644 --- a/specs/_features/eip7594/p2p-interface.md +++ b/specs/_features/eip7594/p2p-interface.md @@ -33,15 +33,15 @@ ### Preset -| Name | Value | Description | -|------------------------------------------|-----------------------------------|---------------------------------------------------------------------| -| `KZG_COMMITMENTS_INCLUSION_PROOF_DEPTH` | `uint64(floorlog2(get_generalized_index(BeaconBlockBody, 'blob_kzg_commitments')))` (= 4) | Merkle proof index for `blob_kzg_commitments` | +| Name | Value | Description | +| - | - | - | +| `KZG_COMMITMENTS_INCLUSION_PROOF_DEPTH` | `uint64(floorlog2(get_generalized_index(BeaconBlockBody, 'blob_kzg_commitments')))` (= 4) | Merkle proof index for `blob_kzg_commitments` | ### Configuration -| Name | Value | Description | -|------------------------------------------|-----------------------------------|---------------------------------------------------------------------| -| `DATA_COLUMN_SIDECAR_SUBNET_COUNT` | `32` | The number of data column sidecar subnets used in the gossipsub protocol. | +| Name | Value | Description | +| - | - | - | +| `DATA_COLUMN_SIDECAR_SUBNET_COUNT` | `32` | The number of data column sidecar subnets used in the gossipsub protocol | ### Containers @@ -74,8 +74,9 @@ def verify_data_column_sidecar_kzg_proof(sidecar: DataColumnSidecar) -> bool: """ Verify if the proofs are correct """ - row_ids = [RowIndex(i) for i in range(len(sidecar.column))] + assert sidecar.index < NUMBER_OF_COLUMNS assert len(sidecar.column) == len(sidecar.kzg_commitments) == len(sidecar.kzg_proofs) + row_ids = [RowIndex(i) for i in range(len(sidecar.column))] # KZG batch verifies that the cells match the corresponding commitments and proofs return verify_cell_proof_batch( @@ -111,7 +112,6 @@ def compute_subnet_for_data_column_sidecar(column_index: ColumnIndex) -> SubnetI return SubnetID(column_index % DATA_COLUMN_SIDECAR_SUBNET_COUNT) ``` - ### The gossip domain: gossipsub Some gossip meshes are upgraded in the EIP-7594 fork to support upgraded types. 
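*Editor's illustration (not part of the patch)*: on the gossip side, a client routes each column sidecar to its topic from the column index. The sketch below re-implements `compute_subnet_for_data_column_sidecar` locally and forms the topic string using the usual `/eth2/<fork_digest>/<topic>/ssz_snappy` convention; the fork digest value is a placeholder.

```python
# Illustrative only: routing a column sidecar to its gossip topic.
DATA_COLUMN_SIDECAR_SUBNET_COUNT = 32  # value from the configuration above


def compute_subnet_for_data_column_sidecar(column_index: int) -> int:
    # Mirrors the helper defined in the p2p spec above.
    return column_index % DATA_COLUMN_SIDECAR_SUBNET_COUNT


def data_column_sidecar_topic(fork_digest_hex: str, column_index: int) -> str:
    subnet_id = compute_subnet_for_data_column_sidecar(column_index)
    return f"/eth2/{fork_digest_hex}/data_column_sidecar_{subnet_id}/ssz_snappy"


# Example: column 70 is gossiped on subnet 6 (70 % 32).
assert data_column_sidecar_topic("01020304", 70).endswith("data_column_sidecar_6/ssz_snappy")
```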
@@ -159,9 +159,9 @@ The `` field is calculated as `context = compute_fork_digest(fork [1]: # (eth2spec: skip) -| `fork_version` | Chunk SSZ type | -|--------------------------|-------------------------------| -| `EIP7594_FORK_VERSION` | `eip7594.DataColumnSidecar` | +| `fork_version` | Chunk SSZ type | +| - | - | +| `EIP7594_FORK_VERSION` | `eip7594.DataColumnSidecar` | Request Content: From 4477cc695263e3b691abb41416cbda875ee7eab7 Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Sat, 20 Jan 2024 13:51:18 +0800 Subject: [PATCH 10/25] Fix column computation --- specs/_features/eip7594/das-core.md | 19 ++++++++++++++----- .../test/eip7594/unittests/test_custody.py | 7 +++++-- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/specs/_features/eip7594/das-core.md b/specs/_features/eip7594/das-core.md index a7aa745eef..4ebb352488 100644 --- a/specs/_features/eip7594/das-core.md +++ b/specs/_features/eip7594/das-core.md @@ -89,13 +89,22 @@ class DataColumnSidecar(Container): ```python def get_custody_columns(node_id: NodeID, custody_subnet_count: uint64) -> Sequence[ColumnIndex]: assert custody_subnet_count <= DATA_COLUMN_SIDECAR_SUBNET_COUNT - subnet_ids = [ - bytes_to_uint64(hash(uint_to_bytes(uint64(node_id + i)))[0:8]) % DATA_COLUMN_SIDECAR_SUBNET_COUNT - for i in range(custody_subnet_count) - ] + + subnet_ids = [] + i = 0 + while len(subnet_ids) < custody_subnet_count: + subnet_id = ( + bytes_to_uint64(hash(uint_to_bytes(uint64(node_id + i)))[0:8]) + % DATA_COLUMN_SIDECAR_SUBNET_COUNT + ) + if subnet_id not in subnet_ids: + subnet_ids.append(subnet_id) + i += 1 + assert len(subnet_ids) == len(set(subnet_ids)) + columns_per_subnet = NUMBER_OF_COLUMNS // DATA_COLUMN_SIDECAR_SUBNET_COUNT return [ - ColumnIndex(subnet_id + (i * columns_per_subnet)) + ColumnIndex(DATA_COLUMN_SIDECAR_SUBNET_COUNT * i + subnet_id) for i in range(columns_per_subnet) for subnet_id in subnet_ids ] diff --git a/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_custody.py b/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_custody.py index 9c9bcb2a18..9c8168b338 100644 --- a/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_custody.py +++ b/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_custody.py @@ -9,9 +9,12 @@ def run_get_custody_columns(spec, peer_count, custody_subnet_count): assignments = [spec.get_custody_columns(node_id, custody_subnet_count) for node_id in range(peer_count)] - subnet_per_column = spec.NUMBER_OF_COLUMNS // spec.config.DATA_COLUMN_SIDECAR_SUBNET_COUNT + columns_per_subnet = spec.NUMBER_OF_COLUMNS // spec.config.DATA_COLUMN_SIDECAR_SUBNET_COUNT for assignment in assignments: - assert len(assignment) == custody_subnet_count * subnet_per_column + assert len(assignment) == custody_subnet_count * columns_per_subnet + print('assignment', assignment) + print('set(assignment)', set(assignment)) + assert len(assignment) == len(set(assignment)) @with_eip7594_and_later From 56e6a98b9c505f7854cff8840592af4b14d93877 Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Sat, 20 Jan 2024 17:24:57 +0800 Subject: [PATCH 11/25] `verify_data_column_sidecar_kzg_proof` -> `verify_data_column_sidecar_kzg_proofs` --- specs/_features/eip7594/p2p-interface.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/specs/_features/eip7594/p2p-interface.md b/specs/_features/eip7594/p2p-interface.md index 73804f79c3..3444850c6d 100644 --- a/specs/_features/eip7594/p2p-interface.md +++ b/specs/_features/eip7594/p2p-interface.md @@ -15,7 +15,7 @@ - 
[`DataColumnSidecar`](#datacolumnsidecar) - [`DataColumnIdentifier`](#datacolumnidentifier) - [Helpers](#helpers) - - [`verify_data_column_sidecar_kzg_proof`](#verify_data_column_sidecar_kzg_proof) + - [`verify_data_column_sidecar_kzg_proofs`](#verify_data_column_sidecar_kzg_proofs) - [`verify_data_column_sidecar_inclusion_proof`](#verify_data_column_sidecar_inclusion_proof) - [`compute_subnet_for_data_column_sidecar`](#compute_subnet_for_data_column_sidecar) - [The gossip domain: gossipsub](#the-gossip-domain-gossipsub) @@ -67,10 +67,10 @@ class DataColumnIdentifier(Container): ### Helpers -##### `verify_data_column_sidecar_kzg_proof` +##### `verify_data_column_sidecar_kzg_proofs` ```python -def verify_data_column_sidecar_kzg_proof(sidecar: DataColumnSidecar) -> bool: +def verify_data_column_sidecar_kzg_proofs(sidecar: DataColumnSidecar) -> bool: """ Verify if the proofs are correct """ @@ -138,7 +138,7 @@ The following validations MUST pass before forwarding the `sidecar: DataColumnSi - _[REJECT]_ The sidecar is from a higher slot than the sidecar's block's parent (defined by `block_header.parent_root`). - _[REJECT]_ The current finalized_checkpoint is an ancestor of the sidecar's block -- i.e. `get_checkpoint_block(store, block_header.parent_root, store.finalized_checkpoint.epoch) == store.finalized_checkpoint.root`. - _[REJECT]_ The sidecar's `kzg_commitments` field inclusion proof is valid as verified by `verify_data_column_sidecar_inclusion_proof(sidecar)`. -- _[REJECT]_ The sidecar's column data is valid as verified by `verify_data_column_sidecar_kzg_proof(sidecar)`. +- _[REJECT]_ The sidecar's column data is valid as verified by `verify_data_column_sidecar_kzg_proofs(sidecar)`. - _[IGNORE]_ The sidecar is the first sidecar for the tuple `(block_header.slot, block_header.proposer_index, sidecar.index)` with valid header signature, sidecar inclusion proof, and kzg proof. - _[REJECT]_ The sidecar is proposed by the expected `proposer_index` for the block's slot in the context of the current shuffling (defined by `block_header.parent_root`/`block_header.slot`). If the `proposer_index` cannot immediately be verified against the expected shuffling, the sidecar MAY be queued for later processing while proposers for the block's branch are calculated -- in such a case _do not_ `REJECT`, instead `IGNORE` this message. 
From edeef070d81db0c0ab1b1691ce829f0b87cad06f Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Sun, 28 Jan 2024 22:47:49 +0800 Subject: [PATCH 12/25] toc --- specs/_features/eip7594/das-core.md | 3 +++ specs/_features/eip7594/p2p-interface.md | 2 -- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/specs/_features/eip7594/das-core.md b/specs/_features/eip7594/das-core.md index 4ebb352488..e3d4cb7a5a 100644 --- a/specs/_features/eip7594/das-core.md +++ b/specs/_features/eip7594/das-core.md @@ -11,7 +11,10 @@ - [Custom types](#custom-types) - [Configuration](#configuration) - [Data size](#data-size) + - [Networking](#networking) - [Custody setting](#custody-setting) + - [Containers](#containers) + - [`DataColumnSidecar`](#datacolumnsidecar) - [Helper functions](#helper-functions) - [`get_custody_columns`](#get_custody_columns) - [`compute_extended_data`](#compute_extended_data) diff --git a/specs/_features/eip7594/p2p-interface.md b/specs/_features/eip7594/p2p-interface.md index b50e4d5a06..e53374e6d0 100644 --- a/specs/_features/eip7594/p2p-interface.md +++ b/specs/_features/eip7594/p2p-interface.md @@ -10,9 +10,7 @@ - [Modifications in EIP-7594](#modifications-in-eip-7594) - [Preset](#preset) - - [Configuration](#configuration) - [Containers](#containers) - - [`DataColumnSidecar`](#datacolumnsidecar) - [`DataColumnIdentifier`](#datacolumnidentifier) - [Helpers](#helpers) - [`verify_data_column_sidecar_kzg_proof`](#verify_data_column_sidecar_kzg_proof) From 170dae560962cde49b715aabda9a417599f440e8 Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Mon, 29 Jan 2024 17:34:47 +0800 Subject: [PATCH 13/25] Apply suggestions from code review Co-authored-by: Justin Traglia <95511699+jtraglia@users.noreply.github.com> Co-authored-by: Pop Chunhapanya --- specs/_features/eip7594/das-core.md | 2 +- specs/_features/eip7594/p2p-interface.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/specs/_features/eip7594/das-core.md b/specs/_features/eip7594/das-core.md index 25ef751b13..21d0124cc2 100644 --- a/specs/_features/eip7594/das-core.md +++ b/specs/_features/eip7594/das-core.md @@ -182,7 +182,7 @@ The particular columns that a node custodies are selected pseudo-randomly as a f At each slot, a node needs to be able to readily sample from *any* set of columns. To this end, a node should find and maintain a set of diverse and reliable peers that can regularly satisfy their sampling demands. -A node runs a background peer discovery process, maintaining at least `TARGET_NUMBER_OF_PEERS` of various custody distributions (both custody_size and column assignments). The combination of advertised `custody_size` size and public node-id make this readily and publicly accessible. +A node runs a background peer discovery process, maintaining at least `TARGET_NUMBER_OF_PEERS` of various custody distributions (both `custody_size` and column assignments). The combination of advertised `custody_size` size and public node-id make this readily and publicly accessible. `TARGET_NUMBER_OF_PEERS` should be tuned upward in the event of failed sampling. 
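*Editor's illustration (not part of the patch)*: a peer discovery process as described above has to turn advertised custody information into concrete column coverage. The sketch below is one possible way to do that: given a map from peer node ID to advertised `custody_subnet_count`, it returns the peers expected to custody a wanted column via `get_custody_columns`. The data structures and function name are placeholders; only `get_custody_columns` comes from the spec.

```python
# Illustrative peer-selection sketch, not spec code. `spec` is assumed to be the
# compiled pyspec exposing get_custody_columns as defined in das-core.md.
from typing import Dict, List


def peers_custodying_column(spec, peers: Dict[int, int], wanted_column: int) -> List[int]:
    """`peers` maps node_id -> advertised custody_subnet_count (e.g. from ENRs)."""
    return [
        node_id
        for node_id, custody_subnet_count in peers.items()
        if wanted_column in spec.get_custody_columns(node_id, custody_subnet_count)
    ]
```

If some column is covered by too few of the maintained peers, that is the signal to tune `TARGET_NUMBER_OF_PEERS` upward, as noted in the text above.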
diff --git a/specs/_features/eip7594/p2p-interface.md b/specs/_features/eip7594/p2p-interface.md index 0e1aa7ac3d..c671a052de 100644 --- a/specs/_features/eip7594/p2p-interface.md +++ b/specs/_features/eip7594/p2p-interface.md @@ -123,7 +123,7 @@ The following validations MUST pass before forwarding the `sidecar: DataColumnSi - _[REJECT]_ The sidecar is proposed by the expected `proposer_index` for the block's slot in the context of the current shuffling (defined by `block_header.parent_root`/`block_header.slot`). If the `proposer_index` cannot immediately be verified against the expected shuffling, the sidecar MAY be queued for later processing while proposers for the block's branch are calculated -- in such a case _do not_ `REJECT`, instead `IGNORE` this message. -*Note:* In the `verify_data_column_sidecar_inclusion_proof(sidecar)` check, for all the sidecars of the same block, it verifies against the same set of `kzg_commitments` of the given beacon beacon. Client can choose to cache the result of the arguments tuple `(sidecar.kzg_commitments, sidecar.kzg_commitments_inclusion_proof, sidecar.signed_block_header)`. +*Note:* In the `verify_data_column_sidecar_inclusion_proof(sidecar)` check, for all the sidecars of the same block, it verifies against the same set of `kzg_commitments` of the given beacon block. Client can choose to cache the result of the arguments tuple `(sidecar.kzg_commitments, sidecar.kzg_commitments_inclusion_proof, sidecar.signed_block_header)`. ### The Req/Resp domain From 547460cd84bee20a0569e153a44d920395a4a53a Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Tue, 30 Jan 2024 22:09:20 +0800 Subject: [PATCH 14/25] Apply PR feedback --- specs/_features/eip7594/das-core.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/_features/eip7594/das-core.md b/specs/_features/eip7594/das-core.md index 21d0124cc2..49682a86e1 100644 --- a/specs/_features/eip7594/das-core.md +++ b/specs/_features/eip7594/das-core.md @@ -47,7 +47,7 @@ We define the following Python custom types for type hinting and readability: | - | - | - | | `DataColumn` | `List[Cell, MAX_BLOBS_PER_BLOCK]` | The data of each column in EIP-7594 | | `ExtendedMatrix` | `List[Cell, MAX_BLOBS_PER_BLOCK * NUMBER_OF_COLUMNS]` | The full data of one-dimensional erasure coding extended blobs (in row major format) | -| `FlatExtendedMatrix` | `List[BLSFieldElement, MAX_BLOBS_PER_BLOCK * FIELD_ELEMENTS_PER_BLOB * NUMBER_OF_COLUMNS]` | The flattened format of `ExtendedMatrix` | +| `FlatExtendedMatrix` | `List[BLSFieldElement, FIELD_ELEMENTS_PER_CELL * MAX_BLOBS_PER_BLOCK * NUMBER_OF_COLUMNS]` | The flattened format of `ExtendedMatrix` | ## Configuration From d23452d02bb54e9bffc5547fde00cfd1537d6d42 Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Wed, 31 Jan 2024 12:07:30 +0800 Subject: [PATCH 15/25] Deprecate `blob_sidecar_{subnet_id}` --- specs/_features/eip7594/p2p-interface.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/specs/_features/eip7594/p2p-interface.md b/specs/_features/eip7594/p2p-interface.md index c671a052de..c87741acd1 100644 --- a/specs/_features/eip7594/p2p-interface.md +++ b/specs/_features/eip7594/p2p-interface.md @@ -18,7 +18,8 @@ - [`compute_subnet_for_data_column_sidecar`](#compute_subnet_for_data_column_sidecar) - [The gossip domain: gossipsub](#the-gossip-domain-gossipsub) - [Topics and messages](#topics-and-messages) - - [Samples subnets](#samples-subnets) + - [Blob subnets](#blob-subnets) + - [Deprecated 
`blob_sidecar_{subnet_id}`](#deprecated-blob_sidecar_subnet_id) - [`data_column_sidecar_{subnet_id}`](#data_column_sidecar_subnet_id) - [The Req/Resp domain](#the-reqresp-domain) - [Messages](#messages) @@ -98,7 +99,11 @@ Some gossip meshes are upgraded in the EIP-7594 fork to support upgraded types. #### Topics and messages -##### Samples subnets +##### Blob subnets + +###### Deprecated `blob_sidecar_{subnet_id}` + +`blob_sidecar_{subnet_id}` is deprecated. ###### `data_column_sidecar_{subnet_id}` From 87e9702ab0267b4c2320aff31f09a550f744652b Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Wed, 31 Jan 2024 16:23:12 +0800 Subject: [PATCH 16/25] Fix `DataColumnSidecarsByRoot` --- specs/_features/eip7594/p2p-interface.md | 42 +++++++++++++++++++++--- 1 file changed, 37 insertions(+), 5 deletions(-) diff --git a/specs/_features/eip7594/p2p-interface.md b/specs/_features/eip7594/p2p-interface.md index c87741acd1..86099749d3 100644 --- a/specs/_features/eip7594/p2p-interface.md +++ b/specs/_features/eip7594/p2p-interface.md @@ -10,6 +10,7 @@ - [Modifications in EIP-7594](#modifications-in-eip-7594) - [Preset](#preset) + - [Configuration](#configuration) - [Containers](#containers) - [`DataColumnIdentifier`](#datacolumnidentifier) - [Helpers](#helpers) @@ -23,7 +24,7 @@ - [`data_column_sidecar_{subnet_id}`](#data_column_sidecar_subnet_id) - [The Req/Resp domain](#the-reqresp-domain) - [Messages](#messages) - - [DataColumnSidecarByRoot v1](#datacolumnsidecarbyroot-v1) + - [DataColumnSidecarsByRoot v1](#datacolumnsidecarsbyroot-v1) @@ -36,6 +37,16 @@ | - | - | - | | `KZG_COMMITMENTS_INCLUSION_PROOF_DEPTH` | `uint64(floorlog2(get_generalized_index(BeaconBlockBody, 'blob_kzg_commitments')))` (= 4) | Merkle proof index for `blob_kzg_commitments` | + +### Configuration + +*[New in Deneb:EIP4844]* + +| Name | Value | Description | +|------------------------------------------|-----------------------------------|---------------------------------------------------------------------| +| `MAX_REQUEST_DATA_COLUMN_SIDECARS` | `NUMBER_OF_COLUMNS` | Maximum number of data column sidecars in a single request | +| `MIN_EPOCHS_FOR_DATA_COLUMN_SIDECARS_REQUESTS` | `2**12` (= 4096 epochs, ~18 days) | The minimum epoch range over which a node must serve data column sidecars | + ### Containers #### `DataColumnIdentifier` @@ -134,11 +145,11 @@ The following validations MUST pass before forwarding the `sidecar: DataColumnSi #### Messages -##### DataColumnSidecarByRoot v1 +##### DataColumnSidecarsByRoot v1 -**Protocol ID:** `/eth2/beacon_chain/req/data_column_sidecar_by_root/1/` +**Protocol ID:** `/eth2/beacon_chain/req/data_column_sidecars_by_root/1/` -*[New in Deneb:EIP4844]* +*[New in EIP7594]* The `` field is calculated as `context = compute_fork_digest(fork_version, genesis_validators_root)`: @@ -153,6 +164,7 @@ Request Content: ``` ( DataColumnIdentifier + List[DataColumnIdentifier, MAX_REQUEST_DATA_COLUMN_SIDECARS] ) ``` @@ -160,6 +172,26 @@ Response Content: ``` ( - DataColumnSidecar + List[DataColumnSidecar, MAX_REQUEST_DATA_COLUMN_SIDECARS] ) ``` + +Requests sidecars by block root and index. +The response is a list of `DataColumnIdentifier` whose length is less than or equal to the number of requests. +It may be less in the case that the responding peer is missing blocks or sidecars. + +Before consuming the next response chunk, the response reader SHOULD verify the data column sidecar is well-formatted, has valid inclusion proof, and is correct w.r.t. 
the expected KZG commitments through `verify_data_column_sidecar_kzg_proofs`. + +No more than `MAX_REQUEST_DATA_COLUMN_SIDECARS` may be requested at a time. + +The response MUST consist of zero or more `response_chunk`. +Each _successful_ `response_chunk` MUST contain a single `DataColumnSidecar` payload. + +Clients MUST support requesting sidecars since `minimum_request_epoch`, where `minimum_request_epoch = max(finalized_epoch, current_epoch - MIN_EPOCHS_FOR_DATA_COLUMN_SIDECARS_REQUESTS, EIP7594_FORK_EPOCH)`. If any root in the request content references a block earlier than `minimum_request_epoch`, peers MAY respond with error code `3: ResourceUnavailable` or not include the data column sidecar in the response. + +Clients MUST respond with at least one sidecar, if they have it. +Clients MAY limit the number of blocks and sidecars in the response. + +Clients SHOULD include a sidecar in the response as soon as it passes the gossip validation rules. +Clients SHOULD NOT respond with sidecars related to blocks that fail gossip validation rules. +Clients SHOULD NOT respond with sidecars related to blocks that fail the beacon chain state transition From 428c1662835f0c19abfc293a64cc30c780da8cc0 Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Thu, 1 Feb 2024 21:17:42 +0800 Subject: [PATCH 17/25] Apply suggestions from code review Co-authored-by: g11tech Co-authored-by: Pop Chunhapanya --- specs/_features/eip7594/das-core.md | 4 ++-- specs/_features/eip7594/p2p-interface.md | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/specs/_features/eip7594/das-core.md b/specs/_features/eip7594/das-core.md index 49682a86e1..0096322d1b 100644 --- a/specs/_features/eip7594/das-core.md +++ b/specs/_features/eip7594/das-core.md @@ -166,9 +166,9 @@ def get_data_column_sidecars(signed_block: SignedBeaconBlock, ### Custody requirement -Each node downloads and custodies a minimum of `CUSTODY_REQUIREMENT` subnets per slot. The particular columns that the node is required to custody are selected pseudo-randomly (more on this below). +Each node downloads and custodies a minimum of `CUSTODY_REQUIREMENT` subnets per slot. The particular subnets that the node is required to custody are selected pseudo-randomly (more on this below). -A node *may* choose to custody and serve more than the minimum honesty requirement. Such a node explicitly advertises a number greater than `CUSTODY_REQUIREMENT` via the peer discovery mechanism -- for example, in their ENR (e.g. `custody_lines: 4` if the node custodies `4` subnets each slot) -- up to a `DATA_COLUMN_SIDECAR_SUBNET_COUNT` (i.e. a super-full node). +A node *may* choose to custody and serve more than the minimum honesty requirement. Such a node explicitly advertises a number greater than `CUSTODY_REQUIREMENT` via the peer discovery mechanism -- for example, in their ENR (e.g. `custody_subnet_count: 4` if the node custodies `4` subnets each slot) -- up to a `DATA_COLUMN_SIDECAR_SUBNET_COUNT` (i.e. a super-full node). A node stores the custodied columns for the duration of the pruning period and responds to peer requests for samples on those columns. 
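*Editor's illustration (not part of the patch)*: to connect the sampling text with the new `DataColumnSidecarsByRoot` message, the sketch below builds a request payload for one block: a list of `DataColumnIdentifier` values for `SAMPLES_PER_SLOT` randomly chosen column indices. It is a simplified illustration; real clients would select columns according to their sampling strategy and respect `MAX_REQUEST_DATA_COLUMN_SIDECARS`.

```python
# Illustrative sketch of building a DataColumnSidecarsByRoot request payload.
# `spec` is assumed to be the compiled pyspec exposing the containers/constants above.
import random


def build_sampling_request(spec, block_root, rng=None):
    rng = rng or random.Random()
    # Pick SAMPLES_PER_SLOT distinct column indices out of the extended matrix.
    sample_columns = rng.sample(range(spec.NUMBER_OF_COLUMNS), int(spec.SAMPLES_PER_SLOT))
    return [
        spec.DataColumnIdentifier(block_root=block_root, index=index)
        for index in sorted(sample_columns)
    ]
```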
diff --git a/specs/_features/eip7594/p2p-interface.md b/specs/_features/eip7594/p2p-interface.md index 86099749d3..fb60774b1e 100644 --- a/specs/_features/eip7594/p2p-interface.md +++ b/specs/_features/eip7594/p2p-interface.md @@ -163,7 +163,6 @@ Request Content: ``` ( - DataColumnIdentifier List[DataColumnIdentifier, MAX_REQUEST_DATA_COLUMN_SIDECARS] ) ``` From c47d5f3578d975aba0a8e673a105fbc451078a49 Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Fri, 2 Feb 2024 01:45:02 +0800 Subject: [PATCH 18/25] Add `recover_matrix` and remove unused `FlatExtendedMatrix` type --- specs/_features/eip7594/das-core.md | 34 ++++++++++---- specs/deneb/polynomial-commitments.md | 7 +++ .../test_polynomial_commitments.py | 8 +--- .../test/eip7594/unittests/das/__init__.py | 0 .../test/eip7594/unittests/das/test_das.py | 44 +++++++++++++++++++ .../test_polynomial_commitments.py | 14 +++--- .../test/eip7594/unittests/test_custody.py | 2 - 7 files changed, 83 insertions(+), 26 deletions(-) create mode 100644 tests/core/pyspec/eth2spec/test/eip7594/unittests/das/__init__.py create mode 100644 tests/core/pyspec/eth2spec/test/eip7594/unittests/das/test_das.py diff --git a/specs/_features/eip7594/das-core.md b/specs/_features/eip7594/das-core.md index 0096322d1b..a1565f4f76 100644 --- a/specs/_features/eip7594/das-core.md +++ b/specs/_features/eip7594/das-core.md @@ -18,7 +18,7 @@ - [Helper functions](#helper-functions) - [`get_custody_columns`](#get_custody_columns) - [`compute_extended_data`](#compute_extended_data) - - [`compute_extended_matrix`](#compute_extended_matrix) + - [`recover_matrix`](#recover_matrix) - [`get_data_column_sidecars`](#get_data_column_sidecars) - [Custody](#custody) - [Custody requirement](#custody-requirement) @@ -47,7 +47,6 @@ We define the following Python custom types for type hinting and readability: | - | - | - | | `DataColumn` | `List[Cell, MAX_BLOBS_PER_BLOCK]` | The data of each column in EIP-7594 | | `ExtendedMatrix` | `List[Cell, MAX_BLOBS_PER_BLOCK * NUMBER_OF_COLUMNS]` | The full data of one-dimensional erasure coding extended blobs (in row major format) | -| `FlatExtendedMatrix` | `List[BLSFieldElement, FIELD_ELEMENTS_PER_CELL * MAX_BLOBS_PER_BLOCK * NUMBER_OF_COLUMNS]` | The flattened format of `ExtendedMatrix` | ## Configuration @@ -122,12 +121,29 @@ def compute_extended_data(data: Sequence[BLSFieldElement]) -> Sequence[BLSFieldE ... ``` -#### `compute_extended_matrix` +#### `recover_matrix` ```python -def compute_extended_matrix(blobs: Sequence[Blob]) -> FlatExtendedMatrix: - matrix = [compute_extended_data(blob) for blob in blobs] - return FlatExtendedMatrix(matrix) +def recover_matrix(cells_dict: Dict[Tuple[BlobIndex, CellID], Cell], blob_count: uint64) -> ExtendedMatrix: + """ + Return the recovered ``ExtendedMatrix``. + + This helper demonstrate how to apply ``recover_polynomial``. + The data structure for storing cells is implementation-dependent. 
+ """ + extended_matrix = [] + for blob_index in range(blob_count): + cell_ids = [cell_id for b_index, cell_id in cells_dict.keys() if b_index == blob_index] + cells = [cells_dict[(blob_index, cell_id)] for cell_id in cell_ids] + cells_bytes = [[bls_field_to_bytes(element) for element in cell] for cell in cells] + + full_polynomial = recover_polynomial(cell_ids, cells_bytes) + cells_from_full_polynomial = [ + full_polynomial[i * FIELD_ELEMENTS_PER_CELL:(i + 1) * FIELD_ELEMENTS_PER_CELL] + for i in range(CELLS_PER_BLOB) + ] + extended_matrix.extend(cells_from_full_polynomial) + return ExtendedMatrix(extended_matrix) ``` #### `get_data_column_sidecars` @@ -204,7 +220,7 @@ To custody a particular column, a node joins the respective gossip subnet. Verif ### Reconstruction and cross-seeding -If the node obtains 50%+ of all the columns, they can reconstruct the full data matrix via `recover_samples_impl` helper. +If the node obtains 50%+ of all the columns, they can reconstruct the full data matrix via `recover_matrix` helper. If a node fails to sample a peer or fails to get a column on the column subnet, a node can utilize the Req/Resp message to query the missing column from other peers. @@ -218,7 +234,7 @@ Once the node obtain the column, the node should send the missing columns to the ## Peer sampling -At each slot, a node makes (locally randomly determined) `SAMPLES_PER_SLOT` queries for samples from their peers via `DataColumnSidecarByRoot` request. A node utilizes `get_custody_columns` helper to determine which peer(s) to request from. If a node has enough good/honest peers across all rows and columns, this has a high chance of success. +At each slot, a node makes (locally randomly determined) `SAMPLES_PER_SLOT` queries for samples from their peers via `DataColumnSidecarsByRoot` request. A node utilizes `get_custody_columns` helper to determine which peer(s) to request from. If a node has enough good/honest peers across all rows and columns, this has a high chance of success. ## Peer scoring @@ -240,7 +256,7 @@ The fork choice rule (essentially a DA filter) is *orthogonal to a given DAS des In any DAS design, there are probably a few degrees of freedom around timing, acceptability of short-term re-orgs, etc. -For example, the fork choice rule might require validators to do successful DAS on slot N to be able to include block of slot `N` in its fork choice. That's the tightest DA filter. But trailing filters are also probably acceptable, knowing that there might be some failures/short re-orgs but that they don't hurt the aggregate security. For example, the rule could be — DAS must be completed for slot N-1 for a child block in N to be included in the fork choice. +For example, the fork choice rule might require validators to do successful DAS on slot `N` to be able to include block of slot `N` in its fork choice. That's the tightest DA filter. But trailing filters are also probably acceptable, knowing that there might be some failures/short re-orgs but that they don't hurt the aggregate security. For example, the rule could be — DAS must be completed for slot N-1 for a child block in N to be included in the fork choice. Such trailing techniques and their analysis will be valuable for any DAS construction. The question is — can you relax how quickly you need to do DA and in the worst case not confirm unavailable data via attestations/finality, and what impact does it have on short-term re-orgs and fast confirmation rules. 
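The peer sampling flow described above (pick `SAMPLES_PER_SLOT` columns, then pick peers whose custody covers them) can be sketched as follows. The stand-in for `get_custody_columns` below (columns congruent to the chosen subnets modulo `DATA_COLUMN_SIDECAR_SUBNET_COUNT`) and the `SAMPLES_PER_SLOT` value are illustrative assumptions, not the spec's actual definitions.

```python
import random

NUMBER_OF_COLUMNS = 128                # mainnet config value from this PR
DATA_COLUMN_SIDECAR_SUBNET_COUNT = 32  # mainnet config value from this PR
SAMPLES_PER_SLOT = 8                   # placeholder value, not taken from this diff


def toy_custody_columns(node_id: int, custody_subnet_count: int) -> set:
    # Placeholder stand-in for the spec's `get_custody_columns` helper:
    # pick `custody_subnet_count` subnets from the node id and expand each
    # subnet into the columns congruent to it modulo the subnet count.
    subnets = {(node_id + i) % DATA_COLUMN_SIDECAR_SUBNET_COUNT for i in range(custody_subnet_count)}
    return {c for c in range(NUMBER_OF_COLUMNS) if c % DATA_COLUMN_SIDECAR_SUBNET_COUNT in subnets}


def plan_samples(peers: dict, rng: random.Random) -> dict:
    """Map each sampled column to the peers whose advertised custody covers it.

    ``peers`` maps a peer's node id to its advertised ``custody_subnet_count``;
    the resulting plan drives the DataColumnSidecarsByRoot requests for a slot.
    """
    sampled_columns = rng.sample(range(NUMBER_OF_COLUMNS), SAMPLES_PER_SLOT)
    return {
        column: [
            node_id for node_id, custody_subnet_count in peers.items()
            if column in toy_custody_columns(node_id, custody_subnet_count)
        ]
        for column in sampled_columns
    }


# A super-full peer (custody_subnet_count == DATA_COLUMN_SIDECAR_SUBNET_COUNT)
# can answer any sample; columns with no candidate peer signal that the
# peer set should be broadened.
plan = plan_samples(peers={11: 4, 22: 8, 33: 32}, rng=random.Random(42))
assert len(plan) == SAMPLES_PER_SLOT
assert all(33 in candidates for candidates in plan.values())
```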
diff --git a/specs/deneb/polynomial-commitments.md b/specs/deneb/polynomial-commitments.md index 33945d249f..818bee6435 100644 --- a/specs/deneb/polynomial-commitments.md +++ b/specs/deneb/polynomial-commitments.md @@ -20,6 +20,7 @@ - [BLS12-381 helpers](#bls12-381-helpers) - [`hash_to_bls_field`](#hash_to_bls_field) - [`bytes_to_bls_field`](#bytes_to_bls_field) + - [`bls_field_to_bytes`](#bls_field_to_bytes) - [`validate_kzg_g1`](#validate_kzg_g1) - [`bytes_to_kzg_commitment`](#bytes_to_kzg_commitment) - [`bytes_to_kzg_proof`](#bytes_to_kzg_proof) @@ -170,6 +171,12 @@ def bytes_to_bls_field(b: Bytes32) -> BLSFieldElement: return BLSFieldElement(field_element) ``` +#### `bls_field_to_bytes` + +```python +def bls_field_to_bytes(x: BLSFieldElement) -> Bytes32: + return int.to_bytes(x % BLS_MODULUS, 32, KZG_ENDIANNESS) +``` #### `validate_kzg_g1` diff --git a/tests/core/pyspec/eth2spec/test/deneb/unittests/polynomial_commitments/test_polynomial_commitments.py b/tests/core/pyspec/eth2spec/test/deneb/unittests/polynomial_commitments/test_polynomial_commitments.py index 7ee7168d9f..1d43d07caf 100644 --- a/tests/core/pyspec/eth2spec/test/deneb/unittests/polynomial_commitments/test_polynomial_commitments.py +++ b/tests/core/pyspec/eth2spec/test/deneb/unittests/polynomial_commitments/test_polynomial_commitments.py @@ -32,10 +32,6 @@ def bls_add_one(x): ) -def field_element_bytes(x): - return int.to_bytes(x % BLS_MODULUS, 32, "big") - - @with_deneb_and_later @spec_test @single_phase @@ -43,7 +39,7 @@ def test_verify_kzg_proof(spec): """ Test the wrapper functions (taking bytes arguments) for computing and verifying KZG proofs. """ - x = field_element_bytes(3) + x = spec.bls_field_to_bytes(3) blob = get_sample_blob(spec) commitment = spec.blob_to_kzg_commitment(blob) proof, y = spec.compute_kzg_proof(blob, x) @@ -58,7 +54,7 @@ def test_verify_kzg_proof_incorrect_proof(spec): """ Test the wrapper function `verify_kzg_proof` fails on an incorrect proof. 
""" - x = field_element_bytes(3465) + x = spec.bls_field_to_bytes(3465) blob = get_sample_blob(spec) commitment = spec.blob_to_kzg_commitment(blob) proof, y = spec.compute_kzg_proof(blob, x) diff --git a/tests/core/pyspec/eth2spec/test/eip7594/unittests/das/__init__.py b/tests/core/pyspec/eth2spec/test/eip7594/unittests/das/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/core/pyspec/eth2spec/test/eip7594/unittests/das/test_das.py b/tests/core/pyspec/eth2spec/test/eip7594/unittests/das/test_das.py new file mode 100644 index 0000000000..15ed6536f8 --- /dev/null +++ b/tests/core/pyspec/eth2spec/test/eip7594/unittests/das/test_das.py @@ -0,0 +1,44 @@ +import random +from eth2spec.test.context import ( + spec_test, + single_phase, + with_eip7594_and_later, +) +from eth2spec.test.helpers.sharding import ( + get_sample_blob, +) + + +@with_eip7594_and_later +@spec_test +@single_phase +def test_recover_matrix(spec): + rng = random.Random(5566) + + # Number of samples we will be recovering from + N_SAMPLES = spec.CELLS_PER_BLOB // 2 + + blob_count = 2 + cells_dict = {} + original_cells = [] + for blob_index in range(blob_count): + # Get the data we will be working with + blob = get_sample_blob(spec, rng=rng) + # Extend data with Reed-Solomon and split the extended data in cells + cells = spec.compute_cells(blob) + original_cells.append(cells) + cell_ids = [] + # First figure out just the indices of the cells + for _ in range(N_SAMPLES): + cell_id = rng.randint(0, spec.CELLS_PER_BLOB - 1) + while cell_id in cell_ids: + cell_id = rng.randint(0, spec.CELLS_PER_BLOB - 1) + cell_ids.append(cell_id) + cell = cells[cell_id] + cells_dict[(blob_index, cell_id)] = cell + assert len(cell_ids) == N_SAMPLES + + # Recover the matrix + recovered_matrix = spec.recover_matrix(cells_dict, blob_count) + flatten_original_cells = [cell for cells in original_cells for cell in cells] + assert recovered_matrix == flatten_original_cells diff --git a/tests/core/pyspec/eth2spec/test/eip7594/unittests/polynomial_commitments/test_polynomial_commitments.py b/tests/core/pyspec/eth2spec/test/eip7594/unittests/polynomial_commitments/test_polynomial_commitments.py index 9cdb9912ce..4a080488b4 100644 --- a/tests/core/pyspec/eth2spec/test/eip7594/unittests/polynomial_commitments/test_polynomial_commitments.py +++ b/tests/core/pyspec/eth2spec/test/eip7594/unittests/polynomial_commitments/test_polynomial_commitments.py @@ -10,10 +10,6 @@ from eth2spec.utils.bls import BLS_MODULUS -def field_element_bytes(x): - return int.to_bytes(x % BLS_MODULUS, 32, "big") - - @with_eip7594_and_later @spec_test @single_phase @@ -39,7 +35,7 @@ def test_verify_cell_proof(spec): commitment = spec.blob_to_kzg_commitment(blob) cells, proofs = spec.compute_cells_and_proofs(blob) - cells_bytes = [[field_element_bytes(element) for element in cell] for cell in cells] + cells_bytes = [[spec.bls_field_to_bytes(element) for element in cell] for cell in cells] cell_id = 0 assert spec.verify_cell_proof(commitment, cell_id, cells_bytes[cell_id], proofs[cell_id]) @@ -54,7 +50,7 @@ def test_verify_cell_proof_batch(spec): blob = get_sample_blob(spec) commitment = spec.blob_to_kzg_commitment(blob) cells, proofs = spec.compute_cells_and_proofs(blob) - cells_bytes = [[field_element_bytes(element) for element in cell] for cell in cells] + cells_bytes = [[spec.bls_field_to_bytes(element) for element in cell] for cell in cells] assert len(cells) == len(proofs) @@ -83,15 +79,15 @@ def test_recover_polynomial(spec): # Extend data with 
Reed-Solomon and split the extended data in cells cells = spec.compute_cells(blob) - cells_bytes = [[field_element_bytes(element) for element in cell] for cell in cells] + cells_bytes = [[spec.bls_field_to_bytes(element) for element in cell] for cell in cells] # Compute the cells we will be recovering from cell_ids = [] # First figure out just the indices of the cells for i in range(N_SAMPLES): - j = rng.randint(0, spec.CELLS_PER_BLOB) + j = rng.randint(0, spec.CELLS_PER_BLOB - 1) while j in cell_ids: - j = rng.randint(0, spec.CELLS_PER_BLOB) + j = rng.randint(0, spec.CELLS_PER_BLOB - 1) cell_ids.append(j) # Now the cells themselves known_cells_bytes = [cells_bytes[cell_id] for cell_id in cell_ids] diff --git a/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_custody.py b/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_custody.py index 9c8168b338..e1ab136c4f 100644 --- a/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_custody.py +++ b/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_custody.py @@ -12,8 +12,6 @@ def run_get_custody_columns(spec, peer_count, custody_subnet_count): columns_per_subnet = spec.NUMBER_OF_COLUMNS // spec.config.DATA_COLUMN_SIDECAR_SUBNET_COUNT for assignment in assignments: assert len(assignment) == custody_subnet_count * columns_per_subnet - print('assignment', assignment) - print('set(assignment)', set(assignment)) assert len(assignment) == len(set(assignment)) From 91dbbb365ceeabf4e2c4602133b226c0761bc857 Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Fri, 2 Feb 2024 02:24:07 +0800 Subject: [PATCH 19/25] Implement `compute_extended_matrix` --- specs/_features/eip7594/das-core.md | 23 +++++++++++------- .../test/eip7594/unittests/das/test_das.py | 24 +++++++++++++++++++ 2 files changed, 39 insertions(+), 8 deletions(-) diff --git a/specs/_features/eip7594/das-core.md b/specs/_features/eip7594/das-core.md index a1565f4f76..98b8a624f3 100644 --- a/specs/_features/eip7594/das-core.md +++ b/specs/_features/eip7594/das-core.md @@ -17,7 +17,7 @@ - [`DataColumnSidecar`](#datacolumnsidecar) - [Helper functions](#helper-functions) - [`get_custody_columns`](#get_custody_columns) - - [`compute_extended_data`](#compute_extended_data) + - [`compute_extended_matrix`](#compute_extended_matrix) - [`recover_matrix`](#recover_matrix) - [`get_data_column_sidecars`](#get_data_column_sidecars) - [Custody](#custody) @@ -112,13 +112,20 @@ def get_custody_columns(node_id: NodeID, custody_subnet_count: uint64) -> Sequen ] ``` -#### `compute_extended_data` +#### `compute_extended_matrix` ```python -def compute_extended_data(data: Sequence[BLSFieldElement]) -> Sequence[BLSFieldElement]: - # TODO - # pylint: disable=unused-argument - ... +def compute_extended_matrix(blobs: Sequence[Blob]) -> ExtendedMatrix: + """ + Return the full ``ExtendedMatrix``. + + This helper demonstrates the relationship between blobs and ``ExtendedMatrix``. + The data structure for storing cells is implementation-dependent. + """ + extended_matrix = [] + for blob in blobs: + extended_matrix.extend(compute_cells(blob)) + return ExtendedMatrix(extended_matrix) ``` #### `recover_matrix` @@ -128,7 +135,7 @@ def recover_matrix(cells_dict: Dict[Tuple[BlobIndex, CellID], Cell], blob_count: """ Return the recovered ``ExtendedMatrix``. - This helper demonstrate how to apply ``recover_polynomial``. + This helper demonstrates how to apply ``recover_polynomial``. The data structure for storing cells is implementation-dependent. 
""" extended_matrix = [] @@ -208,7 +215,7 @@ A node runs a background peer discovery process, maintaining at least `TARGET_NU ## Extended data -In this construction, we extend the blobs using a one-dimensional erasure coding extension. The matrix comprises maximum `MAX_BLOBS_PER_BLOCK` rows and fixed `NUMBER_OF_COLUMNS` columns, with each row containing a `Blob` and its corresponding extension. +In this construction, we extend the blobs using a one-dimensional erasure coding extension. The matrix comprises maximum `MAX_BLOBS_PER_BLOCK` rows and fixed `NUMBER_OF_COLUMNS` columns, with each row containing a `Blob` and its corresponding extension. `compute_extended_matrix` demonstrates the relationship between blobs and custom type `ExtendedMatrix`. ## Column gossip diff --git a/tests/core/pyspec/eth2spec/test/eip7594/unittests/das/test_das.py b/tests/core/pyspec/eth2spec/test/eip7594/unittests/das/test_das.py index 15ed6536f8..24011fcdd7 100644 --- a/tests/core/pyspec/eth2spec/test/eip7594/unittests/das/test_das.py +++ b/tests/core/pyspec/eth2spec/test/eip7594/unittests/das/test_das.py @@ -9,6 +9,30 @@ ) +@with_eip7594_and_later +@spec_test +@single_phase +def test_compute_extended_matrix(spec): + rng = random.Random(5566) + + blob_count = 2 + input_blobs = [get_sample_blob(spec, rng=rng) for _ in range(blob_count)] + extended_matrix = spec.compute_extended_matrix(input_blobs) + assert len(extended_matrix) == spec.CELLS_PER_BLOB * blob_count + + rows = [extended_matrix[i:(i + spec.CELLS_PER_BLOB)] for i in range(0, len(extended_matrix), spec.CELLS_PER_BLOB)] + assert len(rows) == blob_count + assert len(rows[0]) == spec.CELLS_PER_BLOB + + for blob_index, row in enumerate(rows): + extended_blob = [] + for cell in row: + extended_blob.extend(cell) + blob_part = extended_blob[0:len(extended_blob) // 2] + blob = b''.join([spec.bls_field_to_bytes(x) for x in blob_part]) + assert blob == input_blobs[blob_index] + + @with_eip7594_and_later @spec_test @single_phase From e7c0d5ff3c3b23216f9aeca0a7e194ac4c9be316 Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Fri, 2 Feb 2024 16:27:56 +0800 Subject: [PATCH 20/25] Update specs/_features/eip7594/das-core.md Co-authored-by: Jimmy Chen --- specs/_features/eip7594/das-core.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/_features/eip7594/das-core.md b/specs/_features/eip7594/das-core.md index 98b8a624f3..e8596691e3 100644 --- a/specs/_features/eip7594/das-core.md +++ b/specs/_features/eip7594/das-core.md @@ -221,7 +221,7 @@ In this construction, we extend the blobs using a one-dimensional erasure coding ### Parameters -For each column -- use `data_column_sidecar_{subnet_id}` subnets, where each column index maps to the `subnet_id`. The sidecars can be computed with `get_data_column_sidecars(signed_block: SignedBeaconBlock, blobs: Sequence[Blob])` helper. +For each column -- use `data_column_sidecar_{subnet_id}` subnets, where `subnet_id` can be computed with the `compute_subnet_for_data_column_sidecar(column_index: ColumnIndex)` helper. The sidecars can be computed with the `get_data_column_sidecars(signed_block: SignedBeaconBlock, blobs: Sequence[Blob])` helper. To custody a particular column, a node joins the respective gossip subnet. Verifiable samples from their respective column are gossiped on the assigned subnet. 
From 8150f76e1c743644343371c9dab67004bd254fa7 Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Tue, 20 Feb 2024 22:02:10 +0800 Subject: [PATCH 21/25] Apply @cskiraly's suggestion --- specs/_features/eip7594/das-core.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/_features/eip7594/das-core.md b/specs/_features/eip7594/das-core.md index e8596691e3..f06486f5ae 100644 --- a/specs/_features/eip7594/das-core.md +++ b/specs/_features/eip7594/das-core.md @@ -241,7 +241,7 @@ Once the node obtain the column, the node should send the missing columns to the ## Peer sampling -At each slot, a node makes (locally randomly determined) `SAMPLES_PER_SLOT` queries for samples from their peers via `DataColumnSidecarsByRoot` request. A node utilizes `get_custody_columns` helper to determine which peer(s) to request from. If a node has enough good/honest peers across all rows and columns, this has a high chance of success. +A node SHOULD maintain a diverse set of peers for each column and each slot by verifying responsiveness to sample queries. At each slot, a node makes `SAMPLES_PER_SLOT` queries for samples from their peers via `DataColumnSidecarsByRoot` request. A node utilizes `get_custody_columns` helper to determine which peer(s) to request from. If a node has enough good/honest peers across all rows and columns, this has a high chance of success. ## Peer scoring From bb33f9082c552af101a96ffbd9e973cb40464d7a Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Tue, 20 Feb 2024 22:05:41 +0800 Subject: [PATCH 22/25] Change List length of `DataColumn` from `MAX_BLOBS_PER_BLOCK` to `MAX_BLOB_COMMITMENTS_PER_BLOCK` --- specs/_features/eip7594/das-core.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/_features/eip7594/das-core.md b/specs/_features/eip7594/das-core.md index f06486f5ae..553d6e3e5e 100644 --- a/specs/_features/eip7594/das-core.md +++ b/specs/_features/eip7594/das-core.md @@ -45,7 +45,7 @@ We define the following Python custom types for type hinting and readability: | Name | SSZ equivalent | Description | | - | - | - | -| `DataColumn` | `List[Cell, MAX_BLOBS_PER_BLOCK]` | The data of each column in EIP-7594 | +| `DataColumn` | `List[Cell, MAX_BLOB_COMMITMENTS_PER_BLOCK]` | The data of each column in EIP-7594 | | `ExtendedMatrix` | `List[Cell, MAX_BLOBS_PER_BLOCK * NUMBER_OF_COLUMNS]` | The full data of one-dimensional erasure coding extended blobs (in row major format) | ## Configuration From 1acb1ff24fa0d59806a0ec5165b3c0c18d84afc5 Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Tue, 20 Feb 2024 22:23:23 +0800 Subject: [PATCH 23/25] minor arrange --- specs/_features/eip7594/das-core.md | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/specs/_features/eip7594/das-core.md b/specs/_features/eip7594/das-core.md index 553d6e3e5e..83d47e1856 100644 --- a/specs/_features/eip7594/das-core.md +++ b/specs/_features/eip7594/das-core.md @@ -27,9 +27,9 @@ - [Extended data](#extended-data) - [Column gossip](#column-gossip) - [Parameters](#parameters) - - [Reconstruction and cross-seeding](#reconstruction-and-cross-seeding) - [Peer sampling](#peer-sampling) - [Peer scoring](#peer-scoring) +- [Reconstruction and cross-seeding](#reconstruction-and-cross-seeding) - [DAS providers](#das-providers) - [A note on fork choice](#a-note-on-fork-choice) - [FAQs](#faqs) @@ -203,13 +203,13 @@ The particular columns that a node custodies are selected pseudo-randomly as a f ## Peer discovery -At each slot, a node needs to 
be able to readily sample from *any* set of columns. To this end, a node should find and maintain a set of diverse and reliable peers that can regularly satisfy their sampling demands. +At each slot, a node needs to be able to readily sample from *any* set of columns. To this end, a node SHOULD find and maintain a set of diverse and reliable peers that can regularly satisfy their sampling demands. A node runs a background peer discovery process, maintaining at least `TARGET_NUMBER_OF_PEERS` of various custody distributions (both `custody_size` and column assignments). The combination of advertised `custody_size` size and public node-id make this readily and publicly accessible. `TARGET_NUMBER_OF_PEERS` should be tuned upward in the event of failed sampling. -*Note*: while high-capacity and super-full nodes are high value with respect to satisfying sampling requirements, a node should maintain a distribution across node capacities as to not centralize the p2p graph too much (in the extreme becomes hub/spoke) and to distribute sampling load better across all nodes. +*Note*: while high-capacity and super-full nodes are high value with respect to satisfying sampling requirements, a node SHOULD maintain a distribution across node capacities as to not centralize the p2p graph too much (in the extreme becomes hub/spoke) and to distribute sampling load better across all nodes. *Note*: A DHT-based peer discovery mechanism is expected to be utilized in the above. The beacon-chain network currently utilizes discv5 in a similar method as described for finding peers of particular distributions of attestation subnets. Additional peer discovery methods are valuable to integrate (e.g., latent peer discovery via libp2p gossipsub) to add a defense in breadth against one of the discovery methods being attacked. @@ -225,13 +225,21 @@ For each column -- use `data_column_sidecar_{subnet_id}` subnets, where `subnet_ To custody a particular column, a node joins the respective gossip subnet. Verifiable samples from their respective column are gossiped on the assigned subnet. -### Reconstruction and cross-seeding +## Peer sampling + +A node SHOULD maintain a diverse set of peers for each column and each slot by verifying responsiveness to sample queries. At each slot, a node makes `SAMPLES_PER_SLOT` queries for samples from their peers via `DataColumnSidecarsByRoot` request. A node utilizes `get_custody_columns` helper to determine which peer(s) to request from. If a node has enough good/honest peers across all rows and columns, this has a high chance of success. + +## Peer scoring + +Due to the deterministic custody functions, a node knows exactly what a peer should be able to respond to. In the event that a peer does not respond to samples of their custodied rows/columns, a node may downscore or disconnect from a peer. + +## Reconstruction and cross-seeding If the node obtains 50%+ of all the columns, they can reconstruct the full data matrix via `recover_matrix` helper. If a node fails to sample a peer or fails to get a column on the column subnet, a node can utilize the Req/Resp message to query the missing column from other peers. -Once the node obtain the column, the node should send the missing columns to the column subnets. +Once the node obtain the column, the node SHOULD send the missing columns to the column subnets. *Note*: A node always maintains a matrix view of the rows and columns they are following, able to cross-reference and cross-seed in either direction. 
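A minimal sketch of the reconstruction threshold and the cross-seeding decision follows, assuming the node tracks which column indices it has received for a slot; the recovery itself is delegated to the `recover_matrix` helper from das-core.md and is not reimplemented here.

```python
# Minimal sketch: when to reconstruct, and which columns to re-gossip afterwards.
# Column bookkeeping is an illustrative assumption about client internals.

NUMBER_OF_COLUMNS = 128  # mainnet config value from this PR


def can_reconstruct(received_columns: set) -> bool:
    """50%+ of the columns is enough to recover the full extended matrix."""
    return 2 * len(received_columns) >= NUMBER_OF_COLUMNS


def columns_to_cross_seed(received_columns: set) -> set:
    """After reconstruction the node holds every column; the previously
    missing ones SHOULD be sent to their respective column subnets."""
    return set(range(NUMBER_OF_COLUMNS)) - received_columns


received = set(range(0, NUMBER_OF_COLUMNS, 2))  # e.g. only the even columns arrived
assert can_reconstruct(received)                # 64 of 128 columns suffice
assert 1 in columns_to_cross_seed(received)     # odd columns get gossiped after recovery
assert 0 not in columns_to_cross_seed(received)
```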
@@ -239,14 +247,6 @@ Once the node obtain the column, the node should send the missing columns to the *Note*: There may be anti-DoS and quality-of-service considerations around how to send samples and consider samples -- is each individual sample a message or are they sent in aggregate forms. -## Peer sampling - -A node SHOULD maintain a diverse set of peers for each column and each slot by verifying responsiveness to sample queries. At each slot, a node makes `SAMPLES_PER_SLOT` queries for samples from their peers via `DataColumnSidecarsByRoot` request. A node utilizes `get_custody_columns` helper to determine which peer(s) to request from. If a node has enough good/honest peers across all rows and columns, this has a high chance of success. - -## Peer scoring - -Due to the deterministic custody functions, a node knows exactly what a peer should be able to respond to. In the event that a peer does not respond to samples of their custodied rows/columns, a node may downscore or disconnect from a peer. - ## DAS providers A DAS provider is a consistently-available-for-DAS-queries, super-full (or high capacity) node. To the p2p, these look just like other nodes but with high advertised capacity, and they should generally be able to be latently found via normal discovery. From cebf78a83e6fc8fa237daf4264b9ca0fe61473f4 Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Tue, 27 Feb 2024 22:42:32 +0900 Subject: [PATCH 24/25] Apply PR feedback --- configs/mainnet.yaml | 2 ++ configs/minimal.yaml | 2 ++ presets/mainnet/eip7594.yaml | 2 -- presets/minimal/eip7594.yaml | 2 -- pysetup/spec_builders/eip7594.py | 2 +- specs/_features/eip7594/p2p-interface.md | 17 ++++++++++++++++- specs/altair/p2p-interface.md | 10 +++++++++- .../eip7594/unittests/test_config_invariants.py | 11 +++++++---- 8 files changed, 37 insertions(+), 11 deletions(-) diff --git a/configs/mainnet.yaml b/configs/mainnet.yaml index 3b1c14e4be..e7d3089c2b 100644 --- a/configs/mainnet.yaml +++ b/configs/mainnet.yaml @@ -159,4 +159,6 @@ WHISK_EPOCHS_PER_SHUFFLING_PHASE: 256 WHISK_PROPOSER_SELECTION_GAP: 2 # EIP7594 +NUMBER_OF_COLUMNS: 128 DATA_COLUMN_SIDECAR_SUBNET_COUNT: 32 +MAX_REQUEST_DATA_COLUMN_SIDECARS: 16384 diff --git a/configs/minimal.yaml b/configs/minimal.yaml index d6d4a09426..692ba97c7e 100644 --- a/configs/minimal.yaml +++ b/configs/minimal.yaml @@ -157,4 +157,6 @@ WHISK_EPOCHS_PER_SHUFFLING_PHASE: 4 WHISK_PROPOSER_SELECTION_GAP: 1 # EIP7594 +NUMBER_OF_COLUMNS: 128 DATA_COLUMN_SIDECAR_SUBNET_COUNT: 32 +MAX_REQUEST_DATA_COLUMN_SIDECARS: 16384 diff --git a/presets/mainnet/eip7594.yaml b/presets/mainnet/eip7594.yaml index ab7b8b936e..e34265b9e9 100644 --- a/presets/mainnet/eip7594.yaml +++ b/presets/mainnet/eip7594.yaml @@ -6,5 +6,3 @@ FIELD_ELEMENTS_PER_CELL: 64 # uint64(floorlog2(get_generalized_index(BeaconBlockBody, 'blob_kzg_commitments')) KZG_COMMITMENTS_INCLUSION_PROOF_DEPTH: 4 -# `uint64((FIELD_ELEMENTS_PER_BLOB * 2) // FIELD_ELEMENTS_PER_CELL)` (= 128) -NUMBER_OF_COLUMNS: 128 diff --git a/presets/minimal/eip7594.yaml b/presets/minimal/eip7594.yaml index cf814c9db5..def2b4e806 100644 --- a/presets/minimal/eip7594.yaml +++ b/presets/minimal/eip7594.yaml @@ -6,5 +6,3 @@ FIELD_ELEMENTS_PER_CELL: 64 # uint64(floorlog2(get_generalized_index(BeaconBlockBody, 'blob_kzg_commitments')) KZG_COMMITMENTS_INCLUSION_PROOF_DEPTH: 4 -# `uint64((FIELD_ELEMENTS_PER_BLOB * 2) // FIELD_ELEMENTS_PER_CELL)` (= 128) -NUMBER_OF_COLUMNS: 128 diff --git a/pysetup/spec_builders/eip7594.py b/pysetup/spec_builders/eip7594.py index 
4ad02fc07e..8ce35d6bd6 100644 --- a/pysetup/spec_builders/eip7594.py +++ b/pysetup/spec_builders/eip7594.py @@ -17,7 +17,7 @@ def imports(cls, preset_name: str): def hardcoded_custom_type_dep_constants(cls, spec_object) -> str: return { 'FIELD_ELEMENTS_PER_CELL': spec_object.preset_vars['FIELD_ELEMENTS_PER_CELL'].value, - 'NUMBER_OF_COLUMNS': spec_object.preset_vars['NUMBER_OF_COLUMNS'].value, + 'NUMBER_OF_COLUMNS': spec_object.config_vars['NUMBER_OF_COLUMNS'].value, } @classmethod diff --git a/specs/_features/eip7594/p2p-interface.md b/specs/_features/eip7594/p2p-interface.md index fb60774b1e..b282abbe49 100644 --- a/specs/_features/eip7594/p2p-interface.md +++ b/specs/_features/eip7594/p2p-interface.md @@ -25,6 +25,9 @@ - [The Req/Resp domain](#the-reqresp-domain) - [Messages](#messages) - [DataColumnSidecarsByRoot v1](#datacolumnsidecarsbyroot-v1) + - [The discovery domain: discv5](#the-discovery-domain-discv5) + - [ENR structure](#enr-structure) + - [`custody_subnet_count`](#custody_subnet_count) @@ -44,7 +47,7 @@ | Name | Value | Description | |------------------------------------------|-----------------------------------|---------------------------------------------------------------------| -| `MAX_REQUEST_DATA_COLUMN_SIDECARS` | `NUMBER_OF_COLUMNS` | Maximum number of data column sidecars in a single request | +| `MAX_REQUEST_DATA_COLUMN_SIDECARS` | `MAX_REQUEST_BLOCKS_DENEB * NUMBER_OF_COLUMNS` | Maximum number of data column sidecars in a single request | | `MIN_EPOCHS_FOR_DATA_COLUMN_SIDECARS_REQUESTS` | `2**12` (= 4096 epochs, ~18 days) | The minimum epoch range over which a node must serve data column sidecars | ### Containers @@ -194,3 +197,15 @@ Clients MAY limit the number of blocks and sidecars in the response. Clients SHOULD include a sidecar in the response as soon as it passes the gossip validation rules. Clients SHOULD NOT respond with sidecars related to blocks that fail gossip validation rules. Clients SHOULD NOT respond with sidecars related to blocks that fail the beacon chain state transition + +### The discovery domain: discv5 + +#### ENR structure + +##### `custody_subnet_count` + +A new field is added to the ENR under the key `custody_subnet_count` to facilitate custody data column discovery. + +| Key | Value | +|:-----------------------|:-------------| +| `custody_subnet_count` | SSZ `uint64` | diff --git a/specs/altair/p2p-interface.md b/specs/altair/p2p-interface.md index fac540fb99..5b0c938df7 100644 --- a/specs/altair/p2p-interface.md +++ b/specs/altair/p2p-interface.md @@ -33,6 +33,8 @@ Altair adds new messages, topics and data to the Req-Resp, Gossip and Discovery - [GetMetaData v2](#getmetadata-v2) - [Transitioning from v1 to v2](#transitioning-from-v1-to-v2) - [The discovery domain: discv5](#the-discovery-domain-discv5) + - [ENR structure](#enr-structure) + - [Sync committee bitfield](#sync-committee-bitfield) @@ -287,10 +289,16 @@ the responder MUST return the **InvalidRequest** response code. ### The discovery domain: discv5 -The `attnets` key of the ENR is used as defined in the Phase 0 document. +#### ENR structure + +##### Sync committee bitfield An additional bitfield is added to the ENR under the key `syncnets` to facilitate sync committee subnet discovery. The length of this bitfield is `SYNC_COMMITTEE_SUBNET_COUNT` where each bit corresponds to a distinct `subnet_id` for a specific sync committee subnet. The `i`th bit is set in this bitfield if the validator is currently subscribed to the `sync_committee_{i}` topic. 
+| Key | Value | +|:-------------|:-------------------------------------------------| +| `syncnets` | SSZ `Bitvector[SYNC_COMMITTEE_SUBNET_COUNT]` | + See the [validator document](./validator.md#sync-committee-subnet-stability) for further details on how the new bits are used. diff --git a/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_config_invariants.py b/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_config_invariants.py index 5f709a22ac..90cf0b5222 100644 --- a/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_config_invariants.py +++ b/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_config_invariants.py @@ -10,8 +10,11 @@ @single_phase def test_invariants(spec): assert spec.FIELD_ELEMENTS_PER_BLOB % spec.FIELD_ELEMENTS_PER_CELL == 0 - assert spec.FIELD_ELEMENTS_PER_BLOB * 2 % spec.NUMBER_OF_COLUMNS == 0 - assert spec.SAMPLES_PER_SLOT <= spec.NUMBER_OF_COLUMNS + assert spec.FIELD_ELEMENTS_PER_BLOB * 2 % spec.config.NUMBER_OF_COLUMNS == 0 + assert spec.SAMPLES_PER_SLOT <= spec.config.NUMBER_OF_COLUMNS assert spec.CUSTODY_REQUIREMENT <= spec.config.DATA_COLUMN_SIDECAR_SUBNET_COUNT - assert spec.config.DATA_COLUMN_SIDECAR_SUBNET_COUNT <= spec.NUMBER_OF_COLUMNS - assert spec.NUMBER_OF_COLUMNS % spec.config.DATA_COLUMN_SIDECAR_SUBNET_COUNT == 0 + assert spec.config.DATA_COLUMN_SIDECAR_SUBNET_COUNT <= spec.config.NUMBER_OF_COLUMNS + assert spec.config.NUMBER_OF_COLUMNS % spec.config.DATA_COLUMN_SIDECAR_SUBNET_COUNT == 0 + assert spec.config.MAX_REQUEST_DATA_COLUMN_SIDECARS == ( + spec.config.MAX_REQUEST_BLOCKS_DENEB * spec.config.NUMBER_OF_COLUMNS + ) From 5535e6a2abac45dee1019d7cab4a920a0cf36c98 Mon Sep 17 00:00:00 2001 From: Hsiao-Wei Wang Date: Thu, 4 Apr 2024 22:32:08 +0900 Subject: [PATCH 25/25] fix conflict --- pysetup/spec_builders/eip7594.py | 2 +- .../eth2spec/test/eip7594/unittests/test_config_invariants.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/pysetup/spec_builders/eip7594.py b/pysetup/spec_builders/eip7594.py index 0608f71e35..414d73f327 100644 --- a/pysetup/spec_builders/eip7594.py +++ b/pysetup/spec_builders/eip7594.py @@ -17,6 +17,7 @@ def imports(cls, preset_name: str): def hardcoded_custom_type_dep_constants(cls, spec_object) -> str: return { 'FIELD_ELEMENTS_PER_CELL': spec_object.preset_vars['FIELD_ELEMENTS_PER_CELL'].value, + 'FIELD_ELEMENTS_PER_EXT_BLOB': spec_object.preset_vars['FIELD_ELEMENTS_PER_EXT_BLOB'].value, 'NUMBER_OF_COLUMNS': spec_object.config_vars['NUMBER_OF_COLUMNS'].value, } @@ -24,5 +25,4 @@ def hardcoded_custom_type_dep_constants(cls, spec_object) -> str: def hardcoded_func_dep_presets(cls, spec_object) -> Dict[str, str]: return { 'KZG_COMMITMENTS_INCLUSION_PROOF_DEPTH': spec_object.preset_vars['KZG_COMMITMENTS_INCLUSION_PROOF_DEPTH'].value, - 'FIELD_ELEMENTS_PER_EXT_BLOB': spec_object.preset_vars['FIELD_ELEMENTS_PER_EXT_BLOB'].value, } diff --git a/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_config_invariants.py b/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_config_invariants.py index 1d7d0d262e..c750c67697 100644 --- a/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_config_invariants.py +++ b/tests/core/pyspec/eth2spec/test/eip7594/unittests/test_config_invariants.py @@ -20,5 +20,8 @@ def test_invariants(spec): ) +@with_eip7594_and_later +@spec_test +@single_phase def test_polynomical_commitments_sampling(spec): assert spec.FIELD_ELEMENTS_PER_EXT_BLOB == 2 * spec.FIELD_ELEMENTS_PER_BLOB
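As a standalone sanity check, the arithmetic behind the invariants asserted above works out with the mainnet values introduced in this PR; `MAX_REQUEST_BLOCKS_DENEB = 128` and `FIELD_ELEMENTS_PER_BLOB = 4096` are assumed from the existing Deneb configuration and presets and are not part of this diff.

```python
# Hedged, standalone check of the EIP-7594 configuration invariants using the
# mainnet values from this PR plus two assumed Deneb constants.

FIELD_ELEMENTS_PER_BLOB = 4096                               # assumed Deneb preset value
FIELD_ELEMENTS_PER_EXT_BLOB = 2 * FIELD_ELEMENTS_PER_BLOB    # 8192
FIELD_ELEMENTS_PER_CELL = 64
NUMBER_OF_COLUMNS = 128
DATA_COLUMN_SIDECAR_SUBNET_COUNT = 32
MAX_REQUEST_BLOCKS_DENEB = 128                               # assumed Deneb config value
MAX_REQUEST_DATA_COLUMN_SIDECARS = 16384

# One extended blob splits evenly into one cell per column.
assert FIELD_ELEMENTS_PER_EXT_BLOB // FIELD_ELEMENTS_PER_CELL == NUMBER_OF_COLUMNS
# Each subnet carries a whole number of columns (128 / 32 = 4).
assert NUMBER_OF_COLUMNS % DATA_COLUMN_SIDECAR_SUBNET_COUNT == 0
# A maximal by-root request covers every column of every requestable block (128 * 128).
assert MAX_REQUEST_DATA_COLUMN_SIDECARS == MAX_REQUEST_BLOCKS_DENEB * NUMBER_OF_COLUMNS
```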