From bcd0a09e68da264ed840c2aa0e603d66d8cae8f7 Mon Sep 17 00:00:00 2001
From: Justin Traglia <95511699+jtraglia@users.noreply.github.com>
Date: Mon, 22 Apr 2024 09:03:57 -0500
Subject: [PATCH] Integrate new Coset/CosetEvals types (#3701)

* Add CellBytes type

* Use ByteVector, not Vector

* Fix tests

* Replace b"" with []

* Apply suggestions for bytes_to_cell

* Add/integrate new CosetEvals type

* Remove accidental new line

* Fix recover_all_cells

* Fix recover_matrix

* fix CosetEvals abstraction leak

* Introduce internal Coset type for `coset_for_cell()`

* Use CosetEvals in compute_kzg_proof_multi_impl()

* update test

* satisfy linter

* Fix two nits I noticed

---------

Co-authored-by: Kevaundray Wedderburn <kevtheappdev@gmail.com>
Co-authored-by: George Kadianakis <desnacked@riseup.net>
---
 specs/_features/eip7594/das-core.md           |  3 +-
 .../polynomial-commitments-sampling.md        | 87 ++++++++++++-------
 .../test/eip7594/unittests/das/test_das.py    |  2 +-
 .../test_polynomial_commitments.py            | 19 ++--
 4 files changed, 65 insertions(+), 46 deletions(-)

diff --git a/specs/_features/eip7594/das-core.md b/specs/_features/eip7594/das-core.md
index 379f1e4f6d..9d0a839ba0 100644
--- a/specs/_features/eip7594/das-core.md
+++ b/specs/_features/eip7594/das-core.md
@@ -143,9 +143,8 @@ def recover_matrix(cells_dict: Dict[Tuple[BlobIndex, CellID], Cell], blob_count:
     for blob_index in range(blob_count):
         cell_ids = [cell_id for b_index, cell_id in cells_dict.keys() if b_index == blob_index]
         cells = [cells_dict[(blob_index, cell_id)] for cell_id in cell_ids]
-        cells_bytes = [[bls_field_to_bytes(element) for element in cell] for cell in cells]
 
-        all_cells_for_row = recover_all_cells(cell_ids, cells_bytes)
+        all_cells_for_row = recover_all_cells(cell_ids, cells)
         extended_matrix.extend(all_cells_for_row)
     return ExtendedMatrix(extended_matrix)
 ```
diff --git a/specs/_features/eip7594/polynomial-commitments-sampling.md b/specs/_features/eip7594/polynomial-commitments-sampling.md
index 38bf98899b..c56b1de014 100644
--- a/specs/_features/eip7594/polynomial-commitments-sampling.md
+++ b/specs/_features/eip7594/polynomial-commitments-sampling.md
@@ -13,7 +13,8 @@
   - [Cells](#cells)
 - [Helper functions](#helper-functions)
   - [BLS12-381 helpers](#bls12-381-helpers)
-    - [`bytes_to_cell`](#bytes_to_cell)
+    - [`cell_to_coset_evals`](#cell_to_coset_evals)
+    - [`coset_evals_to_cell`](#coset_evals_to_cell)
   - [Linear combinations](#linear-combinations)
     - [`g2_lincomb`](#g2_lincomb)
   - [FFTs](#ffts)
@@ -63,7 +64,9 @@ Public functions MUST accept raw bytes as input and perform the required cryptog
 | Name | SSZ equivalent | Description |
 | - | - | - |
 | `PolynomialCoeff` | `List[BLSFieldElement, FIELD_ELEMENTS_PER_EXT_BLOB]` | A polynomial in coefficient form |
-| `Cell` | `Vector[BLSFieldElement, FIELD_ELEMENTS_PER_CELL]` | The unit of blob data that can come with their own KZG proofs |
+| `Coset` | `Vector[BLSFieldElement, FIELD_ELEMENTS_PER_CELL]` | The evaluation domain of a cell |
+| `CosetEvals` | `Vector[BLSFieldElement, FIELD_ELEMENTS_PER_CELL]` | The internal representation of a cell (the evaluations over its Coset) |
+| `Cell` | `ByteVector[BYTES_PER_FIELD_ELEMENT * FIELD_ELEMENTS_PER_CELL]` | The unit of blob data that can come with its own KZG proof |
 | `CellID` | `uint64` | Cell identifier |
 | `RowIndex` | `uint64` | Row identifier |
 | `ColumnIndex` | `uint64` | Column identifier |
@@ -91,14 +94,33 @@ Cells are the smallest unit of blob data that can come with their own KZG proofs
 
 ### BLS12-381 helpers
 
-#### `bytes_to_cell`
+#### `cell_to_coset_evals`
 
 ```python
-def bytes_to_cell(cell_bytes: Vector[Bytes32, FIELD_ELEMENTS_PER_CELL]) -> Cell:
+def cell_to_coset_evals(cell: Cell) -> CosetEvals:
     """
-    Convert untrusted bytes into a Cell.
+    Convert an untrusted ``Cell`` into a trusted ``CosetEvals``.
     """
-    return [bytes_to_bls_field(element) for element in cell_bytes]
+    evals = []
+    for i in range(FIELD_ELEMENTS_PER_CELL):
+        start = i * BYTES_PER_FIELD_ELEMENT
+        end = (i + 1) * BYTES_PER_FIELD_ELEMENT
+        value = bytes_to_bls_field(cell[start:end])
+        evals.append(value)
+    return CosetEvals(evals)
+```
+
+#### `coset_evals_to_cell`
+
+```python
+def coset_evals_to_cell(coset_evals: CosetEvals) -> Cell:
+    """
+    Convert a trusted ``CosetEvals`` into an untrusted ``Cell``.
+    """
+    cell = []
+    for i in range(FIELD_ELEMENTS_PER_CELL):
+        cell += bls_field_to_bytes(coset_evals[i])
+    return Cell(cell)
 ```
 
 ### Linear combinations
@@ -306,7 +328,7 @@ Extended KZG functions for multiproofs
 ```python
 def compute_kzg_proof_multi_impl(
         polynomial_coeff: PolynomialCoeff,
-        zs: Sequence[BLSFieldElement]) -> Tuple[KZGProof, Sequence[BLSFieldElement]]:
+        zs: Coset) -> Tuple[KZGProof, CosetEvals]:
     """
     Compute a KZG multi-evaluation proof for a set of `k` points.
 
@@ -336,8 +358,8 @@ def compute_kzg_proof_multi_impl(
 
 ```python
 def verify_kzg_proof_multi_impl(commitment: KZGCommitment,
-                                zs: Sequence[BLSFieldElement],
-                                ys: Sequence[BLSFieldElement],
+                                zs: Coset,
+                                ys: CosetEvals,
                                 proof: KZGProof) -> bool:
     """
     Verify a KZG multi-evaluation proof for a set of `k` points.
@@ -376,7 +398,7 @@ def verify_kzg_proof_multi_impl(commitment: KZGCommitment,
 #### `coset_for_cell`
 
 ```python
-def coset_for_cell(cell_id: CellID) -> Cell:
+def coset_for_cell(cell_id: CellID) -> Coset:
     """
     Get the coset for a given ``cell_id``
     """
@@ -384,7 +406,7 @@ def coset_for_cell(cell_id: CellID) -> Cell:
     roots_of_unity_brp = bit_reversal_permutation(
         compute_roots_of_unity(FIELD_ELEMENTS_PER_EXT_BLOB)
     )
-    return Cell(roots_of_unity_brp[FIELD_ELEMENTS_PER_CELL * cell_id:FIELD_ELEMENTS_PER_CELL * (cell_id + 1)])
+    return Coset(roots_of_unity_brp[FIELD_ELEMENTS_PER_CELL * cell_id:FIELD_ELEMENTS_PER_CELL * (cell_id + 1)])
 ```
 
 ## Cells
@@ -413,7 +435,7 @@ def compute_cells_and_proofs(blob: Blob) -> Tuple[
     for i in range(CELLS_PER_EXT_BLOB):
         coset = coset_for_cell(i)
         proof, ys = compute_kzg_proof_multi_impl(polynomial_coeff, coset)
-        cells.append(ys)
+        cells.append(coset_evals_to_cell(ys))
         proofs.append(proof)
 
     return cells, proofs
@@ -434,8 +456,12 @@ def compute_cells(blob: Blob) -> Vector[Cell, CELLS_PER_EXT_BLOB]:
     extended_data = fft_field(polynomial_coeff + [0] * FIELD_ELEMENTS_PER_BLOB,
                               compute_roots_of_unity(FIELD_ELEMENTS_PER_EXT_BLOB))
     extended_data_rbo = bit_reversal_permutation(extended_data)
-    return [extended_data_rbo[i * FIELD_ELEMENTS_PER_CELL:(i + 1) * FIELD_ELEMENTS_PER_CELL]
-            for i in range(CELLS_PER_EXT_BLOB)]
+    cells = []
+    for cell_id in range(CELLS_PER_EXT_BLOB):
+        start = cell_id * FIELD_ELEMENTS_PER_CELL
+        end = (cell_id + 1) * FIELD_ELEMENTS_PER_CELL
+        cells.append(coset_evals_to_cell(extended_data_rbo[start:end]))
+    return cells
 ```
 
 ### Cell verification
@@ -445,7 +471,7 @@ def compute_cells(blob: Blob) -> Vector[Cell, CELLS_PER_EXT_BLOB]:
 ```python
 def verify_cell_proof(commitment_bytes: Bytes48,
                       cell_id: CellID,
-                      cell_bytes: Vector[Bytes32, FIELD_ELEMENTS_PER_CELL],
+                      cell: Cell,
                       proof_bytes: Bytes48) -> bool:
     """
     Check a cell proof
@@ -457,7 +483,7 @@ def verify_cell_proof(commitment_bytes: Bytes48,
     return verify_kzg_proof_multi_impl(
         bytes_to_kzg_commitment(commitment_bytes),
         coset,
-        bytes_to_cell(cell_bytes),
+        cell_to_coset_evals(cell),
         bytes_to_kzg_proof(proof_bytes))
 ```
 
@@ -467,7 +493,7 @@ def verify_cell_proof(commitment_bytes: Bytes48,
 def verify_cell_proof_batch(row_commitments_bytes: Sequence[Bytes48],
                             row_indices: Sequence[RowIndex],
                             column_indices: Sequence[ColumnIndex],
-                            cells_bytes: Sequence[Vector[Bytes32, FIELD_ELEMENTS_PER_CELL]],
+                            cells: Sequence[Cell],
                             proofs_bytes: Sequence[Bytes48]) -> bool:
     """
     Verify a set of cells, given their corresponding proofs and their coordinates (row_id, column_id) in the blob
@@ -483,19 +509,19 @@ def verify_cell_proof_batch(row_commitments_bytes: Sequence[Bytes48],
 
     Public method.
     """
-    assert len(cells_bytes) == len(proofs_bytes) == len(row_indices) == len(column_indices)
+    assert len(cells) == len(proofs_bytes) == len(row_indices) == len(column_indices)
 
     # Get commitments via row IDs
     commitments_bytes = [row_commitments_bytes[row_index] for row_index in row_indices]
 
     # Get objects from bytes
     commitments = [bytes_to_kzg_commitment(commitment_bytes) for commitment_bytes in commitments_bytes]
-    cells = [bytes_to_cell(cell_bytes) for cell_bytes in cells_bytes]
+    cosets_evals = [cell_to_coset_evals(cell) for cell in cells]
     proofs = [bytes_to_kzg_proof(proof_bytes) for proof_bytes in proofs_bytes]
 
     return all(
-        verify_kzg_proof_multi_impl(commitment, coset_for_cell(column_index), cell, proof)
-        for commitment, column_index, cell, proof in zip(commitments, column_indices, cells, proofs)
+        verify_kzg_proof_multi_impl(commitment, coset_for_cell(column_index), coset_evals, proof)
+        for commitment, column_index, coset_evals, proof in zip(commitments, column_indices, cosets_evals, proofs)
     )
 ```
 
@@ -612,8 +638,7 @@ def recover_original_data(eval_shifted_extended_evaluation: Sequence[BLSFieldEle
 ### `recover_all_cells`
 
 ```python
-def recover_all_cells(cell_ids: Sequence[CellID],
-                      cells_bytes: Sequence[Vector[Bytes32, FIELD_ELEMENTS_PER_CELL]]) -> Sequence[Cell]:
+def recover_all_cells(cell_ids: Sequence[CellID], cells: Sequence[Cell]) -> Sequence[Cell]:
     """
     Recover all of the cells in the extended blob from FIELD_ELEMENTS_PER_EXT_BLOB evaluations, 
     half of which can be missing.
@@ -625,7 +650,7 @@ def recover_all_cells(cell_ids: Sequence[CellID],
 
     Public method.
     """
-    assert len(cell_ids) == len(cells_bytes)
+    assert len(cell_ids) == len(cells)
     # Check we have enough cells to be able to perform the reconstruction
     assert CELLS_PER_EXT_BLOB / 2 <= len(cell_ids) <= CELLS_PER_EXT_BLOB
     # Check for duplicates
@@ -634,15 +659,15 @@ def recover_all_cells(cell_ids: Sequence[CellID],
     # Get the extended domain
     roots_of_unity_extended = compute_roots_of_unity(FIELD_ELEMENTS_PER_EXT_BLOB)
 
-    # Convert from bytes to cells
-    cells = [bytes_to_cell(cell_bytes) for cell_bytes in cells_bytes]
+    # Convert cells to coset evals
+    cosets_evals = [cell_to_coset_evals(cell) for cell in cells]
 
     missing_cell_ids = [cell_id for cell_id in range(CELLS_PER_EXT_BLOB) if cell_id not in cell_ids]
     zero_poly_coeff, zero_poly_eval = construct_vanishing_polynomial(missing_cell_ids)
 
     eval_shifted_extended_evaluation, eval_shifted_zero_poly, shift_inv = recover_shifted_data(
         cell_ids,
-        cells,
+        cosets_evals,
         zero_poly_eval,
         zero_poly_coeff,
         roots_of_unity_extended,
@@ -655,14 +680,14 @@ def recover_all_cells(cell_ids: Sequence[CellID],
         roots_of_unity_extended,
     )
 
-    for cell_id, cell in zip(cell_ids, cells):
+    for cell_id, coset_evals in zip(cell_ids, cosets_evals):
         start = cell_id * FIELD_ELEMENTS_PER_CELL
         end = (cell_id + 1) * FIELD_ELEMENTS_PER_CELL
-        assert reconstructed_data[start:end] == cell
+        assert reconstructed_data[start:end] == coset_evals
 
     reconstructed_data_as_cells = [
-        reconstructed_data[i * FIELD_ELEMENTS_PER_CELL:(i + 1) * FIELD_ELEMENTS_PER_CELL]
+        coset_evals_to_cell(reconstructed_data[i * FIELD_ELEMENTS_PER_CELL:(i + 1) * FIELD_ELEMENTS_PER_CELL])
         for i in range(CELLS_PER_EXT_BLOB)]
-
+ 
     return reconstructed_data_as_cells
 ```
diff --git a/tests/core/pyspec/eth2spec/test/eip7594/unittests/das/test_das.py b/tests/core/pyspec/eth2spec/test/eip7594/unittests/das/test_das.py
index dba8e885a0..cdbfad9ffe 100644
--- a/tests/core/pyspec/eth2spec/test/eip7594/unittests/das/test_das.py
+++ b/tests/core/pyspec/eth2spec/test/eip7594/unittests/das/test_das.py
@@ -28,7 +28,7 @@ def test_compute_extended_matrix(spec):
     for blob_index, row in enumerate(rows):
         extended_blob = []
         for cell in row:
-            extended_blob.extend(cell)
+            extended_blob.extend(spec.cell_to_coset_evals(cell))
         blob_part = extended_blob[0:len(extended_blob) // 2]
         blob = b''.join([spec.bls_field_to_bytes(x) for x in blob_part])
         assert blob == input_blobs[blob_index]
diff --git a/tests/core/pyspec/eth2spec/test/eip7594/unittests/polynomial_commitments/test_polynomial_commitments.py b/tests/core/pyspec/eth2spec/test/eip7594/unittests/polynomial_commitments/test_polynomial_commitments.py
index 1dd9b2d6a7..deb83c223e 100644
--- a/tests/core/pyspec/eth2spec/test/eip7594/unittests/polynomial_commitments/test_polynomial_commitments.py
+++ b/tests/core/pyspec/eth2spec/test/eip7594/unittests/polynomial_commitments/test_polynomial_commitments.py
@@ -36,12 +36,10 @@ def test_verify_cell_proof(spec):
     commitment = spec.blob_to_kzg_commitment(blob)
     cells, proofs = spec.compute_cells_and_proofs(blob)
 
-    cells_bytes = [[spec.bls_field_to_bytes(element) for element in cell] for cell in cells]
-
     cell_id = 0
-    assert spec.verify_cell_proof(commitment, cell_id, cells_bytes[cell_id], proofs[cell_id])
+    assert spec.verify_cell_proof(commitment, cell_id, cells[cell_id], proofs[cell_id])
     cell_id = 1
-    assert spec.verify_cell_proof(commitment, cell_id, cells_bytes[cell_id], proofs[cell_id])
+    assert spec.verify_cell_proof(commitment, cell_id, cells[cell_id], proofs[cell_id])
 
 
 @with_eip7594_and_later
@@ -51,7 +49,6 @@ def test_verify_cell_proof_batch(spec):
     blob = get_sample_blob(spec)
     commitment = spec.blob_to_kzg_commitment(blob)
     cells, proofs = spec.compute_cells_and_proofs(blob)
-    cells_bytes = [[spec.bls_field_to_bytes(element) for element in cell] for cell in cells]
 
     assert len(cells) == len(proofs)
 
@@ -59,7 +56,7 @@ def test_verify_cell_proof_batch(spec):
         row_commitments_bytes=[commitment],
         row_indices=[0, 0],
         column_indices=[0, 4],
-        cells_bytes=[cells_bytes[0], cells_bytes[4]],
+        cells=[cells[0], cells[4]],
         proofs_bytes=[proofs[0], proofs[4]],
     )
 
@@ -75,12 +72,9 @@ def test_recover_all_cells(spec):
 
     # Get the data we will be working with
     blob = get_sample_blob(spec)
-    # Get the data in evaluation form
-    original_polynomial = spec.blob_to_polynomial(blob)
 
     # Extend data with Reed-Solomon and split the extended data in cells
     cells = spec.compute_cells(blob)
-    cells_bytes = [[spec.bls_field_to_bytes(element) for element in cell] for cell in cells]
 
     # Compute the cells we will be recovering from
     cell_ids = []
@@ -91,14 +85,15 @@ def test_recover_all_cells(spec):
             j = rng.randint(0, spec.CELLS_PER_EXT_BLOB - 1)
         cell_ids.append(j)
     # Now the cells themselves
-    known_cells_bytes = [cells_bytes[cell_id] for cell_id in cell_ids]
+    known_cells = [cells[cell_id] for cell_id in cell_ids]
 
     # Recover all of the cells
-    recovered_cells = spec.recover_all_cells(cell_ids, known_cells_bytes)
+    recovered_cells = spec.recover_all_cells(cell_ids, known_cells)
     recovered_data = [x for xs in recovered_cells for x in xs]
 
     # Check that the original data match the non-extended portion of the recovered data
-    assert original_polynomial == recovered_data[:len(recovered_data) // 2]
+    blob_byte_array = [b for b in blob]
+    assert blob_byte_array == recovered_data[:len(recovered_data) // 2]
 
     # Check that the recovered cells match the original cells
     assert cells == recovered_cells