ethereum · hwwhww · Feb 14, 2024 · Jan 31, 2024 · Jan 23, 2024 · Jan 31, 2024
diff --git a/specs/_features/eip7594/polynomial-commitments-sampling.md b/specs/_features/eip7594/polynomial-commitments-sampling.md
@@ -42,6 +42,9 @@
     - [`verify_cell_proof`](#verify_cell_proof)
     - [`verify_cell_proof_batch`](#verify_cell_proof_batch)
 - [Reconstruction](#reconstruction)
+  - [`construct_vanishing_polynomial`](#construct_vanishing_polynomial)
+  - [`recover_shifted_data`](#recover_shifted_data)
+  - [`recover_original_data`](#recover_original_data)
   - [`recover_polynomial`](#recover_polynomial)
 
 <!-- END doctoc generated TOC please keep comment here to allow auto update -->
@@ -76,9 +79,10 @@ Cells are the smallest unit of blob data that can come with their own KZG proofs
 
 | Name | Value | Description |
 | - | - | - |
+| `FIELD_ELEMENTS_PER_EXT_BLOB` | `2 * FIELD_ELEMENTS_PER_BLOB` | Number of field elements in a Reed-Solomon extended blob |
 | `FIELD_ELEMENTS_PER_CELL` | `uint64(64)` | Number of field elements in a cell |
 | `BYTES_PER_CELL` | `FIELD_ELEMENTS_PER_CELL * BYTES_PER_FIELD_ELEMENT` | The number of bytes in a cell |
-| `CELLS_PER_BLOB` | `((2 * FIELD_ELEMENTS_PER_BLOB) // FIELD_ELEMENTS_PER_CELL)` | The number of cells in a blob |
+| `CELLS_PER_BLOB` | `FIELD_ELEMENTS_PER_EXT_BLOB // FIELD_ELEMENTS_PER_CELL` | The number of cells in a blob |
 | `RANDOM_CHALLENGE_KZG_CELL_BATCH_DOMAIN` | `b'RCKZGCBATCH__V1_'` |
 
 ## Helper functions
@@ -352,7 +356,7 @@ def coset_for_cell(cell_id: CellID) -> Cell:
     """
     assert cell_id < CELLS_PER_BLOB
     roots_of_unity_brp = bit_reversal_permutation(
-        compute_roots_of_unity(2 * FIELD_ELEMENTS_PER_BLOB)
+        compute_roots_of_unity(FIELD_ELEMENTS_PER_EXT_BLOB)
     )
     return Cell(roots_of_unity_brp[FIELD_ELEMENTS_PER_CELL * cell_id:FIELD_ELEMENTS_PER_CELL * (cell_id + 1)])
 ```
@@ -402,7 +406,7 @@ def compute_cells(blob: Blob) -> Vector[Cell, CELLS_PER_BLOB]:
     polynomial_coeff = polynomial_eval_to_coeff(polynomial)
 
     extended_data = fft_field(polynomial_coeff + [0] * FIELD_ELEMENTS_PER_BLOB,
-                              compute_roots_of_unity(2 * FIELD_ELEMENTS_PER_BLOB))
+                              compute_roots_of_unity(FIELD_ELEMENTS_PER_EXT_BLOB))
     extended_data_rbo = bit_reversal_permutation(extended_data)
     return [extended_data_rbo[i * FIELD_ELEMENTS_PER_CELL:(i + 1) * FIELD_ELEMENTS_PER_CELL]
             for i in range(CELLS_PER_BLOB)]
@@ -471,84 +475,159 @@ def verify_cell_proof_batch(row_commitments_bytes: Sequence[Bytes48],
 
 ## Reconstruction
 
-### `recover_polynomial`
+### `construct_vanishing_polynomial`
 
 ```python
-def recover_polynomial(cell_ids: Sequence[CellID],
-                       cells_bytes: Sequence[Vector[Bytes32, FIELD_ELEMENTS_PER_CELL]]) -> Polynomial:
+def construct_vanishing_polynomial(missing_cell_ids: Sequence[CellID]) -> Tuple[
+        Sequence[BLSFieldElement],
+        Sequence[BLSFieldElement]]:
     """
-    Recovers a polynomial from 2 * FIELD_ELEMENTS_PER_CELL evaluations, half of which can be missing.
-
-    This algorithm uses FFTs to recover cells faster than using Lagrange implementation. However,
-    a faster version thanks to Qi Zhou can be found here:
-    https://github.com/ethereum/research/blob/51b530a53bd4147d123ab3e390a9d08605c2cdb8/polynomial_reconstruction/polynomial_reconstruction_danksharding.py
-
-    Public method.
+    Given the cells that are missing from the data, compute the polynomial that vanishes at every point that
+    corresponds to a missing field element.
     """
-    assert len(cell_ids) == len(cells_bytes)
-
-    cells = [bytes_to_cell(cell_bytes) for cell_bytes in cells_bytes]
-
-    assert len(cells) >= CELLS_PER_BLOB // 2
-    missing_cell_ids = [cell_id for cell_id in range(CELLS_PER_BLOB) if cell_id not in cell_ids]
+    # Get the small domain
     roots_of_unity_reduced = compute_roots_of_unity(CELLS_PER_BLOB)
+
+    # Compute polynomial that vanishes at all the missing cells (over the small domain)
     short_zero_poly = vanishing_polynomialcoeff([
-        roots_of_unity_reduced[reverse_bits(cell_id, CELLS_PER_BLOB)]
-        for cell_id in missing_cell_ids
+        roots_of_unity_reduced[reverse_bits(missing_cell_id, CELLS_PER_BLOB)]
+        for missing_cell_id in missing_cell_ids
     ])
 
-    full_zero_poly = []
-    for i in short_zero_poly:
-        full_zero_poly.append(i)
-        full_zero_poly.extend([0] * (FIELD_ELEMENTS_PER_CELL - 1))
-    full_zero_poly = full_zero_poly + [0] * (2 * FIELD_ELEMENTS_PER_BLOB - len(full_zero_poly))
+    # Extend vanishing polynomial to full domain using the closed form of the vanishing polynomial over a coset
+    zero_poly_coeff = [0] * FIELD_ELEMENTS_PER_EXT_BLOB
+    for i, coeff in enumerate(short_zero_poly):
+        zero_poly_coeff[i * FIELD_ELEMENTS_PER_CELL] = coeff
 
-    zero_poly_eval = fft_field(full_zero_poly,
-                               compute_roots_of_unity(2 * FIELD_ELEMENTS_PER_BLOB))
+    # Compute evaluations of the extended vanishing polynomial
+    zero_poly_eval = fft_field(zero_poly_coeff,
+                               compute_roots_of_unity(FIELD_ELEMENTS_PER_EXT_BLOB))
     zero_poly_eval_brp = bit_reversal_permutation(zero_poly_eval)
-    for cell_id in missing_cell_ids:
-        start = cell_id * FIELD_ELEMENTS_PER_CELL
-        end = (cell_id + 1) * FIELD_ELEMENTS_PER_CELL
-        assert zero_poly_eval_brp[start:end] == [0] * FIELD_ELEMENTS_PER_CELL
-    for cell_id in cell_ids:
+
+    # Sanity check
+    for cell_id in range(CELLS_PER_BLOB):
         start = cell_id * FIELD_ELEMENTS_PER_CELL
         end = (cell_id + 1) * FIELD_ELEMENTS_PER_CELL
-        assert all(a != 0 for a in zero_poly_eval_brp[start:end])
+        if cell_id in missing_cell_ids:
+            assert all(a == 0 for a in zero_poly_eval_brp[start:end])
+        else:  # cell_id in cell_ids
+            assert all(a != 0 for a in zero_poly_eval_brp[start:end])
+
+    return zero_poly_coeff, zero_poly_eval, zero_poly_eval_brp
+```
+
+### `recover_shifted_data`
 
-    extended_evaluation_rbo = [0] * (FIELD_ELEMENTS_PER_BLOB * 2)
+```python
+def recover_shifted_data(cell_ids: Sequence[CellID],
+                         cells: Sequence[Cell],
+                         zero_poly_eval: Sequence[BLSFieldElement],
+                         zero_poly_coeff: Sequence[BLSFieldElement],
+                         roots_of_unity_extended: Sequence[BLSFieldElement]) -> Tuple[
+                             Sequence[BLSFieldElement],
+                             Sequence[BLSFieldElement],
+                             BLSFieldElement]:
+    """
+    Given Z(x), return polynomial Q_1(x)=(E*Z)(k*x) and Q_2(x)=Z(k*x) and k^{-1}.
+    """
+    shift_factor = BLSFieldElement(PRIMITIVE_ROOT_OF_UNITY)
+    shift_inv = div(BLSFieldElement(1), shift_factor)
+
+    extended_evaluation_rbo = [0] * FIELD_ELEMENTS_PER_EXT_BLOB
     for cell_id, cell in zip(cell_ids, cells):
         start = cell_id * FIELD_ELEMENTS_PER_CELL
         end = (cell_id + 1) * FIELD_ELEMENTS_PER_CELL
         extended_evaluation_rbo[start:end] = cell
     extended_evaluation = bit_reversal_permutation(extended_evaluation_rbo)
 
+    # Compute (E*Z)(x)
     extended_evaluation_times_zero = [BLSFieldElement(int(a) * int(b) % BLS_MODULUS)
                                       for a, b in zip(zero_poly_eval, extended_evaluation)]
 
-    roots_of_unity_extended = compute_roots_of_unity(2 * FIELD_ELEMENTS_PER_BLOB)
-
     extended_evaluations_fft = fft_field(extended_evaluation_times_zero, roots_of_unity_extended, inv=True)
 
-    shift_factor = BLSFieldElement(PRIMITIVE_ROOT_OF_UNITY)
-    shift_inv = div(BLSFieldElement(1), shift_factor)
-
+    # Compute (E*Z)(k*x)
     shifted_extended_evaluation = shift_polynomialcoeff(extended_evaluations_fft, shift_factor)
-    shifted_zero_poly = shift_polynomialcoeff(full_zero_poly, shift_factor)
+    # Compute Z(k*x)
+    shifted_zero_poly = shift_polynomialcoeff(zero_poly_coeff, shift_factor)
 
     eval_shifted_extended_evaluation = fft_field(shifted_extended_evaluation, roots_of_unity_extended)
     eval_shifted_zero_poly = fft_field(shifted_zero_poly, roots_of_unity_extended)
 
+    return eval_shifted_extended_evaluation, eval_shifted_zero_poly, shift_inv
+```
+
+### `recover_original_data`
+
+```python
+def recover_original_data(eval_shifted_extended_evaluation: Sequence[BLSFieldElement],
+                          eval_shifted_zero_poly: Sequence[BLSFieldElement],
+                          shift_inv: BLSFieldElement,
+                          roots_of_unity_extended: Sequence[BLSFieldElement]) -> Sequence[BLSFieldElement]:
+    """
+    Given Q_1, Q_2 and k^{-1}, compute P(x).
+    """
+    # Compute Q_3 = Q_1(x)/Q_2(x) = P(k*x)
     eval_shifted_reconstructed_poly = [
         div(a, b)
         for a, b in zip(eval_shifted_extended_evaluation, eval_shifted_zero_poly)
     ]
 
     shifted_reconstructed_poly = fft_field(eval_shifted_reconstructed_poly, roots_of_unity_extended, inv=True)
 
+    # Unshift P(k*x) by k^{-1} to get P(x)
     reconstructed_poly = shift_polynomialcoeff(shifted_reconstructed_poly, shift_inv)
 
     reconstructed_data = bit_reversal_permutation(fft_field(reconstructed_poly, roots_of_unity_extended))
 
+    return reconstructed_data
+```
+
+### `recover_polynomial`
+
+```python
+def recover_polynomial(cell_ids: Sequence[CellID],
+                       cells_bytes: Sequence[Vector[Bytes32, FIELD_ELEMENTS_PER_CELL]]) -> Polynomial:
+    """
+    Recover original polynomial from FIELD_ELEMENTS_PER_EXT_BLOB evaluations, half of which can be missing. This
+    algorithm uses FFTs to recover cells faster than using Lagrange implementation, as can be seen here:
+    https://ethresear.ch/t/reed-solomon-erasure-code-recovery-in-n-log-2-n-time-with-ffts/3039
+
+    A faster version thanks to Qi Zhou can be found here:
+    https://github.com/ethereum/research/blob/51b530a53bd4147d123ab3e390a9d08605c2cdb8/polynomial_reconstruction/polynomial_reconstruction_danksharding.py
+
+    Public method.
+    """
+    assert len(cell_ids) == len(cells_bytes)
+    # Check we have enough cells to be able to perform the reconstruction
+    assert CELLS_PER_BLOB / 2 <= len(cell_ids) <= CELLS_PER_BLOB
+    # Check for duplicates
+    assert len(cell_ids) == len(set(cell_ids))
+
+    # Get the extended domain
+    roots_of_unity_extended = compute_roots_of_unity(FIELD_ELEMENTS_PER_EXT_BLOB)
+
+    # Convert from bytes to cells
+    cells = [bytes_to_cell(cell_bytes) for cell_bytes in cells_bytes]
+
+    missing_cell_ids = [cell_id for cell_id in range(CELLS_PER_BLOB) if cell_id not in cell_ids]
+    zero_poly_coeff, zero_poly_eval, zero_poly_eval_brp = construct_vanishing_polynomial(missing_cell_ids)
+
+    eval_shifted_extended_evaluation, eval_shifted_zero_poly, shift_inv = recover_shifted_data(
+        cell_ids,
+        cells,
+        zero_poly_eval,
+        zero_poly_coeff,
+        roots_of_unity_extended,
+    )
+
+    reconstructed_data = recover_original_data(
+        eval_shifted_extended_evaluation,
+        eval_shifted_zero_poly,
+        shift_inv,
+        roots_of_unity_extended,
+    )
+
     for cell_id, cell in zip(cell_ids, cells):
         start = cell_id * FIELD_ELEMENTS_PER_CELL
         end = (cell_id + 1) * FIELD_ELEMENTS_PER_CELL