Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Bitlist and Bitvector #1224

Merged
merged 31 commits into from
Jun 28, 2019
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
02f6ba3
Add Bitvector and Bitlist
dankrad Jun 27, 2019
23c7435
Add some tests and fix pack
dankrad Jun 27, 2019
494984f
Fix linting errors
dankrad Jun 27, 2019
d641e94
Cleanups
JustinDrake Jun 27, 2019
67c50cb
Changed attestation and custody bitfields
dankrad Jun 27, 2019
becb7a0
justification_bitfield -> Bitvector[4]
dankrad Jun 27, 2019
80c680e
Phase 1 to Bitvector/Bitlist
dankrad Jun 27, 2019
f57387c
Justification bitvector length to constant
dankrad Jun 27, 2019
a5154da
suggestion to implement bitfield like
protolambda Jun 27, 2019
b574a58
Remove not working py-ssz decoder tests
dankrad Jun 27, 2019
8ed638b
Linter fixes
dankrad Jun 27, 2019
2cb23d3
Merge remote-tracking branch 'origin/bitfield-suggestion' into dankra…
dankrad Jun 27, 2019
afd86f7
Fixes in ssz impl
dankrad Jun 27, 2019
93ce168
More linting fixes
dankrad Jun 27, 2019
7adf07e
A few more tests for Bitvector/Bitlist
dankrad Jun 27, 2019
237b41d
Slice notation for justification_bitfield shift
dankrad Jun 27, 2019
2677d23
Some more (shorter) Bitvector and Bitlist tests
dankrad Jun 27, 2019
2622548
Merge remote-tracking branch 'origin/dev' into dankrad-patch-8
dankrad Jun 28, 2019
196ac42
Cleanup naming
JustinDrake Jun 28, 2019
6f9d374
Cleanups
JustinDrake Jun 28, 2019
e36593b
Add comment
JustinDrake Jun 28, 2019
05842f8
Update 0_beacon-chain.md
JustinDrake Jun 28, 2019
5ff13dd
be explicit about limiting for HTR and chunk padding
protolambda Jun 28, 2019
128bbbc
fix slicing, and support partial slice bounds
protolambda Jun 28, 2019
25db397
fix line length lint problem in checkpoint
protolambda Jun 28, 2019
5f0e583
resolved merge conflicts, take attesters seq->set change from dev, ta…
protolambda Jun 28, 2019
fa84c49
Update specs/core/0_beacon-chain.md
dankrad Jun 28, 2019
6a2d2c8
Bitlist for attestation doc
dankrad Jun 28, 2019
4dcb47e
Update test_libs/pyspec/eth2spec/test/phase_0/block_processing/test_p…
dankrad Jun 28, 2019
be04eb2
Change copy style, and remove deepcopy import
dankrad Jun 28, 2019
4f31207
reword merkleize with limit / length
protolambda Jun 28, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 3 additions & 3 deletions scripts/build_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
signing_root,
)
from eth2spec.utils.ssz.ssz_typing import (
Bit, Bool, Container, List, Vector, Bytes, uint64,
bit, boolean, Container, List, Vector, Bytes, uint64,
JustinDrake marked this conversation as resolved.
Show resolved Hide resolved
Bytes4, Bytes32, Bytes48, Bytes96,
)
from eth2spec.utils.bls import (
Expand All @@ -52,7 +52,7 @@
is_empty,
)
from eth2spec.utils.ssz.ssz_typing import (
Bit, Bool, Container, List, Vector, Bytes, uint64,
bit, boolean, Container, List, Vector, Bytes, uint64,
Bytes4, Bytes32, Bytes48, Bytes96,
)
from eth2spec.utils.bls import (
Expand Down Expand Up @@ -174,7 +174,7 @@ def combine_constants(old_constants: Dict[str, str], new_constants: Dict[str, st


ignored_dependencies = [
'Bit', 'Bool', 'Vector', 'List', 'Container', 'Hash', 'BLSPubkey', 'BLSSignature', 'Bytes', 'BytesN'
'bit', 'boolean', 'Vector', 'List', 'Container', 'Hash', 'BLSPubkey', 'BLSSignature', 'Bytes', 'BytesN'
'Bytes4', 'Bytes32', 'Bytes48', 'Bytes96',
'uint8', 'uint16', 'uint32', 'uint64', 'uint128', 'uint256',
'bytes' # to be removed after updating spec doc
Expand Down
4 changes: 2 additions & 2 deletions specs/core/0_beacon-chain.md
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@ class Validator(Container):
pubkey: BLSPubkey
withdrawal_credentials: Hash # Commitment to pubkey for withdrawals and transfers
effective_balance: Gwei # Balance at stake
slashed: Bool
slashed: boolean
# Status epochs
activation_eligibility_epoch: Epoch # When criteria for activation were met
activation_epoch: Epoch
Expand Down Expand Up @@ -337,7 +337,7 @@ class AttestationData(Container):
```python
class AttestationDataAndCustodyBit(Container):
data: AttestationData
custody_bit: Bit # Challengeable bit (SSZ-bool, 1 byte) for the custody of crosslink data
custody_bit: bit # Challengeable bit (SSZ-bool, 1 byte) for the custody of crosslink data
```

#### `IndexedAttestation`
Expand Down
87 changes: 64 additions & 23 deletions specs/simple-serialize.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@
- [Default values](#default-values)
- [Illegal types](#illegal-types)
- [Serialization](#serialization)
- [`"uintN"`](#uintn)
- [`"bool"`](#bool)
- [`"null`](#null)
- [`uintN`](#uintn)
- [`boolean`](#boolean)
- [`null`](#null)
- [Vectors, containers, lists, unions](#vectors-containers-lists-unions)
- [Deserialization](#deserialization)
- [Merkleization](#merkleization)
Expand All @@ -37,71 +37,96 @@
## Typing
### Basic types

* `"uintN"`: `N`-bit unsigned integer (where `N in [8, 16, 32, 64, 128, 256]`)
* `"bool"`: `True` or `False`
* `uintN`: `N`-bit unsigned integer (where `N in [8, 16, 32, 64, 128, 256]`)
* `boolean`: `True` or `False`

### Composite types

* **container**: ordered heterogeneous collection of values
* key-pair curly bracket notation `{}`, e.g. `{"foo": "uint64", "bar": "bool"}`
* Python dataclass notation with key-type pairs, e.g.
```python
class ContainerExample(Container):
foo: uint64
bar: boolean
```
* **vector**: ordered fixed-length homogeneous collection of values
* angle bracket notation `[type, N]`, e.g. `["uint64", N]`
* **list**: ordered variable-length homogeneous collection of values
* angle bracket notation `[type]`, e.g. `["uint64"]`
* notation `Vector[type, N]`, e.g. `Vector[uint64, N]`
* **list**: ordered variable-length homogeneous collection of values, with maximum length `N`
* notation `List[type, N]`, e.g. `List[uint64, N]`
* **union**: union type containing one of the given subtypes
* round bracket notation `(type_1, type_2, ...)`, e.g. `("null", "uint64")`
* notation `Union[type_1, type_2, ...]`, e.g. `Union[null, uint64]`
* **Bitvector**: a fixed-length list of `boolean` values
* notation `Bitvector[N]`
* **Bitlist**: a variable-length list of `boolean` values with maximum length `N`
* notation `Bitlist[N]`

### Variable-size and fixed-size

We recursively define "variable-size" types to be lists and unions and all types that contain a variable-size type. All other types are said to be "fixed-size".
We recursively define "variable-size" types to be lists, unions, `Bitlist` and all types that contain a variable-size type. All other types are said to be "fixed-size".

### Aliases

For convenience we alias:

* `"byte"` to `"uint8"` (this is a basic type)
* `"bytes"` to `["byte"]` (this is *not* a basic type)
* `"bytesN"` to `["byte", N]` (this is *not* a basic type)
* `"null"`: `{}`, i.e. the empty container
* `bit` to `boolean`
* `byte` to `uint8` (this is a basic type)
* `BytesN` to `Vector[byte, N]` (this is *not* a basic type)
* `null`: `{}`, i.e. the empty container

### Default values

The default value of a type upon initialization is recursively defined using `0` for `"uintN"`, `False` for `"bool"`, and `[]` for lists. Unions default to the first type in the union (with type index zero), which is `"null"` if present in the union.
The default value of a type upon initialization is recursively defined using `0` for `uintN`, `False` for `boolean` and the elements of `Bitvector`, and `[]` for lists and `Bitlist`. Unions default to the first type in the union (with type index zero), which is `null` if present in the union.

#### `is_empty`

An SSZ object is called empty (and thus, `is_empty(object)` returns true) if it is equal to the default value for that type.

### Illegal types

Empty vector types (i.e. `[subtype, 0]` for some `subtype`) are not legal. The `"null"` type is only legal as the first type in a union subtype (i.e. with type index zero).
Empty vector types (i.e. `Vector[subtype, 0]` for some `subtype`) are not legal. The `null` type is only legal as the first type in a union subtype (i.e. with type index zero).

## Serialization

We recursively define the `serialize` function which consumes an object `value` (of the type specified) and returns a bytestring of type `"bytes"`.
We recursively define the `serialize` function which consumes an object `value` (of the type specified) and returns a bytestring of type `bytes`.

*Note*: In the function definitions below (`serialize`, `hash_tree_root`, `signing_root`, `is_variable_size`, etc.) objects implicitly carry their type.

### `"uintN"`
### `uintN`

```python
assert N in [8, 16, 32, 64, 128, 256]
return value.to_bytes(N // 8, "little")
```

### `"bool"`
### `boolean`

```python
assert value in (True, False)
return b"\x01" if value is True else b"\x00"
```

### `"null"`
### `null`

```python
return b""
```

### `Bitvector[N]`

```python
as_integer = sum([value[i] << i for i in range(len(value))])
return as_integer.to_bytes((N + 7) // 8, "little")
```

### `Bitlist[N]`

Note that from the offset coding, the length (in bytes) of the bitlist is known. An additional `1` bit is set at index `len(value)`, immediately after the last value bit, so that the length in bits can also be recovered.

```python
as_integer = (1 << len(value)) + sum([value[i] << i for i in range(len(value))])
return as_integer.to_bytes((as_integer.bit_length() + 7) // 8, "little")
```

### Vectors, containers, lists, unions

```python
Expand Down Expand Up @@ -142,17 +167,33 @@ We first define helper functions:

* `pack`: Given ordered objects of the same basic type, serialize them, pack them into `BYTES_PER_CHUNK`-byte chunks, right-pad the last chunk with zero bytes, and return the chunks.
* `merkleize`: Given ordered `BYTES_PER_CHUNK`-byte chunks, if necessary append zero chunks so that the number of chunks is a power of two, Merkleize the chunks, and return the root. Note that `merkleize` on a single chunk is simply that chunk, i.e. the identity when the number of chunks is one.
* `pad`: Given a list `l` and a length `N`, append `N - len(l)` empty objects to the end of the list (the type of the empty object is implicit in the list type), and return the padded list.
* `mix_in_length`: Given a Merkle root `root` and a length `length` (`uint256` little-endian serialization) return `hash(root + length)`.
* `mix_in_type`: Given a Merkle root `root` and a type_index `type_index` (`uint256` little-endian serialization) return `hash(root + type_index)`.

We now define Merkleization `hash_tree_root(value)` of an object `value` recursively:

* `merkleize(pack(value))` if `value` is a basic object or a vector of basic objects
* `mix_in_length(merkleize(pack(value)), len(value))` if `value` is a list of basic objects
* `mix_in_length(merkleize(pack(pad(value, N))), len(value))` if `value` is a list of basic objects
* `merkleize([hash_tree_root(element) for element in value])` if `value` is a vector of composite objects or a container
* `mix_in_length(merkleize([hash_tree_root(element) for element in value]), len(value))` if `value` is a list of composite objects
* `mix_in_length(merkleize([hash_tree_root(element) for element in pad(value, N)]), len(value))` if `value` is a list of composite objects
* `mix_in_type(merkleize(value.value), value.type_index)` if `value` is of union type

### Merkleization of `Bitvector[N]`

```python
as_integer = sum([value[i] << i for i in range(len(value))])
return merkleize(as_integer.to_bytes((N + 7) // 8, "little"))
```

### Merkleization of `Bitlist[N]`

```python
as_integer = sum([value[i] << i for i in range(len(value))])
return mix_in_length(merkleize(as_integer.to_bytes((N + 7) // 8, "little")), len(value))
```


## Self-signed containers

Let `value` be a self-signed container object. The convention is that the signature (e.g. a `Bytes96` BLS12-381 signature) be the last field of `value`. Further, the signed message for `value` is `signing_root(value) = hash_tree_root(truncate_last(value))` where `truncate_last` truncates the last element of `value`.
Expand Down
4 changes: 2 additions & 2 deletions test_libs/pyspec/eth2spec/debug/decode.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
from typing import Any
from eth2spec.utils.ssz.ssz_impl import hash_tree_root
from eth2spec.utils.ssz.ssz_typing import (
SSZType, SSZValue, uint, Container, Bytes, List, Bool,
SSZType, SSZValue, uint, Container, Bytes, List, boolean,
Vector, BytesN
)


def decode(data: Any, typ: SSZType) -> SSZValue:
if issubclass(typ, (uint, Bool)):
if issubclass(typ, (uint, boolean)):
return typ(data)
elif issubclass(typ, (List, Vector)):
return typ(decode(element, typ.elem_type) for element in data)
Expand Down
4 changes: 2 additions & 2 deletions test_libs/pyspec/eth2spec/debug/encode.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from eth2spec.utils.ssz.ssz_impl import hash_tree_root
from eth2spec.utils.ssz.ssz_typing import (
SSZValue, uint, Container, Bool
SSZValue, uint, Container, boolean
)


Expand All @@ -10,7 +10,7 @@ def encode(value: SSZValue, include_hash_tree_roots=False):
if value.type().byte_len > 8:
return str(int(value))
return int(value)
elif isinstance(value, Bool):
elif isinstance(value, boolean):
return value == 1
elif isinstance(value, list): # normal python lists, ssz-List, Vector
return [encode(element, include_hash_tree_roots) for element in value]
Expand Down
8 changes: 4 additions & 4 deletions test_libs/pyspec/eth2spec/debug/random_value.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from enum import Enum

from eth2spec.utils.ssz.ssz_typing import (
SSZType, SSZValue, BasicValue, BasicType, uint, Container, Bytes, List, Bool,
SSZType, SSZValue, BasicValue, BasicType, uint, Container, Bytes, List, boolean,
Vector, BytesN
)

Expand Down Expand Up @@ -118,7 +118,7 @@ def get_random_bytes_list(rng: Random, length: int) -> bytes:


def get_random_basic_value(rng: Random, typ: BasicType) -> BasicValue:
if issubclass(typ, Bool):
if issubclass(typ, boolean):
return typ(rng.choice((True, False)))
elif issubclass(typ, uint):
assert typ.byte_len in UINT_BYTE_SIZES
Expand All @@ -128,7 +128,7 @@ def get_random_basic_value(rng: Random, typ: BasicType) -> BasicValue:


def get_min_basic_value(typ: BasicType) -> BasicValue:
if issubclass(typ, Bool):
if issubclass(typ, boolean):
return typ(False)
elif issubclass(typ, uint):
assert typ.byte_len in UINT_BYTE_SIZES
Expand All @@ -138,7 +138,7 @@ def get_min_basic_value(typ: BasicType) -> BasicValue:


def get_max_basic_value(typ: BasicType) -> BasicValue:
if issubclass(typ, Bool):
if issubclass(typ, boolean):
return typ(True)
elif issubclass(typ, uint):
assert typ.byte_len in UINT_BYTE_SIZES
Expand Down
4 changes: 2 additions & 2 deletions test_libs/pyspec/eth2spec/fuzzing/decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def translate_typ(typ) -> ssz.BaseSedes:
return ssz.Vector(translate_typ(typ.elem_type), typ.length)
elif issubclass(typ, spec_ssz.List):
return ssz.List(translate_typ(typ.elem_type))
elif issubclass(typ, spec_ssz.Bool):
elif issubclass(typ, spec_ssz.boolean):
return ssz.boolean
elif issubclass(typ, spec_ssz.uint):
if typ.byte_len == 1:
Expand Down Expand Up @@ -64,7 +64,7 @@ def translate_value(value, typ):
raise TypeError("invalid uint size")
elif issubclass(typ, spec_ssz.List):
return [translate_value(elem, typ.elem_type) for elem in value]
elif issubclass(typ, spec_ssz.Bool):
elif issubclass(typ, spec_ssz.boolean):
return value
elif issubclass(typ, spec_ssz.Vector):
return typ(*(translate_value(elem, typ.elem_type) for elem in value))
Expand Down
21 changes: 17 additions & 4 deletions test_libs/pyspec/eth2spec/utils/ssz/ssz_impl.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from ..merkle_minimal import merkleize_chunks
from ..hash_function import hash
from .ssz_typing import (
SSZValue, SSZType, BasicValue, BasicType, Series, Elements, Bool, Container, List, Bytes, uint,
SSZValue, SSZType, BasicValue, BasicType, Series, Elements, boolean, Container, List, Bytes,
Bitlist, Bitvector, uint,
)

# SSZ Serialization
Expand All @@ -13,7 +14,7 @@
def serialize_basic(value: SSZValue):
if isinstance(value, uint):
return value.to_bytes(value.type().byte_len, 'little')
elif isinstance(value, Bool):
elif isinstance(value, boolean):
if value:
return b'\x01'
else:
Expand All @@ -25,7 +26,7 @@ def serialize_basic(value: SSZValue):
def deserialize_basic(value, typ: BasicType):
if issubclass(typ, uint):
return typ(int.from_bytes(value, 'little'))
elif issubclass(typ, Bool):
elif issubclass(typ, boolean):
assert value in (b'\x00', b'\x01')
return typ(value == b'\x01')
else:
Expand All @@ -39,6 +40,12 @@ def is_empty(obj: SSZValue):
def serialize(obj: SSZValue):
if isinstance(obj, BasicValue):
return serialize_basic(obj)
elif isinstance(obj, Bitvector):
as_integer = sum([obj[i] << i for i in range(len(obj))])
return as_integer.to_bytes((len(obj) + 7) // 8, "little")
elif isinstance(obj, Bitlist):
as_integer = (1 << len(obj)) + sum([obj[i] << i for i in range(len(obj))])
return as_integer.to_bytes((as_integer.bit_length() + 7) // 8, "little")
elif isinstance(obj, Series):
return encode_series(obj)
else:
Expand Down Expand Up @@ -85,6 +92,12 @@ def encode_series(values: Series):
def pack(values: Series):
if isinstance(values, bytes): # Bytes and BytesN are already packed
return values
elif isinstance(values, Bitvector):
as_integer = sum([values[i] << i for i in range(len(values))])
return as_integer.to_bytes((values.length + 7) // 8, "little")
elif isinstance(values, Bitlist):
as_integer = sum([values[i] << i for i in range(len(values))])
return as_integer.to_bytes((values.length + 7) // 8, "little")
return b''.join([serialize_basic(value) for value in values])


Expand Down Expand Up @@ -134,7 +147,7 @@ def hash_tree_root(obj: SSZValue):
else:
raise Exception(f"Type not supported: {type(obj)}")

if isinstance(obj, (List, Bytes)):
if isinstance(obj, (List, Bytes, Bitlist)):
return mix_in_length(merkleize_chunks(leaves, pad_to=chunk_count(obj.type())), len(obj))
else:
return merkleize_chunks(leaves)
Expand Down