First we'll create an example helper type.

In [220]:
class BitEncoded:
    """Annotation helper that tracks a bit width for an integer.

    Intended to be used as `typing.Annotated` metadata, e.g.
    `typing.Annotated[int, BitEncoded(3)]`.
    """

    def __init__(self, bitwidth: int):
        """Store the field width.

        Args:
            bitwidth: Number of bits in the encoded field; must be at least 1.

        Raises:
            ValueError: If `bitwidth` is smaller than 1.
        """
        if bitwidth < 1:
            raise ValueError(f"bitwidth must be at least 1, got {bitwidth}")
        self.bitwidth = bitwidth

    def encode(self, value: int) -> str:
        """Encode `value` as a signed (two's complement) integer of `self.bitwidth` bits.

        Args:
            value: The integer to encode.

        Returns:
            The binary representation as a string of exactly `self.bitwidth`
            characters, without the `0b` prefix.

        Raises:
            ValueError: If `value` does not fit in a signed integer of
                `self.bitwidth` bits.
        """
        # Signed range for n bits is [-2**(n-1), 2**(n-1) - 1].
        limit = 1 << (self.bitwidth - 1)
        if not -limit <= value < limit:
            raise ValueError(
                f"{value} does not fit in a signed {self.bitwidth}-bit integer "
                f"(allowed range: {-limit} to {limit - 1})"
            )

        # Masking a negative Python int yields its two's complement low bits.
        mask = (1 << self.bitwidth) - 1
        return format(value & mask, f"0{self.bitwidth}b")

    def __repr__(self):
        return f"BitEncoded({self.bitwidth})"


# An example: encode the value 5 in a 5-bit field (the result is zero-padded to the full width).
BitEncoded(bitwidth=5).encode(5)


'00101'

Next, we'll build a dataclass that uses the helper type.

In [221]:
import dataclasses
import typing


@dataclasses.dataclass
class ExampleDataclass:
    """Example record whose fields pair the `int` type with a `BitEncoded` marker."""

    # Integer annotated with a 3-bit encoding marker; defaults to 0.
    quantity: typing.Annotated[int, BitEncoded(3)] = 0
    # Integer annotated with a 20-bit encoding marker; defaults to 0.
    price: typing.Annotated[int, BitEncoded(20)] = 0


Given the use of `typing.Annotated`, we can add metadata beyond the type that type checkers are concerned with.

We can access this metadata using `typing.get_type_hints`. By default, it returns only the underlying types that a type checker sees, but passing `include_extras=True` keeps the `Annotated` metadata as well.

In [222]:
# Resolve hints on the class itself: `typing.get_type_hints` is documented for
# modules, classes, methods and functions, so no instance is needed.
typing.get_type_hints(ExampleDataclass)


{'quantity': int, 'price': int}

In [223]:
# Resolve hints on the class itself (no instance needed); `include_extras=True`
# keeps the `Annotated` metadata in the result.
typing.get_type_hints(ExampleDataclass, include_extras=True)


{'quantity': typing.Annotated[int, BitEncoded(3)],
 'price': typing.Annotated[int, BitEncoded(20)]}

Now, we can use this metadata at runtime.

In [224]:
data = ExampleDataclass(quantity=3, price=350)
# Look the hints up on the instance's class, which is the kind of object
# `typing.get_type_hints` is documented to accept.
annotation_map = typing.get_type_hints(type(data), include_extras=True)
# `get_args` on an `Annotated` hint yields (type, *metadata); index 1 is the first metadata item.
typing.get_args(annotation_map["price"])[1]


BitEncoded(20)

In [225]:
data = ExampleDataclass(quantity=3, price=350)
# Map each field name to the `BitEncoded` marker found in its annotation metadata.
# Hints are looked up on the instance's class, which is the kind of object
# `typing.get_type_hints` is documented to accept.
encoders = {
    field: item
    for field, annotation in typing.get_type_hints(
        type(data), include_extras=True
    ).items()
    for item in typing.get_args(annotation)
    if isinstance(item, BitEncoded)
}
encoders


{'quantity': BitEncoded(3), 'price': BitEncoded(20)}

With this in mind, we can easily define a concept like a bit-encoded dataclass.

In [226]:
@dataclasses.dataclass
class BitEncodedExample:
    """Example record whose integer fields carry `BitEncoded` width markers.

    Each field is annotated as `typing.Annotated[int, BitEncoded(n)]`; `encode`
    reads those markers at runtime to build the bit string.
    """

    quantity: typing.Annotated[int, BitEncoded(3)]
    price: typing.Annotated[int, BitEncoded(20)]

    def encode(self) -> str:
        """Encode this instance into a binary string.

        Fields are emitted in declaration order. A field without a `BitEncoded`
        marker is skipped; if a field carries several markers, the last one is used.

        Returns:
            The concatenation of every encoded field.
        """
        # Resolve hints on the class, not the instance: that is the documented
        # input for `typing.get_type_hints`, and the class lookup also merges
        # annotations inherited from base dataclasses.
        hints = typing.get_type_hints(type(self), include_extras=True)

        pieces = []
        for field in dataclasses.fields(self):
            markers = [
                item
                for item in typing.get_args(hints[field.name])
                if isinstance(item, BitEncoded)
            ]
            if markers:
                pieces.append(markers[-1].encode(getattr(self, field.name)))

        return "".join(pieces)


# Example: the 3-bit quantity followed by the 20-bit price, 23 characters in total.
data = BitEncodedExample(quantity=3, price=350)
data.encode()


'01100000000000101011110'

Similar concepts can be used for byte-oriented data classes.

Using `struct.Struct(format)` allows for format validation early (at class load), as
well as simplifying the execution of the encoding function.

In [227]:
import struct


@dataclasses.dataclass
class ByteEncodedExample:
    """Example record whose fields carry a `struct.Struct` describing their byte layout.

    Each field is annotated as `typing.Annotated[int, struct.Struct(fmt)]`; `encode`
    and `decode` read those `Struct` objects at runtime.
    """

    quantity: typing.Annotated[int, struct.Struct(">h")]
    price: typing.Annotated[int, struct.Struct(">l")]

    @classmethod
    def _field_structs(cls) -> list[tuple[str, struct.Struct]]:
        """Return `(field name, Struct)` pairs in field declaration order.

        Only the first `struct.Struct` in a field's annotation metadata is used;
        fields without one are left out.
        """
        # Resolving on the class merges annotations inherited from base dataclasses.
        hints = typing.get_type_hints(cls, include_extras=True)
        pairs = []
        for field in dataclasses.fields(cls):
            codec = next(
                (
                    item
                    for item in typing.get_args(hints[field.name])
                    if isinstance(item, struct.Struct)
                ),
                None,
            )
            if codec is not None:
                pairs.append((field.name, codec))
        return pairs

    def encode(self) -> bytes:
        """Encode this instance into bytes using each field's `struct.Struct`.

        Fields are packed in declaration order; fields without a `struct.Struct`
        in their annotation are skipped.

        Returns:
            The concatenation of every packed field.

        Raises:
            struct.error: If a value cannot be packed with its field's format.
        """
        return b"".join(
            codec.pack(getattr(self, name))
            for name, codec in type(self)._field_structs()
        )

    @classmethod
    def decode(cls, buffer: bytes) -> "ByteEncodedExample":
        """Rebuild an instance from bytes produced by `encode`.

        Fields are unpacked in declaration order, each starting where the
        previous one ended. Fields without a `struct.Struct` are not read from
        the buffer and are not passed to the constructor. The remaining buffer
        is printed after each field for demonstration.

        Args:
            buffer: The encoded bytes.

        Returns:
            A new instance of `cls` (so subclasses decode to their own type).

        Raises:
            struct.error: If the buffer is too short for a field.
        """
        construction_kwargs = {}
        index = 0
        for name, codec in cls._field_structs():
            # `unpack_from` returns a tuple; the value is its first item.
            construction_kwargs[name] = codec.unpack_from(buffer, offset=index)[0]
            index += codec.size
            # For demonstration
            print(f"Remaining data: {buffer[index:]}")

        return cls(**construction_kwargs)


With this definition, the following block should give us:
- The `quantity` as a big-endian signed short (2 bytes): `0x00_03`
- The `price` as a big-endian signed long (4 bytes): `0x00_00_01_5e`

In [228]:
# Example: quantity packs to 2 bytes and price to 4 bytes, both big-endian.
data = ByteEncodedExample(quantity=3, price=350)
# Check the encoding against the expected bytes, written as hex.
assert data.encode() == bytes.fromhex("0003 0000015e")
data.encode()


b'\x00\x03\x00\x00\x01^'

And we can recover the instance from the data.

In [229]:
# Round trip: decode the encoded bytes back into an instance.
# `decode` prints the remaining buffer after each field it reads.
rebuilt = ByteEncodedExample.decode(data.encode())
rebuilt

Remaining data: b'\x00\x00\x01^'
Remaining data: b''


ByteEncodedExample(quantity=3, price=350)