Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
TingDaoK committed Apr 19, 2024
1 parent 1f19817 commit 0e3370d
Show file tree
Hide file tree
Showing 6 changed files with 317 additions and 98 deletions.
99 changes: 75 additions & 24 deletions awscrt/cbor.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,17 @@ class AwsCborElementType(IntEnum):
InfMap = 15


class AwsCborTags(IntEnum):
    """Tag values used with the CBOR encoder/decoder.

    Mirrors `enum aws_cbor_tags` in aws/common/cbor.h, so the numeric values
    must stay in step with that header.
    """

    StandardTime = 0
    EpochTime = 1
    UnsignedBigNum = 2
    NegativeBigNum = 3
    DecimalFraction = 4
    BigFloat = 5
    Unclassified = 6


class AwsCborEncoder(NativeResource):
""" Encoder for CBOR """

Expand All @@ -36,35 +47,57 @@ def __init__(self):
self._binding = _awscrt.cbor_encoder_new()

def get_encoded_data(self) -> bytes:
    """Fetch everything the encoder has produced so far.

    Returns:
        bytes: The data encoded up to this point.
    """
    encoded = _awscrt.cbor_encoder_get_encoded_data(self._binding)
    return encoded

def write_int(self, val: int):
    """Write an int as cbor formatted.

    The encoding is chosen from the magnitude of val:
        val less than -2^64 is encoded as a negative bignum
        val between -2^64 and -1, inclusive, is encoded as a negative integer
        val between 0 and 2^64 - 1, inclusive, is encoded as an unsigned integer
        val greater than 2^64 - 1 is encoded as an unsigned bignum

    Args:
        val (int): value to be encoded and written to the encoded data.

    Raises:
        TypeError: If val is not an int.
    """
    if not isinstance(val, int):
        # Raise explicitly instead of using assert: asserts are stripped under `python -O`.
        raise TypeError(f"val must be an int, got {type(val).__name__}")

    is_negative = val < 0
    # For negative value, the value to encode is -1 - val.
    val_to_encode = -1 - val if is_negative else val

    bit_len = val_to_encode.bit_length()
    if bit_len > 64:
        # Bignum: a tag followed by the big-endian bytes of the value to encode.
        # (bit_len + 7) // 8 rounds the bit length up to whole bytes.
        bytes_val = val_to_encode.to_bytes((bit_len + 7) // 8, "big")
        if is_negative:
            self.write_tag(AwsCborTags.NegativeBigNum)
        else:
            self.write_tag(AwsCborTags.UnsignedBigNum)
        return self.write_bytes(bytes_val)

    if is_negative:
        return _awscrt.cbor_encoder_write_negative_int(self._binding, val_to_encode)
    return _awscrt.cbor_encoder_write_unsigned_int(self._binding, val_to_encode)

def write_float(self, val: float):
    """Write a float as cbor formatted.

    Args:
        val (float): value to be encoded and written to the encoded data.

    Raises:
        TypeError: If val is not a float.
    """
    # NOTE(review): the previous docstring said a value that converts to int
    # without loss of precision is written as an int. No such conversion happens
    # in this method; presumably the native encoder does it - confirm.
    if not isinstance(val, float):
        # Raise explicitly instead of using assert: asserts are stripped under `python -O`.
        raise TypeError(f"val must be a float, got {type(val).__name__}")
    # Floating point numbers are usually implemented using double in C
    return _awscrt.cbor_encoder_write_float(self._binding, val)

def write_bytes(self, val: bytes):
"""Write bytes as cbor formatted
Expand Down Expand Up @@ -125,7 +158,7 @@ def write_bool(self, val: bool):

def write_data_item(self, data_item: Any):
"""Generic API to write any type of an data_item as cbor formatted.
TODO: timestamp?
TODO: timestamp <-> datetime?? Decimal fraction <-> decimal??
Args:
data_item (Any): any type of data_item. If the type is not supported to be converted to cbor format, ValueError will be raised.
Expand Down Expand Up @@ -210,53 +243,54 @@ def pop_next_tag_val(self) -> int:
return _awscrt.cbor_decoder_pop_next_tag_val(self._binding)

def pop_next_numeric(self) -> Union[int, float]:
    """Pop the next element as a number.

    Returns:
        Union[int, float]: the result of pop_next_unsigned_int,
            pop_next_negative_int or pop_next_double, chosen by the type of
            the next element.

    Raises:
        ValueError: If the next element is not an unsigned int, negative int or float.
    """
    # Peek once; named next_type so the builtin `type` is not shadowed.
    next_type = _awscrt.cbor_decoder_peek_type(self._binding)
    if next_type == AwsCborElementType.UnsignedInt:
        return self.pop_next_unsigned_int()
    if next_type == AwsCborElementType.NegativeInt:
        return self.pop_next_negative_int()
    if next_type == AwsCborElementType.Float:
        return self.pop_next_double()
    # TODO: support bignum?
    # TODO: Instead of ValueError, probably raise the same error from C with the same AWS_ERROR_CBOR_UNEXPECTED_TYPE
    raise ValueError("the cbor src is not a numeric type to decode")

def pop_next_inf_bytes(self) -> bytes:
    """Pop an indefinite-length bytes element and return its chunks joined together.

    Returns:
        bytes: concatenation of every chunk popped before the break.

    Raises:
        ValueError: If the next element is not the start of indefinite bytes.
    """
    next_type = _awscrt.cbor_decoder_peek_type(self._binding)
    if next_type != AwsCborElementType.InfBytes:
        raise ValueError("the cbor src is not an indefinite bytes to decode")
    # Consume the inf_bytes
    self.consume_next_element()
    # Peek again now that the start marker is gone. Without this, the loop
    # condition still sees InfBytes and pop_next_bytes() runs once even when
    # the very next element is the break (an empty indefinite bytes).
    next_type = _awscrt.cbor_decoder_peek_type(self._binding)
    chunks = []
    while next_type != AwsCborElementType.Break:
        chunks.append(self.pop_next_bytes())
        next_type = _awscrt.cbor_decoder_peek_type(self._binding)
    # Consume the break
    self.consume_next_element()
    # Join once at the end instead of growing a bytes object chunk by chunk.
    return b"".join(chunks)

def pop_next_inf_str(self) -> str:
    """Pop an indefinite-length string element and return its chunks joined together.

    Returns:
        str: concatenation of every chunk popped before the break.

    Raises:
        ValueError: If the next element is not the start of an indefinite string.
    """
    next_type = _awscrt.cbor_decoder_peek_type(self._binding)
    if next_type != AwsCborElementType.InfStr:
        raise ValueError("the cbor src is not an indefinite string to decode")
    # Consume the inf_str
    self.consume_next_element()
    # Peek again now that the start marker is gone. Without this, the loop
    # condition still sees InfStr and pop_next_str() runs once even when the
    # very next element is the break (an empty indefinite string).
    next_type = _awscrt.cbor_decoder_peek_type(self._binding)
    chunks = []
    while next_type != AwsCborElementType.Break:
        chunks.append(self.pop_next_str())
        next_type = _awscrt.cbor_decoder_peek_type(self._binding)
    # Consume the break
    self.consume_next_element()
    # Join once at the end instead of growing a str chunk by chunk.
    return "".join(chunks)

def pop_next_list(self) -> list:
type = self.peek_next_type()
type = _awscrt.cbor_decoder_peek_type(self._binding)
return_val = []
if type == AwsCborElementType.InfArray:
# Consume the inf_array
self.consume_next_element()
while type != AwsCborElementType.Break:
return_val.append(self.pop_next_data_item())
type = self.peek_next_type()
type = _awscrt.cbor_decoder_peek_type(self._binding)
# Consume the break
self.consume_next_element()
return return_val
Expand All @@ -269,14 +303,14 @@ def pop_next_list(self) -> list:
raise ValueError("the cbor src is not a list to decode")

def pop_next_map(self) -> dict:
type = self.peek_next_type()
type = _awscrt.cbor_decoder_peek_type(self._binding)
return_val = {}
if type == AwsCborElementType.InfMap:
# Consume the inf_map
self.consume_next_element()
while type != AwsCborElementType.Break:
return_val[self.pop_next_data_item()] = self.pop_next_data_item()
type = self.peek_next_type()
type = _awscrt.cbor_decoder_peek_type(self._binding)
# Consume the break
self.consume_next_element()
return return_val
Expand All @@ -291,9 +325,11 @@ def pop_next_map(self) -> dict:
raise ValueError("the cbor src is not a map to decode")

def pop_next_data_item(self) -> Any:
# TODO: tag, timestamp
# TODO: timestamp, decimal fraction
# TODO: maybe write all of these if/elif branches at the binding level, so that we can at least use a switch?
type = self.peek_next_type()
# It may also be possible to avoid some calls across the language boundary that way.
# TODO: If it fails in the middle, with a bunch of elements already popped, do we want a way to resume?
type = _awscrt.cbor_decoder_peek_type(self._binding)
if type == AwsCborElementType.UnsignedInt or \
type == AwsCborElementType.NegativeInt or \
type == AwsCborElementType.Float:
Expand All @@ -304,7 +340,9 @@ def pop_next_data_item(self) -> Any:
return self.pop_next_str()
elif type == AwsCborElementType.Bool:
return self.pop_next_bool()
elif type == AwsCborElementType.Null:
elif type == AwsCborElementType.Null or \
type == AwsCborElementType.Undefined:
# Treat both NULL and Undefined as None.
self.consume_next_element()
return None
elif type == AwsCborElementType.ArrayStart or \
Expand All @@ -317,5 +355,18 @@ def pop_next_data_item(self) -> Any:
return self.pop_next_inf_bytes()
elif type == AwsCborElementType.InfStr:
return self.pop_next_inf_str()
elif type == AwsCborElementType.Tag:
tag_val = self.pop_next_tag_val()
if tag_val == AwsCborTags.NegativeBigNum:
bytes_val = self.pop_next_bytes()
return -1 - int.from_bytes(bytes_val, "big")
elif tag_val == AwsCborTags.UnsignedBigNum:
bytes_val = self.pop_next_bytes()
return int.from_bytes(bytes_val, "big")
else:
raise ValueError(f"unsupported tag value: {tag_val}")
else:
raise ValueError(f"unsupported type: {type.name}")

def pop_next_data_item_2(self) -> Any:
    """Pop the next data item by delegating to `_awscrt.cbor_decoder_pop_next_data_item`."""
    item = _awscrt.cbor_decoder_pop_next_data_item(self._binding)
    return item
91 changes: 91 additions & 0 deletions benchmark_cbor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
from awscrt.cbor import *
import random
import time
import cbor2


def ns_to_secs(ns: int) -> float:
    """Convert a duration in nanoseconds to seconds."""
    nanos_per_sec = 1_000_000_000.0
    return ns / nanos_per_sec


def bytes_to_MiB(bytes: int) -> float:
    """Convert a byte count to mebibytes (1 MiB = 1024 * 1024 bytes)."""
    bytes_per_mib = float(1024 * 1024)
    return bytes / bytes_per_mib


class TestData:
    """Generates predictable, but variable, test values of different types."""

    @staticmethod
    def random_number(r, n):
        """Scale one draw of r.random() by n and truncate it to an int."""
        return int(r.random() * n)

    @staticmethod
    def random_value(i=0, seed=0):
        """Return one test value whose type and content depend on i + seed.

        The generator is seeded with i + seed, so the same arguments always
        produce the same value.
        """
        rng = random.Random(i + seed)  # use the index as the seed for predictable results
        kind = TestData.random_number(rng, 5)
        if kind == 0:
            return f"Some String value {i}"
        if kind == 1:
            return rng.random()  # a float value
        if kind == 2:
            return TestData.random_number(rng, 100000)  # a large integer
        if kind == 3:
            return list(range(TestData.random_number(rng, 100)))  # an array
        if kind == 4:
            return {"a": 1, "b": 2, "c": 3}  # a hash
        # Fallback for any other selector value; random_number(rng, 5) only
        # yields 0..4, so this is not expected to be reached.
        return "generic string"

    @staticmethod
    def test_hash(n_keys=5, seed=0):
        """Build a predictable dict "key0".."key{n_keys - 1}" with a range of value types."""
        generated = {}
        for i in range(n_keys):
            generated[f"key{i}"] = TestData.random_value(i, seed)
        return generated


# Benchmark driver: encodes and decodes the same generated dict with cbor2 and
# with the CRT encoder/decoder, printing the wall-clock time of each step.
# Everything below runs at module level, in order.

# Input under test: a 100000-key dict of mixed value types.
t = TestData.test_hash(100000)


# --- cbor2 encode: time a single cbor2.dumps() of the whole dict.
print("cbor2 -- encode")
run_start_ns = time.perf_counter_ns()
cbor2_encoded = cbor2.dumps(t)
run_secs = ns_to_secs(time.perf_counter_ns() - run_start_ns)
# NOTE(review): the label says MB, but bytes_to_MiB divides by 1024**2 (MiB).
print(f"encoded MB: {bytes_to_MiB(len(cbor2_encoded))}")
print(f"time passed: {run_secs} secs")


# --- CRT encode: the encoder is constructed outside the timed region; the
# timed region covers write_data_item() plus get_encoded_data().
print("CRT -- encode")
encoder = AwsCborEncoder()

run_start_ns = time.perf_counter_ns()
encoder.write_data_item(t)
encoded = encoder.get_encoded_data()
run_secs = ns_to_secs(time.perf_counter_ns() - run_start_ns)
print(f"encoded MB: {bytes_to_MiB(len(encoded))}")
print(f"time passed: {run_secs} secs")

# Prints whether both encoders produced byte-identical output.
print(cbor2_encoded == encoded)

# --- cbor2 decode: note that it decodes the CRT-encoded bytes (`encoded`),
# not `cbor2_encoded`.
print("cbor2 -- decode")
run_start_ns = time.perf_counter_ns()
decoded = cbor2.loads(encoded)
run_secs = ns_to_secs(time.perf_counter_ns() - run_start_ns)
print(f"time passed: {run_secs} secs")

# --- CRT decode: here the timed region includes constructing the decoder.
print("CRT -- decode")
run_start_ns = time.perf_counter_ns()
decoder = AwsCborDecoder(encoded)
crt_decoded = decoder.pop_next_data_item()

run_secs = ns_to_secs(time.perf_counter_ns() - run_start_ns)
print(f"time passed: {run_secs} secs")


# --- CRT decode 2: second decoder over the same bytes; the result is discarded.
# NOTE(review): consume_next_data_item is not visible in the awscrt.cbor changes
# shown alongside this script - confirm it exists (pop_next_data_item_2 may have
# been the intended call).
print("CRT -- decode 2")
run_start_ns = time.perf_counter_ns()
decoder_2 = AwsCborDecoder(encoded)
decoder_2.consume_next_data_item()

run_secs = ns_to_secs(time.perf_counter_ns() - run_start_ns)
print(f"time passed: {run_secs} secs")

# Round-trip checks: the CRT-decoded value against the original input, and
# against what cbor2 decoded from the same bytes.
print(crt_decoded == t)
print(crt_decoded == decoded)
Loading

0 comments on commit 0e3370d

Please sign in to comment.