From da13a7f6d79d5073f320de0e6b1a307063f9ce07 Mon Sep 17 00:00:00 2001 From: Dengke Tang Date: Mon, 22 Apr 2024 09:06:31 -0700 Subject: [PATCH] decode 2 --- awscrt/cbor.py | 78 +++++++-------- benchmark_cbor.py | 2 +- source/cbor.c | 243 ++++++++++++++++++++++++++++++++++++---------- source/cbor.h | 2 + source/module.c | 2 + 5 files changed, 237 insertions(+), 90 deletions(-) diff --git a/awscrt/cbor.py b/awscrt/cbor.py index 9f9c5c4b..2a7cb60a 100644 --- a/awscrt/cbor.py +++ b/awscrt/cbor.py @@ -283,46 +283,48 @@ def pop_next_inf_str(self) -> bytes: return result def pop_next_list(self) -> list: - type = _awscrt.cbor_decoder_peek_type(self._binding) - return_val = [] - if type == AwsCborElementType.InfArray: - # Consume the inf_array - self.consume_next_element() - while type != AwsCborElementType.Break: - return_val.append(self.pop_next_data_item()) - type = _awscrt.cbor_decoder_peek_type(self._binding) - # Consume the break - self.consume_next_element() - return return_val - elif type == AwsCborElementType.ArrayStart: - number_elements = self.pop_next_array_start() - for i in range(number_elements): - return_val.append(self.pop_next_data_item()) - return return_val - else: - raise ValueError("the cbor src is not a list to decode") + return _awscrt.cbor_decoder_pop_next_py_list(self._binding) + # type = _awscrt.cbor_decoder_peek_type(self._binding) + # return_val = [] + # if type == AwsCborElementType.InfArray: + # # Consume the inf_array + # self.consume_next_element() + # while type != AwsCborElementType.Break: + # return_val.append(self.pop_next_data_item()) + # type = _awscrt.cbor_decoder_peek_type(self._binding) + # # Consume the break + # self.consume_next_element() + # return return_val + # elif type == AwsCborElementType.ArrayStart: + # number_elements = self.pop_next_array_start() + # for i in range(number_elements): + # return_val.append(self.pop_next_data_item()) + # return return_val + # else: + # raise ValueError("the cbor src is not a list to decode") def pop_next_map(self) -> dict: - type = _awscrt.cbor_decoder_peek_type(self._binding) - return_val = {} - if type == AwsCborElementType.InfMap: - # Consume the inf_map - self.consume_next_element() - while type != AwsCborElementType.Break: - return_val[self.pop_next_data_item()] = self.pop_next_data_item() - type = _awscrt.cbor_decoder_peek_type(self._binding) - # Consume the break - self.consume_next_element() - return return_val - elif type == AwsCborElementType.MapStart: - number_elements = self.pop_next_map_start() - for i in range(number_elements): - key = self.pop_next_data_item() - value = self.pop_next_data_item() - return_val[key] = value - return return_val - else: - raise ValueError("the cbor src is not a map to decode") + return _awscrt.cbor_decoder_pop_next_py_dict(self._binding) + # type = _awscrt.cbor_decoder_peek_type(self._binding) + # return_val = {} + # if type == AwsCborElementType.InfMap: + # # Consume the inf_map + # self.consume_next_element() + # while type != AwsCborElementType.Break: + # return_val[self.pop_next_data_item()] = self.pop_next_data_item() + # type = _awscrt.cbor_decoder_peek_type(self._binding) + # # Consume the break + # self.consume_next_element() + # return return_val + # elif type == AwsCborElementType.MapStart: + # number_elements = self.pop_next_map_start() + # for i in range(number_elements): + # key = self.pop_next_data_item() + # value = self.pop_next_data_item() + # return_val[key] = value + # return return_val + # else: + # raise ValueError("the cbor src is not a map to decode") def pop_next_data_item(self) -> Any: # TODO: timestamp, decimal fraction diff --git a/benchmark_cbor.py b/benchmark_cbor.py index f0d8f5fb..655fa6a3 100644 --- a/benchmark_cbor.py +++ b/benchmark_cbor.py @@ -82,7 +82,7 @@ def random_number(r, n): print("CRT -- decode 2") run_start_ns = time.perf_counter_ns() decoder_2 = AwsCborDecoder(encoded) -decoder_2.consume_next_data_item() +crt_decoded = decoder_2.pop_next_data_item_2() run_secs = ns_to_secs(time.perf_counter_ns() - run_start_ns) print(f"time passed: {run_secs} secs") diff --git a/source/cbor.c b/source/cbor.c index 3e065c8b..da389bca 100644 --- a/source/cbor.c +++ b/source/cbor.c @@ -286,79 +286,210 @@ S_POP_NEXT_TO_PYOBJECT(uint64_t, array_start, PyLong_FromUnsignedLongLong) S_POP_NEXT_TO_PYOBJECT(uint64_t, map_start, PyLong_FromUnsignedLongLong) S_POP_NEXT_TO_PYOBJECT(uint64_t, tag_val, PyLong_FromUnsignedLongLong) +static PyObject *s_cbor_decoder_pop_next_data_item_to_pyobject(struct aws_cbor_decoder *decoder); + /** - * Generic helper to convert a cbor encoded data to PyObject + * Generic helper to convert next data item to py_list */ -static PyObject *s_cbor_decoder_pop_next_data_item_to_pyobject(struct aws_cbor_decoder *decoder) { +static PyObject *s_cbor_decoder_pop_next_data_item_to_py_list(struct aws_cbor_decoder *decoder) { enum aws_cbor_element_type out_type = 0; if (aws_cbor_decoder_peek_type(decoder, &out_type)) { return PyErr_AwsLastError(); } - switch (decoder->cached_context.type) { - case AWS_CBOR_TYPE_TAG: - /* Read the next data item */ - /* TODO: error check for the tag content?? */ - decoder->cached_context.type = AWS_CBOR_TYPE_MAX; - if (aws_cbor_decoder_consume_next_data_item(decoder)) { - return AWS_OP_ERR; + PyObject *array = NULL; + PyObject *item = NULL; + switch (out_type) { + case AWS_CBOR_TYPE_ARRAY_START: { + uint64_t num_array_item; + aws_cbor_decoder_pop_next_array_start(decoder, &num_array_item); + if (num_array_item > PY_SSIZE_T_MAX) { + PyErr_SetString(PyExc_OverflowError, "number of array is too large to fit."); + return NULL; } - break; - case AWS_CBOR_TYPE_MAP_START: { - uint64_t num_map_item = decoder->cached_context.cbor_data.map_start; - /* Reset type */ - decoder->cached_context.type = AWS_CBOR_TYPE_MAX; - for (uint64_t i = 0; i < num_map_item; i++) { - /* Key */ - if (aws_cbor_decoder_consume_next_data_item(decoder)) { - return AWS_OP_ERR; + array = PyList_New((Py_ssize_t)num_array_item); + if (!array) { + return NULL; + } + for (size_t i = 0; i < num_array_item; ++i) { + item = s_cbor_decoder_pop_next_data_item_to_pyobject(decoder); + if (!item) { + goto error; } - /* Value */ - if (aws_cbor_decoder_consume_next_data_item(decoder)) { - return AWS_OP_ERR; + PyList_SetItem(array, i, item); /* Steals reference to item */ + } + return array; + } + case AWS_CBOR_TYPE_INF_ARRAY_START: { + array = PyList_New(0); + if (!array) { + return NULL; + } + /* Consume the inf array start */ + aws_cbor_decoder_consume_next_element(decoder, NULL /*consumed_type*/); + aws_cbor_decoder_peek_type(decoder, &out_type); + while (out_type != AWS_CBOR_TYPE_BREAK) { + item = s_cbor_decoder_pop_next_data_item_to_pyobject(decoder); + if (!item) { + goto error; + } + if (PyList_Append(array, item) == -1) { + goto error; + } + /* Append will not steal the reference, deref here. */ + Py_DECREF(item); + if (aws_cbor_decoder_peek_type(decoder, &out_type)) { + PyErr_AwsLastError(); + goto error; } } - break; + return array; } - case AWS_CBOR_TYPE_ARRAY_START: { - uint64_t num_array_item = decoder->cached_context.cbor_data.array_start; - /* Reset type */ - decoder->cached_context.type = AWS_CBOR_TYPE_MAX; - for (uint64_t i = 0; i < num_array_item; i++) { - /* item */ - if (aws_cbor_decoder_consume_next_data_item(decoder)) { - return AWS_OP_ERR; + default: + aws_raise_error(AWS_ERROR_CBOR_UNEXPECTED_TYPE); + return PyErr_AwsLastError(); + } +error: + if (array) { + Py_DECREF(array); + } + return NULL; +} + +/** + * Generic helper to convert next data item to py_dict + */ +static PyObject *s_cbor_decoder_pop_next_data_item_to_py_dict(struct aws_cbor_decoder *decoder) { + enum aws_cbor_element_type out_type = 0; + if (aws_cbor_decoder_peek_type(decoder, &out_type)) { + return PyErr_AwsLastError(); + } + PyObject *dict = NULL; + PyObject *key = NULL; + PyObject *value = NULL; + switch (out_type) { + case AWS_CBOR_TYPE_MAP_START: { + uint64_t num_item; + aws_cbor_decoder_pop_next_map_start(decoder, &num_item); + if (num_item > PY_SSIZE_T_MAX) { + PyErr_SetString(PyExc_OverflowError, "number of dict is too large to fit."); + return NULL; + } + dict = PyDict_New(); + if (!dict) { + return NULL; + } + for (size_t i = 0; i < num_item; ++i) { + key = s_cbor_decoder_pop_next_data_item_to_pyobject(decoder); + value = s_cbor_decoder_pop_next_data_item_to_pyobject(decoder); + if (!key || !value) { + goto error; + } + if (PyDict_SetItem(dict, key, value) == -1) { + goto error; } + Py_DECREF(key); + Py_DECREF(value); } - break; + return dict; } - case AWS_CBOR_TYPE_INF_BYTESTRING_START: - case AWS_CBOR_TYPE_INF_STRING_START: - case AWS_CBOR_TYPE_INF_ARRAY_START: case AWS_CBOR_TYPE_INF_MAP_START: { - enum aws_cbor_element_type next_type; - /* Reset the cache for the tag val */ - decoder->cached_context.type = AWS_CBOR_TYPE_MAX; - if (aws_cbor_decoder_peek_type(decoder, &next_type)) { - return AWS_OP_ERR; + dict = PyDict_New(); + if (!dict) { + return NULL; } - while (next_type != AWS_CBOR_TYPE_BREAK) { - if (aws_cbor_decoder_consume_next_data_item(decoder)) { - return AWS_OP_ERR; + /* Consume the inf array start */ + aws_cbor_decoder_consume_next_element(decoder, NULL /*consumed_type*/); + aws_cbor_decoder_peek_type(decoder, &out_type); + while (out_type != AWS_CBOR_TYPE_BREAK) { + key = s_cbor_decoder_pop_next_data_item_to_pyobject(decoder); + value = s_cbor_decoder_pop_next_data_item_to_pyobject(decoder); + if (!key || !value) { + goto error; } - if (aws_cbor_decoder_peek_type(decoder, &next_type)) { - return AWS_OP_ERR; + if (PyDict_SetItem(dict, key, value) == -1) { + goto error; + } + Py_DECREF(key); + Py_DECREF(value); + if (aws_cbor_decoder_peek_type(decoder, &out_type)) { + PyErr_AwsLastError(); + goto error; } } - break; + return dict; } - default: - break; + aws_raise_error(AWS_ERROR_CBOR_UNEXPECTED_TYPE); + return PyErr_AwsLastError(); + } +error: + if (dict) { + Py_DECREF(dict); } + if (key) { + Py_DECREF(key); + } + if (value) { + Py_DECREF(value); + } + return NULL; +} - /* Done, just reset the cache */ - decoder->cached_context.type = AWS_CBOR_TYPE_MAX; - return AWS_OP_SUCCESS; +/** + * Generic helper to convert a cbor encoded data to PyObject + */ +static PyObject *s_cbor_decoder_pop_next_data_item_to_pyobject(struct aws_cbor_decoder *decoder) { + enum aws_cbor_element_type out_type = 0; + if (aws_cbor_decoder_peek_type(decoder, &out_type)) { + return PyErr_AwsLastError(); + } + switch (out_type) { + case AWS_CBOR_TYPE_UINT: + return s_cbor_decoder_pop_next_unsigned_val_to_pyobject(decoder); + case AWS_CBOR_TYPE_NEGINT: { + /* The value from native code is -1 - val. */ + PyObject *minus_one = PyLong_FromLong(-1); + if (!minus_one) { + return NULL; + } + PyObject *val = s_cbor_decoder_pop_next_neg_val_to_pyobject(decoder); + if (!val) { + Py_DECREF(minus_one); + return NULL; + } + /* Get */ + PyObject *ret_val = PyNumber_Subtract(minus_one, val); + Py_DECREF(minus_one); + Py_DECREF(val); + return ret_val; + } + case AWS_CBOR_TYPE_DOUBLE: + return s_cbor_decoder_pop_next_double_val_to_pyobject(decoder); + case AWS_CBOR_TYPE_BYTESTRING: + return s_cbor_decoder_pop_next_bytes_val_to_pyobject(decoder); + case AWS_CBOR_TYPE_STRING: + return s_cbor_decoder_pop_next_str_val_to_pyobject(decoder); + case AWS_CBOR_TYPE_BOOL: + return s_cbor_decoder_pop_next_boolean_val_to_pyobject(decoder); + case AWS_CBOR_TYPE_NULL: + case AWS_CBOR_TYPE_UNDEFINE: + aws_cbor_decoder_consume_next_element(decoder, NULL); + Py_RETURN_NONE; + case AWS_CBOR_TYPE_MAP_START: + case AWS_CBOR_TYPE_INF_MAP_START: + return s_cbor_decoder_pop_next_data_item_to_py_dict(decoder); + case AWS_CBOR_TYPE_ARRAY_START: + case AWS_CBOR_TYPE_INF_ARRAY_START: + return s_cbor_decoder_pop_next_data_item_to_py_list(decoder); + case AWS_CBOR_TYPE_INF_BYTESTRING_START: + case AWS_CBOR_TYPE_INF_STRING_START: + case AWS_CBOR_TYPE_TAG: + /* TODO: handle those case */ + default: + aws_raise_error(AWS_ERROR_CBOR_UNEXPECTED_TYPE); + return PyErr_AwsLastError(); + } + return NULL; } /*********************************** BINDINGS ***********************************************/ @@ -408,7 +539,17 @@ PyObject *aws_py_cbor_decoder_pop_next_tag_val(PyObject *self, PyObject *args) { return s_cbor_decoder_pop_next_tag_val_to_pyobject(decoder); } +PyObject *aws_py_cbor_decoder_pop_next_py_list(PyObject *self, PyObject *args) { + S_GET_DECODER(); + return s_cbor_decoder_pop_next_data_item_to_py_list(decoder); +} + +PyObject *aws_py_cbor_decoder_pop_next_py_dict(PyObject *self, PyObject *args) { + S_GET_DECODER(); + return s_cbor_decoder_pop_next_data_item_to_py_dict(decoder); +} + PyObject *aws_py_cbor_decoder_pop_next_data_item(PyObject *self, PyObject *args) { S_GET_DECODER(); - return s_cbor_decoder_pop_next_unsigned_val_to_pyobject(decoder); + return s_cbor_decoder_pop_next_data_item_to_pyobject(decoder); } diff --git a/source/cbor.h b/source/cbor.h index 0bbe5994..64b8e9d0 100644 --- a/source/cbor.h +++ b/source/cbor.h @@ -45,6 +45,8 @@ PyObject *aws_py_cbor_decoder_pop_next_array_start(PyObject *self, PyObject *arg PyObject *aws_py_cbor_decoder_pop_next_map_start(PyObject *self, PyObject *args); PyObject *aws_py_cbor_decoder_pop_next_tag_val(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_decoder_pop_next_py_list(PyObject *self, PyObject *args); +PyObject *aws_py_cbor_decoder_pop_next_py_dict(PyObject *self, PyObject *args); PyObject *aws_py_cbor_decoder_pop_next_data_item(PyObject *self, PyObject *args); #endif /* AWS_CRT_PYTHON_CBOR_H */ diff --git a/source/module.c b/source/module.c index 7c2ef18f..8cb64584 100644 --- a/source/module.c +++ b/source/module.c @@ -852,6 +852,8 @@ static PyMethodDef s_module_methods[] = { AWS_PY_METHOD_DEF(cbor_decoder_pop_next_array_start, METH_VARARGS), AWS_PY_METHOD_DEF(cbor_decoder_pop_next_map_start, METH_VARARGS), AWS_PY_METHOD_DEF(cbor_decoder_pop_next_tag_val, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_decoder_pop_next_py_list, METH_VARARGS), + AWS_PY_METHOD_DEF(cbor_decoder_pop_next_py_dict, METH_VARARGS), AWS_PY_METHOD_DEF(cbor_decoder_pop_next_data_item, METH_VARARGS), {NULL, NULL, 0, NULL}, };