Skip to content

Commit

Permalink
Expose *_encoders and use them to replace canonical param of `CBO…
Browse files Browse the repository at this point in the history
…REncoder`

This changes the API a little, yes.
  • Loading branch information
oxij committed Mar 21, 2024
1 parent 9d96226 commit 648acb4
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 96 deletions.
3 changes: 2 additions & 1 deletion cbor2/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
from ._decoder import load as load
from ._decoder import loads as loads
from ._encoder import CBOREncoder as CBOREncoder
from ._encoder import canonical_encoders as canonical_encoders
from ._encoder import default_encoders as default_encoders
from ._encoder import dump as dump
from ._encoder import dumps as dumps
from ._encoder import shareable_encoder as shareable_encoder
Expand Down Expand Up @@ -41,7 +43,6 @@ def _init_cbor2() -> None:

import _cbor2

from ._encoder import canonical_encoders, default_encoders
from ._types import CBORSimpleValue, CBORTag, undefined

_cbor2.default_encoders = OrderedDict(
Expand Down
77 changes: 37 additions & 40 deletions cbor2/_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,10 @@ def wrapper(encoder: CBOREncoder, value: Any) -> None:
return wrapper


# type CBOREncoderDict = dict[type | tuple[str, str], Callable[[CBOREncoder, Any], None]]
CBOREncoderDict = dict[type | tuple[str, str], Callable[[Any, Any], None]]


class CBOREncoder:
"""
The CBOREncoder class implements a fully featured `CBOR`_ encoder with
Expand All @@ -119,7 +123,6 @@ class CBOREncoder:
"_fp_write",
"_shared_containers",
"_encoders",
"_canonical",
"string_referencing",
"string_namespacing",
"_string_references",
Expand All @@ -134,8 +137,8 @@ def __init__(
datetime_as_timestamp: bool = False,
timezone: tzinfo | None = None,
value_sharing: bool = False,
encoders: CBOREncoderDict | None = None,
default: Callable[[CBOREncoder, Any], Any] | None = None,
canonical: bool = False,
date_as_datetime: bool = False,
string_referencing: bool = False,
):
Expand All @@ -153,15 +156,13 @@ def __init__(
:param value_sharing:
set to ``True`` to allow more efficient serializing of repeated values and,
more importantly, cyclic data structures, at the cost of extra line overhead
:param encoders:
a dict from types (or type names) to their encoders, the latter of which are
callables of two arguments (the encoder instance and the value being encoded)
:param default:
a callable that is called by the encoder with two arguments (the encoder
instance and the value being encoded) when no suitable encoder has been
found, and should use the methods on the encoder to encode any objects it
wants to add to the data stream
:param canonical:
when ``True``, use "canonical" CBOR representation; this typically involves
sorting maps, sets, etc. into a pre-determined order ensuring that
serializations are comparable without decoding
an encoder callable that is called when no suitable encoder has been found
in ``encoders``; the callable should use the methods on the encoder to encode
any objects it wants to add to the data stream
:param date_as_datetime:
set to ``True`` to serialize date objects as datetimes (CBOR tag 0), which
was the default behavior in previous releases (cbor2 <= 4.1.2).
Expand All @@ -177,15 +178,15 @@ def __init__(
self.value_sharing = value_sharing
self.string_referencing = string_referencing
self.string_namespacing = string_referencing
if encoders:
self._encoders = encoders.copy()
else:
self._encoders = default_encoders.copy()
self.default = default
self._canonical = canonical
self._shared_containers: dict[
int, tuple[object, int | None]
] = {} # indexes used for value sharing
self._string_references: dict[str | bytes, int] = {} # indexes used for string references
self._encoders = default_encoders.copy()
if canonical:
self._encoders.update(canonical_encoders)

def _find_encoder(self, obj_type: type) -> Callable[[CBOREncoder, Any], None] | None:
for type_or_tuple, enc in list(self._encoders.items()):
Expand Down Expand Up @@ -252,10 +253,6 @@ def default(self, value: Callable[[CBOREncoder, Any], Any] | None) -> None:
else:
raise ValueError("default must be None or a callable")

@property
def canonical(self) -> bool:
return self._canonical

@contextmanager
def disable_value_sharing(self) -> Generator[None, None, None]:
"""
Expand Down Expand Up @@ -646,7 +643,7 @@ def encode_undefined(self, value: UndefinedType) -> None:
self._fp_write(b"\xf7")


default_encoders: dict[type | tuple[str, str], Callable[[CBOREncoder, Any], None]] = {
default_encoders: CBOREncoderDict = {
bytes: CBOREncoder.encode_bytestring,
bytearray: CBOREncoder.encode_bytearray,
str: CBOREncoder.encode_string,
Expand Down Expand Up @@ -679,7 +676,7 @@ def encode_undefined(self, value: UndefinedType) -> None:
}


canonical_encoders: dict[type | tuple[str, str], Callable[[CBOREncoder, Any], None]] = {
canonical_encoders_: CBOREncoderDict = {
float: CBOREncoder.encode_minimal_float,
dict: CBOREncoder.encode_canonical_map,
defaultdict: CBOREncoder.encode_canonical_map,
Expand All @@ -689,14 +686,18 @@ def encode_undefined(self, value: UndefinedType) -> None:
frozenset: CBOREncoder.encode_canonical_set,
}

canonical_encoders: CBOREncoderDict = default_encoders.copy()
canonical_encoders.update(canonical_encoders_)
del canonical_encoders_


def dumps(
obj: object,
datetime_as_timestamp: bool = False,
timezone: tzinfo | None = None,
value_sharing: bool = False,
encoders: CBOREncoderDict | None = None,
default: Callable[[CBOREncoder, Any], None] | None = None,
canonical: bool = False,
date_as_datetime: bool = False,
string_referencing: bool = False,
) -> bytes:
Expand All @@ -716,15 +717,13 @@ def dumps(
set to ``True`` to allow more efficient serializing of repeated values
and, more importantly, cyclic data structures, at the cost of extra
line overhead
:param encoders:
a dict from types (or type names) to their encoders, the latter of which are
callables of two arguments (the encoder instance and the value being encoded)
:param default:
a callable that is called by the encoder with two arguments (the encoder
instance and the value being encoded) when no suitable encoder has been found,
and should use the methods on the encoder to encode any objects it wants to add
to the data stream
:param canonical:
when ``True``, use "canonical" CBOR representation; this typically involves
sorting maps, sets, etc. into a pre-determined order ensuring that
serializations are comparable without decoding
an encoder callable that is called when no suitable encoder has been found
in ``encoders``; the callable should use the methods on the encoder to encode
any objects it wants to add to the data stream
:param date_as_datetime:
set to ``True`` to serialize date objects as datetimes (CBOR tag 0), which was
the default behavior in previous releases (cbor2 <= 4.1.2).
Expand All @@ -739,8 +738,8 @@ def dumps(
datetime_as_timestamp=datetime_as_timestamp,
timezone=timezone,
value_sharing=value_sharing,
encoders=encoders,
default=default,
canonical=canonical,
date_as_datetime=date_as_datetime,
string_referencing=string_referencing,
).encode(obj)
Expand All @@ -753,8 +752,8 @@ def dump(
datetime_as_timestamp: bool = False,
timezone: tzinfo | None = None,
value_sharing: bool = False,
encoders: CBOREncoderDict | None = None,
default: Callable[[CBOREncoder, Any], None] | None = None,
canonical: bool = False,
date_as_datetime: bool = False,
string_referencing: bool = False,
) -> None:
Expand All @@ -776,15 +775,13 @@ def dump(
set to ``True`` to allow more efficient serializing of repeated values
and, more importantly, cyclic data structures, at the cost of extra
line overhead
:param encoders:
a dict from types (or type names) to their encoders, the latter of which are
callables of two arguments (the encoder instance and the value being encoded)
:param default:
a callable that is called by the encoder with two arguments (the encoder
instance and the value being encoded) when no suitable encoder has been found,
and should use the methods on the encoder to encode any objects it wants to add
to the data stream
:param canonical:
when ``True``, use "canonical" CBOR representation; this typically involves
sorting maps, sets, etc. into a pre-determined order ensuring that
serializations are comparable without decoding
an encoder callable that is called when no suitable encoder has been found
in ``encoders``; the callable should use the methods on the encoder to encode
any objects it wants to add to the data stream
:param date_as_datetime:
set to ``True`` to serialize date objects as datetimes (CBOR tag 0), which was
the default behavior in previous releases (cbor2 <= 4.1.2).
Expand All @@ -797,8 +794,8 @@ def dump(
datetime_as_timestamp=datetime_as_timestamp,
timezone=timezone,
value_sharing=value_sharing,
encoders=encoders,
default=default,
canonical=canonical,
date_as_datetime=date_as_datetime,
string_referencing=string_referencing,
).encode(obj)
84 changes: 36 additions & 48 deletions source/encoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -119,22 +119,23 @@ CBOREncoder_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)


// CBOREncoder.__init__(self, fp=None, datetime_as_timestamp=0, timezone=None,
// value_sharing=False, default=None, canonical=False,
// value_sharing=False, encoders=None, default=None,
// date_as_datetime=False)
int
CBOREncoder_init(CBOREncoderObject *self, PyObject *args, PyObject *kwargs)
{
static char *keywords[] = {
"fp", "datetime_as_timestamp", "timezone", "value_sharing", "default",
"canonical", "date_as_datetime", "string_referencing", NULL
"fp", "datetime_as_timestamp", "timezone", "value_sharing",
"encoders", "default",
"date_as_datetime", "string_referencing", NULL
};
PyObject *tmp, *fp = NULL, *default_handler = NULL, *tz = NULL;
int value_sharing = 0, timestamp_format = 0, enc_style = 0,
PyObject *tmp, *fp = NULL, *encoders = NULL, *default_handler = NULL, *tz = NULL;
int value_sharing = 0, timestamp_format = 0,
date_as_datetime = 0, string_referencing = 0;

if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|pOpOppp", keywords,
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|pOpOOpp", keywords,
&fp, &timestamp_format, &tz, &value_sharing,
&default_handler, &enc_style, &date_as_datetime,
&encoders, &default_handler, &date_as_datetime,
&string_referencing))
return -1;
// Predicate values are returned as ints, but need to be stored as bool or ubyte
Expand All @@ -144,8 +145,6 @@ CBOREncoder_init(CBOREncoderObject *self, PyObject *args, PyObject *kwargs)
self->date_as_datetime = true;
if (value_sharing == 1)
self->value_sharing = true;
if (enc_style == 1)
self->enc_style = 1;
if (string_referencing == 1) {
self->string_referencing = true;
self->string_namespacing = true;
Expand All @@ -154,6 +153,28 @@ CBOREncoder_init(CBOREncoderObject *self, PyObject *args, PyObject *kwargs)

if (_CBOREncoder_set_fp(self, fp, NULL) == -1)
return -1;

if (!_CBOR2_default_encoders && init_default_encoders() == -1)
return -1;
if (!_CBOR2_canonical_encoders && init_canonical_encoders() == -1)
return -1;

tmp = self->encoders;
if (!encoders) {
self->enc_style = 0;
encoders = _CBOR2_default_encoders;
} else if (encoders == _CBOR2_default_encoders)
self->enc_style = 0;
else if (encoders == _CBOR2_canonical_encoders)
self->enc_style = 1;
else
self->enc_style = 2;
self->encoders = PyObject_CallMethodObjArgs(encoders, _CBOR2_str_copy, NULL);
Py_DECREF(tmp);

if (!self->encoders)
return -1;

if (default_handler && _CBOREncoder_set_default(self, default_handler, NULL) == -1)
return -1;
if (tz && _CBOREncoder_set_timezone(self, tz, NULL) == -1)
Expand All @@ -167,23 +188,6 @@ CBOREncoder_init(CBOREncoderObject *self, PyObject *args, PyObject *kwargs)
if (!self->string_references)
return -1;

if (!_CBOR2_default_encoders && init_default_encoders() == -1)
return -1;

tmp = self->encoders;
self->encoders = PyObject_CallMethodObjArgs(
_CBOR2_default_encoders, _CBOR2_str_copy, NULL);
Py_DECREF(tmp);
if (!self->encoders)
return -1;
if (self->enc_style) {
if (!_CBOR2_canonical_encoders && init_canonical_encoders() == -1)
return -1;
if (!PyObject_CallMethodObjArgs(self->encoders,
_CBOR2_str_update, _CBOR2_canonical_encoders, NULL))
return -1;
}

return 0;
}

Expand Down Expand Up @@ -304,17 +308,6 @@ _CBOREncoder_set_timezone(CBOREncoderObject *self, PyObject *value,
}


// CBOREncoder._get_canonical(self)
static PyObject *
_CBOREncoder_get_canonical(CBOREncoderObject *self, void *closure)
{
if (self->enc_style)
Py_RETURN_TRUE;
else
Py_RETURN_FALSE;
}


// Utility methods ///////////////////////////////////////////////////////////

static int
Expand Down Expand Up @@ -2097,9 +2090,6 @@ static PyGetSetDef CBOREncoder_getsetters[] = {
{"timezone",
(getter) _CBOREncoder_get_timezone, (setter) _CBOREncoder_set_timezone,
"the timezone to use when encoding naive datetime objects", NULL},
{"canonical",
(getter) _CBOREncoder_get_canonical, NULL,
"if True, then CBOR canonical encoding will be generated", NULL},
{NULL}
};

Expand Down Expand Up @@ -2201,15 +2191,13 @@ PyDoc_STRVAR(CBOREncoder__doc__,
" set to ``True`` to allow more efficient serializing of repeated\n"
" values and, more importantly, cyclic data structures, at the cost\n"
" of extra line overhead\n"
":param encoders:\n"
" a dict from types (or type names) to their encoders, the latter of which are\n"
" callables of two arguments (the encoder instance and the value being encoded)\n"
":param default:\n"
" a callable that is called by the encoder with two arguments (the\n"
" encoder instance and the value being encoded) when no suitable\n"
" encoder has been found, and should use the methods on the encoder\n"
" to encode any objects it wants to add to the data stream\n"
":param int canonical:\n"
" when True, use \"canonical\" CBOR representation; this typically\n"
" involves sorting maps, sets, etc. into a pre-determined order ensuring\n"
" that serializations are comparable without decoding\n"
" an encoder callable that is called when no suitable encoder in has been found\n"
" in ``encoders``, the callable should use the methods on the encoder to encode\n"
" any objects it wants to add to the data stream\n"
"\n"
".. _CBOR: https://cbor.io/\n"
);
Expand Down
Loading

0 comments on commit 648acb4

Please sign in to comment.