Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Expose *_encoders and use them to replace canonical param of CBOREncoder #227

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion cbor2/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
from ._decoder import load as load
from ._decoder import loads as loads
from ._encoder import CBOREncoder as CBOREncoder
from ._encoder import canonical_encoders as canonical_encoders
from ._encoder import default_encoders as default_encoders
from ._encoder import dump as dump
from ._encoder import dumps as dumps
from ._encoder import shareable_encoder as shareable_encoder
Expand Down Expand Up @@ -41,7 +43,6 @@ def _init_cbor2() -> None:

import _cbor2

from ._encoder import canonical_encoders, default_encoders
from ._types import CBORSimpleValue, CBORTag, undefined

_cbor2.default_encoders = OrderedDict(
Expand Down
77 changes: 37 additions & 40 deletions cbor2/_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,10 @@ def wrapper(encoder: CBOREncoder, value: Any) -> None:
return wrapper


# type CBOREncoderDict = dict[type | tuple[str, str], Callable[[CBOREncoder, Any], None]]
CBOREncoderDict = dict[type | tuple[str, str], Callable[[Any, Any], None]]


class CBOREncoder:
"""
The CBOREncoder class implements a fully featured `CBOR`_ encoder with
Expand All @@ -119,7 +123,6 @@ class CBOREncoder:
"_fp_write",
"_shared_containers",
"_encoders",
"_canonical",
"string_referencing",
"string_namespacing",
"_string_references",
Expand All @@ -134,8 +137,8 @@ def __init__(
datetime_as_timestamp: bool = False,
timezone: tzinfo | None = None,
value_sharing: bool = False,
encoders: CBOREncoderDict | None = None,
default: Callable[[CBOREncoder, Any], Any] | None = None,
canonical: bool = False,
date_as_datetime: bool = False,
string_referencing: bool = False,
):
Expand All @@ -153,15 +156,13 @@ def __init__(
:param value_sharing:
set to ``True`` to allow more efficient serializing of repeated values and,
more importantly, cyclic data structures, at the cost of extra line overhead
:param encoders:
a dict from types (or type names) to their encoders, the latter of which are
callables of two arguments (the encoder instance and the value being encoded)
:param default:
a callable that is called by the encoder with two arguments (the encoder
instance and the value being encoded) when no suitable encoder has been
found, and should use the methods on the encoder to encode any objects it
wants to add to the data stream
:param canonical:
when ``True``, use "canonical" CBOR representation; this typically involves
sorting maps, sets, etc. into a pre-determined order ensuring that
serializations are comparable without decoding
an encoder callable that is called when no suitable encoder has been found
in ``encoders``; the callable should use the methods on the encoder to encode
any objects it wants to add to the data stream
:param date_as_datetime:
set to ``True`` to serialize date objects as datetimes (CBOR tag 0), which
was the default behavior in previous releases (cbor2 <= 4.1.2).
Expand All @@ -177,15 +178,15 @@ def __init__(
self.value_sharing = value_sharing
self.string_referencing = string_referencing
self.string_namespacing = string_referencing
if encoders:
self._encoders = encoders.copy()
else:
self._encoders = default_encoders.copy()
self.default = default
self._canonical = canonical
self._shared_containers: dict[
int, tuple[object, int | None]
] = {} # indexes used for value sharing
self._string_references: dict[str | bytes, int] = {} # indexes used for string references
self._encoders = default_encoders.copy()
if canonical:
self._encoders.update(canonical_encoders)

def _find_encoder(self, obj_type: type) -> Callable[[CBOREncoder, Any], None] | None:
for type_or_tuple, enc in list(self._encoders.items()):
Expand Down Expand Up @@ -252,10 +253,6 @@ def default(self, value: Callable[[CBOREncoder, Any], Any] | None) -> None:
else:
raise ValueError("default must be None or a callable")

@property
def canonical(self) -> bool:
return self._canonical

@contextmanager
def disable_value_sharing(self) -> Generator[None, None, None]:
"""
Expand Down Expand Up @@ -646,7 +643,7 @@ def encode_undefined(self, value: UndefinedType) -> None:
self._fp_write(b"\xf7")


default_encoders: dict[type | tuple[str, str], Callable[[CBOREncoder, Any], None]] = {
default_encoders: CBOREncoderDict = {
bytes: CBOREncoder.encode_bytestring,
bytearray: CBOREncoder.encode_bytearray,
str: CBOREncoder.encode_string,
Expand Down Expand Up @@ -679,7 +676,7 @@ def encode_undefined(self, value: UndefinedType) -> None:
}


canonical_encoders: dict[type | tuple[str, str], Callable[[CBOREncoder, Any], None]] = {
canonical_encoders_: CBOREncoderDict = {
float: CBOREncoder.encode_minimal_float,
dict: CBOREncoder.encode_canonical_map,
defaultdict: CBOREncoder.encode_canonical_map,
Expand All @@ -689,14 +686,18 @@ def encode_undefined(self, value: UndefinedType) -> None:
frozenset: CBOREncoder.encode_canonical_set,
}

canonical_encoders: CBOREncoderDict = default_encoders.copy()
canonical_encoders.update(canonical_encoders_)
del canonical_encoders_


def dumps(
obj: object,
datetime_as_timestamp: bool = False,
timezone: tzinfo | None = None,
value_sharing: bool = False,
encoders: CBOREncoderDict | None = None,
default: Callable[[CBOREncoder, Any], None] | None = None,
canonical: bool = False,
date_as_datetime: bool = False,
string_referencing: bool = False,
) -> bytes:
Expand All @@ -716,15 +717,13 @@ def dumps(
set to ``True`` to allow more efficient serializing of repeated values
and, more importantly, cyclic data structures, at the cost of extra
line overhead
:param encoders:
a dict from types (or type names) to their encoders, the latter of which are
callables of two arguments (the encoder instance and the value being encoded)
:param default:
a callable that is called by the encoder with two arguments (the encoder
instance and the value being encoded) when no suitable encoder has been found,
and should use the methods on the encoder to encode any objects it wants to add
to the data stream
:param canonical:
when ``True``, use "canonical" CBOR representation; this typically involves
sorting maps, sets, etc. into a pre-determined order ensuring that
serializations are comparable without decoding
an encoder callable that is called when no suitable encoder has been found
in ``encoders``; the callable should use the methods on the encoder to encode
any objects it wants to add to the data stream
:param date_as_datetime:
set to ``True`` to serialize date objects as datetimes (CBOR tag 0), which was
the default behavior in previous releases (cbor2 <= 4.1.2).
Expand All @@ -739,8 +738,8 @@ def dumps(
datetime_as_timestamp=datetime_as_timestamp,
timezone=timezone,
value_sharing=value_sharing,
encoders=encoders,
default=default,
canonical=canonical,
date_as_datetime=date_as_datetime,
string_referencing=string_referencing,
).encode(obj)
Expand All @@ -753,8 +752,8 @@ def dump(
datetime_as_timestamp: bool = False,
timezone: tzinfo | None = None,
value_sharing: bool = False,
encoders: CBOREncoderDict | None = None,
default: Callable[[CBOREncoder, Any], None] | None = None,
canonical: bool = False,
date_as_datetime: bool = False,
string_referencing: bool = False,
) -> None:
Expand All @@ -776,15 +775,13 @@ def dump(
set to ``True`` to allow more efficient serializing of repeated values
and, more importantly, cyclic data structures, at the cost of extra
line overhead
:param encoders:
a dict from types (or type names) to their encoders, the latter of which are
callables of two arguments (the encoder instance and the value being encoded)
:param default:
a callable that is called by the encoder with two arguments (the encoder
instance and the value being encoded) when no suitable encoder has been found,
and should use the methods on the encoder to encode any objects it wants to add
to the data stream
:param canonical:
when ``True``, use "canonical" CBOR representation; this typically involves
sorting maps, sets, etc. into a pre-determined order ensuring that
serializations are comparable without decoding
an encoder callable that is called when no suitable encoder has been found
in ``encoders``; the callable should use the methods on the encoder to encode
any objects it wants to add to the data stream
:param date_as_datetime:
set to ``True`` to serialize date objects as datetimes (CBOR tag 0), which was
the default behavior in previous releases (cbor2 <= 4.1.2).
Expand All @@ -797,8 +794,8 @@ def dump(
datetime_as_timestamp=datetime_as_timestamp,
timezone=timezone,
value_sharing=value_sharing,
encoders=encoders,
default=default,
canonical=canonical,
date_as_datetime=date_as_datetime,
string_referencing=string_referencing,
).encode(obj)
84 changes: 36 additions & 48 deletions source/encoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -119,22 +119,23 @@ CBOREncoder_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)


// CBOREncoder.__init__(self, fp=None, datetime_as_timestamp=0, timezone=None,
// value_sharing=False, default=None, canonical=False,
// value_sharing=False, encoders=None, default=None,
// date_as_datetime=False, string_referencing=False)
int
CBOREncoder_init(CBOREncoderObject *self, PyObject *args, PyObject *kwargs)
{
static char *keywords[] = {
"fp", "datetime_as_timestamp", "timezone", "value_sharing", "default",
"canonical", "date_as_datetime", "string_referencing", NULL
"fp", "datetime_as_timestamp", "timezone", "value_sharing",
"encoders", "default",
"date_as_datetime", "string_referencing", NULL
};
PyObject *tmp, *fp = NULL, *default_handler = NULL, *tz = NULL;
int value_sharing = 0, timestamp_format = 0, enc_style = 0,
PyObject *tmp, *fp = NULL, *encoders = NULL, *default_handler = NULL, *tz = NULL;
int value_sharing = 0, timestamp_format = 0,
date_as_datetime = 0, string_referencing = 0;

if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|pOpOppp", keywords,
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|pOpOOpp", keywords,
&fp, &timestamp_format, &tz, &value_sharing,
&default_handler, &enc_style, &date_as_datetime,
&encoders, &default_handler, &date_as_datetime,
&string_referencing))
return -1;
// Predicate values are returned as ints, but need to be stored as bool or ubyte
Expand All @@ -144,8 +145,6 @@ CBOREncoder_init(CBOREncoderObject *self, PyObject *args, PyObject *kwargs)
self->date_as_datetime = true;
if (value_sharing == 1)
self->value_sharing = true;
if (enc_style == 1)
self->enc_style = 1;
if (string_referencing == 1) {
self->string_referencing = true;
self->string_namespacing = true;
Expand All @@ -154,6 +153,28 @@ CBOREncoder_init(CBOREncoderObject *self, PyObject *args, PyObject *kwargs)

if (_CBOREncoder_set_fp(self, fp, NULL) == -1)
return -1;

if (!_CBOR2_default_encoders && init_default_encoders() == -1)
return -1;
if (!_CBOR2_canonical_encoders && init_canonical_encoders() == -1)
return -1;

tmp = self->encoders;
if (!encoders) {
self->enc_style = 0;
encoders = _CBOR2_default_encoders;
} else if (encoders == _CBOR2_default_encoders)
self->enc_style = 0;
else if (encoders == _CBOR2_canonical_encoders)
self->enc_style = 1;
else
self->enc_style = 2;
self->encoders = PyObject_CallMethodObjArgs(encoders, _CBOR2_str_copy, NULL);
Py_DECREF(tmp);

if (!self->encoders)
return -1;

if (default_handler && _CBOREncoder_set_default(self, default_handler, NULL) == -1)
return -1;
if (tz && _CBOREncoder_set_timezone(self, tz, NULL) == -1)
Expand All @@ -167,23 +188,6 @@ CBOREncoder_init(CBOREncoderObject *self, PyObject *args, PyObject *kwargs)
if (!self->string_references)
return -1;

if (!_CBOR2_default_encoders && init_default_encoders() == -1)
return -1;

tmp = self->encoders;
self->encoders = PyObject_CallMethodObjArgs(
_CBOR2_default_encoders, _CBOR2_str_copy, NULL);
Py_DECREF(tmp);
if (!self->encoders)
return -1;
if (self->enc_style) {
if (!_CBOR2_canonical_encoders && init_canonical_encoders() == -1)
return -1;
if (!PyObject_CallMethodObjArgs(self->encoders,
_CBOR2_str_update, _CBOR2_canonical_encoders, NULL))
return -1;
}

return 0;
}

Expand Down Expand Up @@ -304,17 +308,6 @@ _CBOREncoder_set_timezone(CBOREncoderObject *self, PyObject *value,
}


// CBOREncoder._get_canonical(self)
static PyObject *
_CBOREncoder_get_canonical(CBOREncoderObject *self, void *closure)
{
if (self->enc_style)
Py_RETURN_TRUE;
else
Py_RETURN_FALSE;
}


// Utility methods ///////////////////////////////////////////////////////////

static int
Expand Down Expand Up @@ -2097,9 +2090,6 @@ static PyGetSetDef CBOREncoder_getsetters[] = {
{"timezone",
(getter) _CBOREncoder_get_timezone, (setter) _CBOREncoder_set_timezone,
"the timezone to use when encoding naive datetime objects", NULL},
{"canonical",
(getter) _CBOREncoder_get_canonical, NULL,
"if True, then CBOR canonical encoding will be generated", NULL},
{NULL}
};

Expand Down Expand Up @@ -2201,15 +2191,13 @@ PyDoc_STRVAR(CBOREncoder__doc__,
" set to ``True`` to allow more efficient serializing of repeated\n"
" values and, more importantly, cyclic data structures, at the cost\n"
" of extra line overhead\n"
":param encoders:\n"
" a dict from types (or type names) to their encoders, the latter of which are\n"
" callables of two arguments (the encoder instance and the value being encoded)\n"
":param default:\n"
" a callable that is called by the encoder with two arguments (the\n"
" encoder instance and the value being encoded) when no suitable\n"
" encoder has been found, and should use the methods on the encoder\n"
" to encode any objects it wants to add to the data stream\n"
":param int canonical:\n"
" when True, use \"canonical\" CBOR representation; this typically\n"
" involves sorting maps, sets, etc. into a pre-determined order ensuring\n"
" that serializations are comparable without decoding\n"
" an encoder callable that is called when no suitable encoder in has been found\n"
" in ``encoders``, the callable should use the methods on the encoder to encode\n"
" any objects it wants to add to the data stream\n"
"\n"
".. _CBOR: https://cbor.io/\n"
);
Expand Down
Loading
Loading