Skip to content

Commit

Permalink
Adds the 'tuple_as_sexp' option to simpleion dump/dumps, giving the users the option to write tuples as Ion s-expressions.
Browse files Browse the repository at this point in the history
  • Loading branch information
tgregg committed Sep 30, 2019
1 parent c3d6613 commit 7094dfd
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 35 deletions.
56 changes: 40 additions & 16 deletions amazon/ion/simpleion.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def dump(obj, fp, imports=None, binary=True, sequence_as_stream=False, skipkeys=
check_circular=True, allow_nan=True, cls=None, indent=None, separators=None, encoding='utf-8', default=None,
use_decimal=True, namedtuple_as_object=True, tuple_as_array=True, bigint_as_string=False, sort_keys=False,
item_sort_key=None, for_json=None, ignore_nan=False, int_as_string_bitcount=None, iterable_as_array=False,
**kw):
tuple_as_sexp=False, **kw):
"""Serialize ``obj`` as an Ion-formatted stream to ``fp`` (a file-like object), using the following conversion
table::
+-------------------+-------------------+
Expand Down Expand Up @@ -90,10 +90,16 @@ def dump(obj, fp, imports=None, binary=True, sequence_as_stream=False, skipkeys=
| bytes (Python 3) | blob |
| IonPyBytes(BLOB) | |
|-------------------+-------------------|
| list, tuple, | |
| IonPyList(LIST) | list |
| list, | |
| tuple (when | |
| tuple_as_sexp= | list |
| False) | |
| IonPyList(LIST) | |
|-------------------+-------------------|
| IonPyList(SEXP) | sexp |
| tuple (when | |
| tuple_as_sexp= | sexp |
| True), | |
| IonPyList(SEXP) | |
|-------------------+-------------------|
| dict, namedtuple, | |
| IonPyDict | struct |
Expand Down Expand Up @@ -130,22 +136,26 @@ def dump(obj, fp, imports=None, binary=True, sequence_as_stream=False, skipkeys=
ignore_nan: NOT IMPLEMENTED
int_as_string_bitcount: NOT IMPLEMENTED
iterable_as_array: NOT IMPLEMENTED
tuple_as_sexp (Optional[True|False]): When True, all tuple values will be written as Ion s-expressions.
When False, all tuple values will be written as Ion lists. Default: False.
**kw: NOT IMPLEMENTED
"""

raw_writer = binary_writer(imports) if binary else text_writer(indent=indent)
writer = blocking_writer(raw_writer, fp)
from_type = _FROM_TYPE_TUPLE_AS_SEXP if tuple_as_sexp else _FROM_TYPE
writer.send(ION_VERSION_MARKER_EVENT) # The IVM is emitted automatically in binary; it's optional in text.
if sequence_as_stream and isinstance(obj, (list, tuple)):
# Treat this top-level sequence as a stream; serialize its elements as top-level values, but don't serialize the
# sequence itself.
for top_level in obj:
_dump(top_level, writer)
_dump(top_level, writer, from_type)
else:
_dump(obj, writer)
_dump(obj, writer, from_type)
writer.send(ION_STREAM_END_EVENT)


_FROM_TYPE = dict(chain(
six.iteritems({
type(None): IonType.NULL,
Expand All @@ -167,25 +177,34 @@ def dump(obj, fp, imports=None, binary=True, sequence_as_stream=False, skipkeys=
),
))

_FROM_TYPE_TUPLE_AS_SEXP = dict(_FROM_TYPE)
_FROM_TYPE_TUPLE_AS_SEXP.update({
tuple: IonType.SEXP
})


def _ion_type(obj):
def _ion_type(obj, from_type):
types = [type(obj)]
while types:
current_type = types.pop()
if current_type in _FROM_TYPE:
return _FROM_TYPE[current_type]
if current_type in from_type:
if current_type is SymbolToken:
# SymbolToken is a tuple. Since tuple also has a mapping, SymbolToken has to be special-cased
# to avoid relying on how the dict is ordered.
return IonType.SYMBOL
return from_type[current_type]
types.extend(current_type.__bases__)

raise TypeError('Unknown scalar type %r' % (type(obj),))


def _dump(obj, writer, field=None):
def _dump(obj, writer, from_type, field=None):
null = is_null(obj)
try:
ion_type = obj.ion_type
ion_nature = True
except AttributeError:
ion_type = _ion_type(obj)
ion_type = _ion_type(obj, from_type)
ion_nature = False
if not null and ion_type.is_container:
if ion_nature:
Expand All @@ -195,10 +214,10 @@ def _dump(obj, writer, field=None):
writer.send(event)
if ion_type is IonType.STRUCT:
for field, val in six.iteritems(obj):
_dump(val, writer, field)
_dump(val, writer, from_type, field)
else:
for elem in obj:
_dump(elem, writer)
_dump(elem, writer, from_type)
event = _ION_CONTAINER_END_EVENT
else:
# obj is a scalar value
Expand All @@ -212,7 +231,8 @@ def _dump(obj, writer, field=None):
def dumps(obj, imports=None, binary=True, sequence_as_stream=False, skipkeys=False, ensure_ascii=True, check_circular=True,
allow_nan=True, cls=None, indent=None, separators=None, encoding='utf-8', default=None, use_decimal=True,
namedtuple_as_object=True, tuple_as_array=True, bigint_as_string=False, sort_keys=False, item_sort_key=None,
for_json=None, ignore_nan=False, int_as_string_bitcount=None, iterable_as_array=False, **kw):
for_json=None, ignore_nan=False, int_as_string_bitcount=None, iterable_as_array=False, tuple_as_sexp=False,
**kw):
"""Serialize ``obj`` as Python ``string`` or ``bytes`` object, using the conversion table used by ``dump`` (above).
Args:
Expand Down Expand Up @@ -245,6 +265,8 @@ def dumps(obj, imports=None, binary=True, sequence_as_stream=False, skipkeys=Fal
ignore_nan: NOT IMPLEMENTED
int_as_string_bitcount: NOT IMPLEMENTED
iterable_as_array: NOT IMPLEMENTED
tuple_as_sexp (Optional[True|False]): When True, all tuple values will be written as Ion s-expressions.
When False, all tuple values will be written as Ion lists. Default: False.
**kw: NOT IMPLEMENTED
Returns:
Expand All @@ -253,11 +275,13 @@ def dumps(obj, imports=None, binary=True, sequence_as_stream=False, skipkeys=Fal
"""
ion_buffer = six.BytesIO()

dump(obj, ion_buffer, sequence_as_stream=sequence_as_stream, binary=binary, skipkeys=skipkeys, ensure_ascii=ensure_ascii, check_circular=check_circular,
dump(obj, ion_buffer, imports=imports, sequence_as_stream=sequence_as_stream, binary=binary, skipkeys=skipkeys,
ensure_ascii=ensure_ascii, check_circular=check_circular,
allow_nan=allow_nan, cls=cls, indent=indent, separators=separators, encoding=encoding, default=default,
use_decimal=use_decimal, namedtuple_as_object=namedtuple_as_object, tuple_as_array=tuple_as_array,
bigint_as_string=bigint_as_string, sort_keys=sort_keys, item_sort_key=item_sort_key, for_json=for_json,
ignore_nan=ignore_nan, int_as_string_bitcount=int_as_string_bitcount, iterable_as_array=iterable_as_array)
ignore_nan=ignore_nan, int_as_string_bitcount=int_as_string_bitcount, iterable_as_array=iterable_as_array,
tuple_as_sexp=tuple_as_sexp, **kw)

ret_val = ion_buffer.getvalue()
ion_buffer.close()
Expand Down
5 changes: 3 additions & 2 deletions tests/test_simple_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
IonPyDecimal, IonPyTimestamp, IonPyText, IonPyBytes, \
IonPyList, IonPyDict, IonPySymbol
from amazon.ion.equivalence import ion_equals
from amazon.ion.simpleion import _ion_type
from amazon.ion.simpleion import _ion_type, _FROM_TYPE

_TEST_FIELD_NAME = SymbolToken('foo', 10)
_TEST_ANNOTATIONS = (SymbolToken('bar', 11),)
Expand Down Expand Up @@ -107,8 +107,9 @@ def test_event_types(p):
assert value_output.ion_type is ion_type
assert p.event.annotations == value_output.ion_annotations


def test_subclass_types():
class Foo(dict):
pass

assert _ion_type(Foo()) is IonType.STRUCT
assert _ion_type(Foo(), _FROM_TYPE) is IonType.STRUCT
54 changes: 37 additions & 17 deletions tests/test_simpleion.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@
from amazon.ion.simple_types import IonPyDict, IonPyText, IonPyList, IonPyNull, IonPyBool, IonPyInt, IonPyFloat, \
IonPyDecimal, IonPyTimestamp, IonPyBytes, IonPySymbol, _IonNature
from amazon.ion.equivalence import ion_equals
from amazon.ion.simpleion import dump, dumps, load, loads, _ion_type, _FROM_ION_TYPE
from amazon.ion.simpleion import dump, dumps, load, loads, _ion_type, _FROM_ION_TYPE, _FROM_TYPE_TUPLE_AS_SEXP, \
_FROM_TYPE
from amazon.ion.util import record
from amazon.ion.writer_binary_raw import _serialize_symbol, _write_length
from tests.writer_util import VARUINT_END_BYTE, ION_ENCODED_INT_ZERO, SIMPLE_SCALARS_MAP_BINARY, SIMPLE_SCALARS_MAP_TEXT
Expand All @@ -40,7 +41,7 @@
_st = partial(SymbolToken, sid=None, location=None)


class _Parameter(record('desc', 'obj', 'expected', 'has_symbols', ('stream', False))):
class _Parameter(record('desc', 'obj', 'expected', 'has_symbols', ('stream', False), ('tuple_as_sexp', False))):
def __str__(self):
return self.desc

Expand Down Expand Up @@ -114,6 +115,10 @@ def bytes_of(*args, **kwargs):
b'(0)'
)
),
(
[(), ], # NOTE: the generators will detect this and set 'tuple_as_sexp' to True for this case.
_Expected(b'\xC0', b'()')
)
),
IonType.STRUCT: (
(
Expand Down Expand Up @@ -183,9 +188,12 @@ def generate_containers_binary(container_map, preceding_symbols=0):
obj = test_tuple[0]
expecteds = test_tuple[1].binary
has_symbols = False
tuple_as_sexp = False
for elem in obj:
if isinstance(elem, (dict, Multimap)) and len(elem) > 0:
has_symbols = True
elif ion_type is IonType.SEXP and isinstance(elem, tuple):
tuple_as_sexp = True
if has_symbols and preceding_symbols:
# we need to make a distinct copy that will contain an altered encoding
expecteds = []
Expand All @@ -197,7 +205,7 @@ def generate_containers_binary(container_map, preceding_symbols=0):
expected = bytearray()
for e in expecteds:
expected.extend(e)
yield _Parameter(repr(obj), obj, expected, has_symbols, True)
yield _Parameter(repr(obj), obj, expected, has_symbols, True, tuple_as_sexp=tuple_as_sexp)


def generate_annotated_values_binary(scalars_map, container_map):
Expand Down Expand Up @@ -248,7 +256,7 @@ def _assert_symbol_aware_ion_equals(assertion, output):

def _dump_load_run(p, dumps_func, loads_func, binary):
# test dump
res = dumps_func(p.obj, binary=binary, sequence_as_stream=p.stream)
res = dumps_func(p.obj, binary=binary, sequence_as_stream=p.stream, tuple_as_sexp=p.tuple_as_sexp)
if not p.has_symbols:
if binary:
assert (_IVM + p.expected) == res
Expand Down Expand Up @@ -324,10 +332,13 @@ def generate_containers_text(container_map):
obj = test_tuple[0]
expected = test_tuple[1].text[0]
has_symbols = False
tuple_as_sexp = False
for elem in obj:
if isinstance(elem, dict) and len(elem) > 0:
has_symbols = True
yield _Parameter(repr(obj), obj, expected, has_symbols, True)
elif ion_type is IonType.SEXP and isinstance(elem, tuple):
tuple_as_sexp = True
yield _Parameter(repr(obj), obj, expected, has_symbols, True, tuple_as_sexp=tuple_as_sexp)


def generate_annotated_values_text(scalars_map, container_map):
Expand Down Expand Up @@ -416,10 +427,13 @@ def dump_func(*args, **kw):
[[[]]],
[[], [], []],
[{}, {}, {}],
{u'foo': [], u'bar': [], u'baz': []},
[(), (), ()],
(((),),),
([], [], [],),
{u'foo': [], u'bar': (), u'baz': []},
{u'foo': {u'foo': {}}},
[{u'foo': [{u'foo': []}]}],
{u'foo': [{u'foo': []}]},
{u'foo': ({u'foo': []},)},
{
u'foo': IonPyText.from_value(IonType.STRING, u'bar', annotations=(u'str',)),
u'baz': 123,
Expand All @@ -429,7 +443,8 @@ def dump_func(*args, **kw):
u'sxp': IonPyList.from_value(IonType.SEXP, [
False, IonPyNull.from_value(IonType.STRUCT, None, (u'class',)), Decimal('5.678'),
IonPyText.from_value(IonType.SYMBOL, u'sym2'), IonPyText.from_value(IonType.SYMBOL, u'_a_s_d_f_')
])
]),
u'lst_or_sxp': (123, u'abc')
},

]
Expand All @@ -443,17 +458,17 @@ def _adjust_sids(annotations=()):
return SymbolToken(obj.text, 10 + len(annotations))
return obj

def _to_obj(to_type=None, annotations=()):
def _to_obj(to_type=None, annotations=(), tuple_as_sexp=False):
if to_type is None:
to_type = ion_type
obj_out = _adjust_sids(annotations)
return _FROM_ION_TYPE[ion_type].from_value(to_type, obj_out, annotations=annotations), is_binary, indent
return _FROM_ION_TYPE[ion_type].from_value(to_type, obj_out, annotations=annotations), is_binary, indent, tuple_as_sexp

for obj in roundtrips:
obj = _adjust_sids()
yield obj, is_binary, indent
yield obj, is_binary, indent, False
if not isinstance(obj, _IonNature):
ion_type = _ion_type(obj)
ion_type = _ion_type(obj, _FROM_TYPE)
yield _to_obj()
else:
ion_type = obj.ion_type
Expand All @@ -463,15 +478,19 @@ def _to_obj(to_type=None, annotations=()):
if isinstance(obj, list):
yield _to_obj(IonType.SEXP)
yield _to_obj(IonType.SEXP, annotations=(u'annot1', u'annot2'))
if isinstance(obj, tuple) and not isinstance(obj, SymbolToken):
yield _to_obj(IonType.SEXP, tuple_as_sexp=True)
yield _to_obj(IonType.SEXP, annotations=(u'annot1', u'annot2'), tuple_as_sexp=True)


def _assert_roundtrip(before, after):
def _assert_roundtrip(before, after, tuple_as_sexp):
# All loaded Ion values extend _IonNature, even if they were dumped from primitives. This recursively
# wraps each input value in _IonNature for comparison against the output.
def _to_ion_nature(obj):
out = obj
if not isinstance(out, _IonNature):
ion_type = _ion_type(out)
from_type = _FROM_TYPE_TUPLE_AS_SEXP if tuple_as_sexp else _FROM_TYPE
ion_type = _ion_type(out, from_type)
out = _FROM_ION_TYPE[ion_type].from_value(ion_type, out)
if isinstance(out, dict):
update = {}
Expand All @@ -485,6 +504,7 @@ def _to_ion_nature(obj):
update.append(_to_ion_nature(value))
update = IonPyList.from_value(out.ion_type, update, out.ion_annotations)
out = update

return out
assert ion_equals(_to_ion_nature(before), after)

Expand All @@ -493,12 +513,12 @@ def _to_ion_nature(obj):
*tuple(_generate_roundtrips(_ROUNDTRIPS))
)
def test_roundtrip(p):
obj, is_binary, indent = p
obj, is_binary, indent, tuple_as_sexp = p
out = BytesIO()
dump(obj, out, binary=is_binary, indent=indent)
dump(obj, out, binary=is_binary, indent=indent, tuple_as_sexp=tuple_as_sexp)
out.seek(0)
res = load(out)
_assert_roundtrip(obj, res)
_assert_roundtrip(obj, res, tuple_as_sexp)


@parametrize(True, False)
Expand Down

0 comments on commit 7094dfd

Please sign in to comment.