Skip to content

Commit

Permalink
ARROW-1625: [Serialization] Support OrderedDict and defaultdict serialization
Browse files Browse the repository at this point in the history

This PR adds support for serializing OrderedDict and defaultdict objects using custom serialization handlers.

Author: Philipp Moritz <pcmoritz@gmail.com>

Closes #1152 from pcmoritz/pydict-exact2 and squashes the following commits:

431e027 [Philipp Moritz] make cloudpickle optional
052b1aa [Philipp Moritz] I'd prefer this not to be a runtime dependency
db19ab9 [Philipp Moritz] add tests
799d983 [Philipp Moritz] do not interpret OrderedDict as dict
  • Loading branch information
pcmoritz authored and wesm committed Oct 3, 2017
1 parent af167fd commit c905783
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 3 deletions.
1 change: 1 addition & 0 deletions ci/travis_script_python.sh
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ which python

conda install -y -q pip \
nomkl \
cloudpickle \
numpy=1.13.1 \
pandas \
cython \
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/python/python_to_arrow.cc
Original file line number Diff line number Diff line change
Expand Up @@ -469,7 +469,7 @@ Status Append(PyObject* context, PyObject* elem, SequenceBuilder* builder,
} else if (PyList_Check(elem)) {
RETURN_NOT_OK(builder->AppendList(PyList_Size(elem)));
sublists->push_back(elem);
} else if (PyDict_Check(elem)) {
} else if (PyDict_CheckExact(elem)) {
RETURN_NOT_OK(builder->AppendDict(PyDict_Size(elem)));
subdicts->push_back(elem);
} else if (PyTuple_CheckExact(elem)) {
Expand Down
38 changes: 36 additions & 2 deletions python/pyarrow/tests/test_serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

import pytest

from collections import namedtuple
from collections import namedtuple, OrderedDict, defaultdict
import string
import sys

Expand Down Expand Up @@ -50,6 +50,12 @@ def assert_equal(obj1, obj2):
.format(
obj1,
obj2))
try:
# Workaround to make comparison of OrderedDicts work on Python 2.7
if obj1 == obj2:
return
except:
pass
for key in obj1.__dict__.keys():
if key not in special_keys:
assert_equal(obj1.__dict__[key], obj2.__dict__[key])
Expand Down Expand Up @@ -168,7 +174,8 @@ class CustomError(Exception):

CUSTOM_OBJECTS = [Exception("Test object."), CustomError(), Point(11, y=22),
Foo(), Bar(), Baz(), Qux(), SubQux(), SubQuxPickle(),
NamedTupleExample(1, 1.0, "hi", np.zeros([3, 5]), [1, 2, 3])]
NamedTupleExample(1, 1.0, "hi", np.zeros([3, 5]), [1, 2, 3]),
OrderedDict([("hello", 1), ("world", 2)])]


def make_serialization_context():
Expand Down Expand Up @@ -213,6 +220,28 @@ def array_custom_deserializer(serialized_obj):
custom_serializer=lambda obj: str(obj),
custom_deserializer=deserializer)

def ordered_dict_custom_serializer(obj):
    """Split a mapping into parallel lists of keys and values.

    Returns a ``(keys, values)`` tuple; both lists follow the iteration
    order of ``obj``.
    """
    keys = list(obj.keys())
    values = list(obj.values())
    return keys, values

def ordered_dict_custom_deserializer(obj):
    """Rebuild an ``OrderedDict`` from a ``(keys, values)`` pair.

    ``obj[0]`` supplies the keys and ``obj[1]`` the matching values; the
    entries are inserted in the order the keys appear.
    """
    keys = obj[0]
    values = obj[1]
    pairs = zip(keys, values)
    return OrderedDict(pairs)

context.register_type(OrderedDict, 20 * b"\x12", pickle=False,
custom_serializer=ordered_dict_custom_serializer,
custom_deserializer=ordered_dict_custom_deserializer)

def default_dict_custom_serializer(obj):
    """Split a ``defaultdict`` into keys, values and its default factory.

    Returns a ``(keys, values, default_factory)`` tuple; the two lists
    follow the iteration order of ``obj``.
    """
    keys = list(obj.keys())
    values = list(obj.values())
    factory = obj.default_factory
    return keys, values, factory

def default_dict_custom_deserializer(obj):
    """Rebuild a ``defaultdict`` from ``(keys, values, default_factory)``.

    ``obj[2]`` becomes the default factory; ``obj[0]`` and ``obj[1]`` are
    paired up to form the initial entries.
    """
    factory = obj[2]
    items = zip(obj[0], obj[1])
    return defaultdict(factory, items)

context.register_type(defaultdict, 20 * b"\x13", pickle=False,
custom_serializer=default_dict_custom_serializer,
custom_deserializer=default_dict_custom_deserializer)

context.register_type(type(lambda: 0), 20 * b"\x14", pickle=True)

return context


Expand Down Expand Up @@ -266,6 +295,11 @@ def test_custom_serialization(large_memory_map):
for obj in CUSTOM_OBJECTS:
serialization_roundtrip(obj, mmap)

def test_default_dict_serialization(large_memory_map):
    """Round-trip a ``defaultdict`` whose default factory is a lambda.

    The test is skipped when cloudpickle cannot be imported.
    """
    # Only the skip-if-missing side effect is needed here; the returned
    # module is never used, so it is not bound to a local name.
    # NOTE(review): presumably cloudpickle is what pickles the lambda
    # default factory -- confirm against the serialization context.
    pytest.importorskip("cloudpickle")
    with pa.memory_map(large_memory_map, mode="r+") as mmap:
        obj = defaultdict(lambda: 0, [("hello", 1), ("world", 2)])
        serialization_roundtrip(obj, mmap)

def test_numpy_serialization(large_memory_map):
with pa.memory_map(large_memory_map, mode="r+") as mmap:
Expand Down
1 change: 1 addition & 0 deletions python/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
pytest
cloudpickle
numpy>=1.10.0
six

0 comments on commit c905783

Please sign in to comment.