Skip to content

Commit

Permalink
Merge pull request #286 from aabadie/fix_hashing
Browse files Browse the repository at this point in the history
FIX: fixing hashing with mixed dtype + test
  • Loading branch information
lesteve committed Jan 14, 2016
2 parents 6e7eee8 + 9db8607 commit 4a9c63d
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 4 deletions.
6 changes: 6 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@ Latest changes
Release 0.9.4
-------------

Alexandre Abadie

FIX joblib.hash error on sets with mixed-type elements and on dicts with
mixed-type keys when using Python 3.
See https://github.com/joblib/joblib/issues/254

Loïc Estève

FIX joblib.dump/load for big numpy arrays with dtype=object
Expand Down
26 changes: 23 additions & 3 deletions joblib/hashing.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,17 @@ class _ConsistentSet(object):
whatever the order of its items.
"""
def __init__(self, set_sequence):
# Build `self._sequence`, a sorted list derived from `set_sequence`, so
# that the stored order does not depend on the set's iteration order.
# NOTE(review): this text is a rendered diff with indentation stripped;
# the next statement appears to be the pre-change line that the
# try/except below replaces -- confirm against the real file.
self._sequence = sorted(set_sequence)
# Forces order of elements in set to ensure consistent hash.
try:
# Trying first to order the set assuming the type of elements is
# consistent and orderable.
# This fails on Python 3 when elements are unorderable
# but we keep it in a try as it's faster.
self._sequence = sorted(set_sequence)
except TypeError:
# If elements are unorderable, sorting them using their hash.
# This is slower but works in any case.
# Note that the stored sequence then holds the sorted hash values,
# not the elements themselves.
# NOTE(review): presumably `hash` is a deterministic module-level
# function rather than the builtin -- confirm.
self._sequence = sorted((hash(e) for e in set_sequence))


class _MyHash(object):
Expand Down Expand Up @@ -127,8 +137,18 @@ def save_global(self, obj, name=None, pack=struct.pack):
dispatch[type(pickle.dump)] = save_global

def _batch_setitems(self, items):
# Hand the (key, value) pairs in `items` to Pickler._batch_setitems in
# sorted order, so the result does not depend on the dict's own ordering.
# NOTE(review): this text is a rendered diff with indentation stripped;
# the next two lines appear to be the pre-change code that the try/except
# below replaces -- confirm against the real file.
# forces order of keys in dict to ensure consistent hash
Pickler._batch_setitems(self, iter(sorted(items)))
# forces order of keys in dict to ensure consistent hash.
try:
# Trying first to compare dict assuming the type of keys is
# consistent and orderable.
# This fails on Python 3 when keys are unorderable
# but we keep it in a try as it's faster.
Pickler._batch_setitems(self, iter(sorted(items)))
except TypeError:
# If keys are unorderable, sorting them using their hash. This is
# slower but works in any case.
# Note that each key is replaced by its hash in the pairs passed on,
# so the original keys themselves are not forwarded on this path.
# NOTE(review): presumably `hash` is a deterministic module-level
# function rather than the builtin -- confirm.
Pickler._batch_setitems(self, iter(sorted((hash(k), v)
for k, v in items)))

def save_set(self, set_items):
# forces order of items in Set to ensure consistent hash
Expand Down
8 changes: 7 additions & 1 deletion joblib/test/test_hashing.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,13 @@ def test_trival_hash():
None,
gc.collect,
[1, ].append,
]
# Next 2 sets have unorderable elements in python 3.
set(('a', 1)),
set(('a', 1, ('a', 1))),
# Next 2 dicts have unorderable type of keys in python 3.
{'a': 1, 1: 2},
{'a': 1, 1: 2, 'd': {'a': 1}},
]
for obj1 in obj_list:
for obj2 in obj_list:
# Check that 2 objects have the same hash only if they are
Expand Down

0 comments on commit 4a9c63d

Please sign in to comment.