Skip to content

Commit

Permalink
Merge pull request #1074 from ioam/nested_key_hashes
Browse files Browse the repository at this point in the history
Generalized hashing of keys for memoization
  • Loading branch information
philippjfr committed Jan 18, 2017
2 parents 4624484 + c1cd64e commit facf323
Show file tree
Hide file tree
Showing 3 changed files with 180 additions and 6 deletions.
11 changes: 6 additions & 5 deletions holoviews/core/spaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,12 +426,13 @@ def __call__(self, *args, **kwargs):
values = tuple(tuple(sorted(s.contents.items())) for s in streams)
key = args + tuple(sorted(kwargs.items())) + values

if key in self._memoized:
return self._memoized[key]
else:

hashed_key = util.deephash(key)
ret = self._memoized.get(hashed_key, None)
if hashed_key and ret is None:
ret = self.callable_function(*args, **kwargs)
self._memoized = {key : ret}
return ret
self._memoized = {hashed_key : ret}
return ret


def get_nested_streams(dmap):
Expand Down
59 changes: 59 additions & 0 deletions holoviews/core/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
import numpy as np
import param

import json

try:
from cyordereddict import OrderedDict
except:
Expand All @@ -24,6 +26,63 @@
except ImportError:
dd = None




class HashableJSON(json.JSONEncoder):
    """
    Extends JSONEncoder to generate a hashable string for as many types
    of object as possible including nested objects and objects that are
    not normally hashable. The purpose of this class is to generate
    unique strings that once hashed are suitable for use in memoization
    and other cases where deep equality must be tested without storing
    the entire object.

    By default JSONEncoder supports booleans, numbers, strings, lists,
    tuples and dictionaries. In order to support other types such as
    sets, datetime objects and mutable objects such as pandas Dataframes
    or numpy arrays, HashableJSON has to convert these types to
    datastructures that can normally be represented as JSON.

    Support for other object types may need to be introduced in
    future. By default, unrecognized object types are represented by
    their hash, falling back to their id when they cannot be hashed.

    One limitation of this approach is that dictionaries with composite
    keys (e.g tuples) are not supported due to the JSON spec.
    """

    # Types encoded via str(obj)
    string_hashable = (dt.datetime,)
    # Types encoded via repr(obj); empty by default
    repr_hashable = ()

    def default(self, obj):
        """
        Return a JSON-serializable stand-in for an object that the base
        JSONEncoder cannot serialize itself.
        """
        if isinstance(obj, set):
            # Order-independent: equal sets give equal frozenset hashes
            return hash(frozenset(obj))
        elif isinstance(obj, np.ndarray):
            return obj.tolist()
        # pd is falsy when pandas could not be imported
        if pd and isinstance(obj, (pd.Series, pd.DataFrame)):
            return repr(sorted(obj.to_dict().items()))
        elif isinstance(obj, self.string_hashable):
            return str(obj)
        elif isinstance(obj, self.repr_hashable):
            return repr(obj)
        try:
            return hash(obj)
        except Exception:
            # Unhashable object: fall back to identity. Only Exception is
            # caught so KeyboardInterrupt/SystemExit still propagate.
            return id(obj)



def deephash(obj):
    """
    Given an object, return a hash using HashableJSON. This hash is not
    architecture, Python version or platform independent.

    Returns None if the object could not be serialized by HashableJSON,
    so callers must treat None as "no usable hash".
    """
    try:
        return hash(json.dumps(obj, cls=HashableJSON, sort_keys=True))
    except Exception:
        # Serialization failed. Only Exception is caught so that
        # KeyboardInterrupt/SystemExit are not silently swallowed.
        return None


# Python3 compatibility
import types
if sys.version_info.major == 3:
Expand Down
116 changes: 115 additions & 1 deletion tests/testutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,15 @@
import unittest
from unittest import SkipTest

import datetime
import numpy as np
from collections import OrderedDict
try:
import pandas as pd
except:
pd = None

from holoviews.core.util import sanitize_identifier_fn, find_range, max_range, wrap_tuple_streams
from holoviews.core.util import sanitize_identifier_fn, find_range, max_range, wrap_tuple_streams, deephash
from holoviews import Dimension
from holoviews.streams import PositionXY
from holoviews.element.comparison import ComparisonTestCase
Expand All @@ -17,6 +23,114 @@

sanitize_identifier = sanitize_identifier_fn.instance()


class TestDeepHash(ComparisonTestCase):
    """
    Tests of deephash function used for memoization.

    Each supported type has an equality test (equal values give equal
    hashes) and an inequality test (differing values give differing
    hashes). Tests that need pandas are skipped when it is unavailable.
    """

    def test_deephash_list_equality(self):
        self.assertEqual(deephash([1,2,3]), deephash([1,2,3]))

    def test_deephash_list_inequality(self):
        obj1 = [1,2,3]
        obj2 = [1,2,3,4]
        self.assertNotEqual(deephash(obj1), deephash(obj2))

    def test_deephash_set_equality(self):
        self.assertEqual(deephash(set([1,2,3])), deephash(set([1,3,2])))

    def test_deephash_set_inequality(self):
        self.assertNotEqual(deephash(set([1,2,3])), deephash(set([1,3,4])))

    def test_deephash_dict_equality(self):
        self.assertEqual(deephash({1:'a',2:'b'}), deephash({2:'b', 1:'a'}))

    # Named *_inequality so it does not shadow the equality test above
    def test_deephash_dict_inequality(self):
        self.assertNotEqual(deephash({1:'a',2:'b'}), deephash({2:'b', 1:'c'}))

    def test_deephash_odict_equality(self):
        odict1 = OrderedDict([(1,'a'), (2,'b')])
        odict2 = OrderedDict([(1,'a'), (2,'b')])
        self.assertEqual(deephash(odict1), deephash(odict2))

    # Named *_inequality so it does not shadow the equality test above
    def test_deephash_odict_inequality(self):
        odict1 = OrderedDict([(1,'a'), (2,'b')])
        odict2 = OrderedDict([(1,'a'), (2,'c')])
        self.assertNotEqual(deephash(odict1), deephash(odict2))

    def test_deephash_numpy_equality(self):
        self.assertEqual(deephash(np.array([1,2,3])),
                         deephash(np.array([1,2,3])))

    def test_deephash_numpy_inequality(self):
        arr1 = np.array([1,2,3])
        arr2 = np.array([1,2,4])
        self.assertNotEqual(deephash(arr1), deephash(arr2))

    def test_deephash_dataframe_equality(self):
        if pd is None: raise SkipTest
        self.assertEqual(deephash(pd.DataFrame({'a':[1,2,3],'b':[4,5,6]})),
                         deephash(pd.DataFrame({'a':[1,2,3],'b':[4,5,6]})))

    def test_deephash_dataframe_inequality(self):
        if pd is None: raise SkipTest
        self.assertNotEqual(deephash(pd.DataFrame({'a':[1,2,3],'b':[4,5,6]})),
                            deephash(pd.DataFrame({'a':[1,2,3],'b':[4,5,8]})))

    def test_deephash_series_equality(self):
        if pd is None: raise SkipTest
        self.assertEqual(deephash(pd.Series([1,2,3])),
                         deephash(pd.Series([1,2,3])))

    def test_deephash_series_inequality(self):
        if pd is None: raise SkipTest
        self.assertNotEqual(deephash(pd.Series([1,2,3])),
                            deephash(pd.Series([1,2,7])))

    def test_deephash_datetime_equality(self):
        dt1 = datetime.datetime(1,2,3)
        dt2 = datetime.datetime(1,2,3)
        self.assertEqual(deephash(dt1), deephash(dt2))

    def test_deephash_datetime_inequality(self):
        dt1 = datetime.datetime(1,2,3)
        dt2 = datetime.datetime(1,2,5)
        self.assertNotEqual(deephash(dt1), deephash(dt2))

    def test_deephash_nested_native_equality(self):
        obj1 = [[1,2], (3,6,7, [True]), 'a', 9.2, 42, {1:3,2:'c'}]
        obj2 = [[1,2], (3,6,7, [True]), 'a', 9.2, 42, {1:3,2:'c'}]
        self.assertEqual(deephash(obj1), deephash(obj2))

    def test_deephash_nested_native_inequality(self):
        obj1 = [[1,2], (3,6,7, [False]), 'a', 9.2, 42, {1:3,2:'c'}]
        obj2 = [[1,2], (3,6,7, [True]), 'a', 9.2, 42, {1:3,2:'c'}]
        self.assertNotEqual(deephash(obj1), deephash(obj2))

    def test_deephash_nested_mixed_equality(self):
        # Builds DataFrames, so skip when pandas is unavailable
        if pd is None: raise SkipTest
        obj1 = [datetime.datetime(1,2,3), set([1,2,3]),
                pd.DataFrame({'a':[1,2],'b':[3,4]}),
                np.array([1,2,3]), {'a':'b', '1':True},
                OrderedDict([(1,'a'),(2,'b')]), np.int64(34)]
        obj2 = [datetime.datetime(1,2,3), set([1,2,3]),
                pd.DataFrame({'a':[1,2],'b':[3,4]}),
                np.array([1,2,3]), {'a':'b', '1':True},
                OrderedDict([(1,'a'),(2,'b')]), np.int64(34)]
        self.assertEqual(deephash(obj1), deephash(obj2))

    def test_deephash_nested_mixed_inequality(self):
        # Builds DataFrames, so skip when pandas is unavailable
        if pd is None: raise SkipTest
        obj1 = [datetime.datetime(1,2,3), set([1,2,3]),
                pd.DataFrame({'a':[1,2],'b':[3,4]}),
                np.array([1,2,3]), {'a':'b', '2':True},
                OrderedDict([(1,'a'),(2,'b')]), np.int64(34)]
        obj2 = [datetime.datetime(1,2,3), set([1,2,3]),
                pd.DataFrame({'a':[1,2],'b':[3,4]}),
                np.array([1,2,3]), {'a':'b', '1':True},
                OrderedDict([(1,'a'),(2,'b')]), np.int64(34)]
        self.assertNotEqual(deephash(obj1), deephash(obj2))


class TestAllowablePrefix(ComparisonTestCase):
"""
Tests of allowable and hasprefix method.
Expand Down

0 comments on commit facf323

Please sign in to comment.