Skip to content

Commit

Permalink
diff: fix ignore of unicode keys
Browse files Browse the repository at this point in the history
  • Loading branch information
nmeisels authored and jirikuncar committed Oct 11, 2017
1 parent c9b5a8e commit 2835f20
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 12 deletions.
33 changes: 21 additions & 12 deletions dictdiffer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def diff(first, second, node=None, ignore=None, path_limit=None, expand=False,
:param first: The original dictionary, ``list`` or ``set``.
:param second: New dictionary, ``list`` or ``set``.
:param node: Key for comparison that can be used in :func:`dot_lookup`.
:param ignore: List of keys that should not be checked.
:param ignore: Set of keys that should not be checked.
:param path_limit: List of path limit tuples or dictdiffer.utils.PathLimit
object to limit the diff recursion depth.
:param expand: Expand the patches.
Expand All @@ -112,29 +112,38 @@ def diff(first, second, node=None, ignore=None, path_limit=None, expand=False,
.. versionchanged:: 0.7
Diff items are deep copies of their corresponding objects.
Argument *ignore* is always converted to a ``set``.
"""
if path_limit is not None and not isinstance(path_limit, PathLimit):
path_limit = PathLimit(path_limit)

if isinstance(ignore, list):
ignore = {
tuple(value) if isinstance(value, list) else value
for value in ignore
}

node = node or []
if all(map(lambda x: isinstance(x, string_types), node)):
dotted_node = '.'.join(node)
else:
dotted_node = list(node)

def dotted(node, default_type=list):
    """Return the dotted-string form of *node* when possible.

    :param node: Sequence of path components.
    :param default_type: Callable applied to *node* when at least one
        component is not a string (``list`` by default).
    :returns: The components joined with ``'.'`` if every one of them is
        an instance of ``string_types``; otherwise ``default_type(node)``.
    """
    only_strings = all(isinstance(part, string_types) for part in node)
    return '.'.join(node) if only_strings else default_type(node)

dotted_node = dotted(node)

differ = False

if isinstance(first, DICT_TYPES) and isinstance(second, DICT_TYPES):
# dictionaries are not hashable, we can't use sets
def check(key):
"""Test if key in current node should be ignored."""
if PY2 and isinstance(key, text_type):
new_key = key.encode('utf-8')
else:
new_key = key
return ignore is None \
or (node + [key] if isinstance(dotted_node, LIST_TYPES)
else '.'.join(node + [str(new_key)])) not in ignore
return ignore is None or (
dotted(node + [key], default_type=tuple) not in ignore and
tuple(node + [key]) not in ignore
)

intersection = [k for k in first if k in second and check(k)]
addition = [k for k in second if k not in first and check(k)]
Expand Down
13 changes: 13 additions & 0 deletions tests/test_dictdiffer.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,9 @@ def test_unicode_keys(self):
diffed = list(diff(first, second, ignore=['hello']))
assert ('remove', '', [(u'привет', 1)]) == diffed[0]

diffed = list(diff(first, second, ignore=[u'привет']))
assert ('add', '', [('hello', 1)]) == diffed[0]

def test_ignore_key(self):
first = {'a': 'a', 'b': 'b', 'c': 'c'}
second = {'a': 'a', 'b': 2, 'c': 3}
Expand All @@ -261,6 +264,16 @@ def test_ignore_dotted_key(self):
diffed = next(diff(first, second, ignore=['a.aa']))
assert ('change', 'a.ac', ('C', 3)) == diffed

def test_ignore_with_unicode_sub_keys(self):
    """Ignore a nested key whose path contains non-ASCII components.

    The ignored path is given once in dotted-string form and once as a
    list of components; either form must suppress the only change.
    """
    first = {u'a': {u'aא': {u'aa': 'A'}}}
    second = {u'a': {u'aא': {u'aa': 'B'}}}

    # Without ``ignore`` the single changed leaf is reported.
    unfiltered = list(diff(first, second))
    assert len(unfiltered) == 1

    # Dotted-string form of the ignored path.
    by_dotted_key = list(diff(first, second, ignore=[u'a.aא.aa']))
    assert len(by_dotted_key) == 0

    # List-of-components form of the same path.
    by_key_list = list(
        diff(first, second, ignore=[[u'a', u'aא', u'aa']])
    )
    assert len(by_key_list) == 0

def test_ignore_complex_key(self):
first = {'a': {1: {'a': 'a', 'b': 'b'}}}
second = {'a': {1: {'a': 1, 'b': 2}}}
Expand Down

0 comments on commit 2835f20

Please sign in to comment.