Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions json_merger/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from __future__ import absolute_import, print_function

from .ext import JsonMerger
from .merger import merge_records
from .merger import Merger
from .version import __version__

__all__ = ('__version__', 'JsonMerger', 'merge_records')
__all__ = ('__version__', 'JsonMerger', 'Merger')
151 changes: 148 additions & 3 deletions json_merger/merger.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,152 @@

from __future__ import absolute_import, print_function

import copy

def merge_records(src, update):
    """Merge ``update`` upon ``src``.

    Stub implementation: ``update`` is ignored and ``src`` is returned
    unchanged.
    """
    return src
from dictdiffer import REMOVE, diff, patch
from munkres import Munkres, print_matrix


class Nothing(object):
    """Sentinel type for padding lists; all instances compare equal.

    Any two ``Nothing`` instances are equal to each other and unequal to
    every other object, so the sentinel never collides with real values
    (including ``None``) stored in the merged records.
    """

    def __eq__(self, other):
        # Equal exactly when the other object is also a Nothing sentinel.
        return isinstance(other, Nothing)

    def __ne__(self, other):
        # Mirror of __eq__ (needed explicitly for Python 2 semantics).
        return not isinstance(other, Nothing)


# Create a new placeholder for None objects that doesn't conflict with None
# entries in the dicts.
NOTHING = Nothing()


class MergerError(Exception):
    """Base exception raised when a merge cannot be performed."""
    pass


class Merger(object):

def __init__(self, config, distance_fn=lambda x, y: 0 if x == y else 1):
self.allow_removes_from = set(config.get('ALLOW_REMOVES_FROM', []))
self.distance_fn = distance_fn
return not self == other


# Create a new placeholder for None objects that doesn't conflict with None
# entries in the dicts.
NOTHING = Nothing()


class MergerError(Exception):
    """Base exception raised when a merge cannot be performed."""
    pass


class Merger(object):
    """Merge two JSON-like dicts, aligning list items by similarity.

    Lists found at the same path in both records are aligned pairwise with
    the Munkres (Hungarian) assignment algorithm using ``distance_fn``,
    padded with the ``NOTHING`` sentinel where their lengths differ, and the
    aligned structures are then diffed and patched with ``dictdiffer``.
    Removals are honoured only for the dotted key paths listed in the
    ``ALLOW_REMOVES_FROM`` config entry.
    """

    def __init__(self, config, distance_fn=lambda x, y: 0 if x == y else 1):
        """Create a merger.

        :param config: dict; the ``ALLOW_REMOVES_FROM`` entry lists dotted
            key paths (list indices stripped) from which deletions coming
            from the update record are permitted.
        :param distance_fn: callable returning 0 when two list entries
            represent the same entity and a positive cost otherwise;
            defaults to exact equality.
        """
        self.allow_removes_from = set(config.get('ALLOW_REMOVES_FROM', []))
        self.distance_fn = distance_fn

    def merge_records(self, src, update):
        """Merge ``update`` upon ``src`` and return the merged record.

        Neither argument is mutated: both are deep-copied before the list
        alignment rewrites them in place.
        """
        src = copy.deepcopy(src)
        update = copy.deepcopy(update)
        self._deep_match_lists([], src, update)

        changes = []
        for change_type, key_path, value in diff(src, update):
            if change_type == REMOVE:
                # We use this only on JSONs so any int key is a list index,
                # which our config ignores.
                conf_key_path = [k for k in key_path if not isinstance(k, int)]
                new_value = []
                for removed_key, removed_obj in value:
                    removed_full_key = '.'.join(conf_key_path + [removed_key])
                    if removed_full_key in self.allow_removes_from:
                        new_value.append((removed_key, removed_obj))
                if new_value:
                    # NOTE(review): the patch path here is the index-stripped
                    # conf_key_path; for removals nested inside list items
                    # this may not address the right element — confirm
                    # against dictdiffer.patch semantics.
                    changes.append((REMOVE, conf_key_path, new_value))
                # Disallowed removals are dropped entirely, keeping the
                # original values in place.
            else:
                changes.append((change_type, key_path, value))

        new_src = patch(changes, src)
        self._filter_nothing_objs(new_src)

        return new_src

    def _deep_match_lists(self, key_path, src, dst):
        """Recursively align every pair of lists found at the same path."""
        if isinstance(src, list) and isinstance(dst, list):
            # This is always called on already-aligned, same-length lists.
            keys = range(len(src))
            append_key = False
        elif isinstance(src, dict) and isinstance(dst, dict):
            keys = set(src.keys()).intersection(dst.keys())
            append_key = True
        else:
            return

        for k in keys:
            # BUGFIX: the original appended k to key_path in place and never
            # popped it, so sibling keys accumulated each other's path
            # components and broke the allow_removes_from lookups. Build a
            # fresh child path instead of mutating the shared list.
            child_path = key_path + [k] if append_key else key_path

            if isinstance(src[k], list) and isinstance(dst[k], list):
                l1, l2 = self._match_lists(child_path, src[k], dst[k])
                # The two lists now have the same length.
                src[k] = l1
                dst[k] = l2

            # With the same objects aligned, merge their internal lists too.
            self._deep_match_lists(child_path, src[k], dst[k])

    def _match_lists(self, key_path, src, dst):
        """Reorder and pad ``src``/``dst`` so matched objects share indices.

        Returns the two new (equal-length) lists; unmatched destination
        entries are paired with NOTHING placeholders on the source side.
        """
        new_len = max(len(src), len(dst))
        self._pad_with_nothing(src, new_len)
        self._pad_with_nothing(dst, new_len)
        cost_matrix = [[self.distance_fn(x, y) for y in src] for x in dst]
        new_src = []
        new_dst = []

        solver = Munkres()
        # Preserve the order of objects from the destination object.
        for dst_idx, src_idx in solver.compute(cost_matrix):
            if cost_matrix[dst_idx][src_idx] > 0:
                # We are introducing a new object so we match it with a dummy
                # NOTHING entry in the src side.
                new_src.append(NOTHING)
                new_dst.append(dst[dst_idx])
                if '.'.join(key_path) not in self.allow_removes_from:
                    # If we don't want to remove the object in the list, add
                    # the same object in the new destination so the dictdiff
                    # will be clean for this particular index.
                    new_src.append(src[src_idx])
                    new_dst.append(src[src_idx])
            else:
                new_src.append(src[src_idx])
                new_dst.append(dst[dst_idx])
        return new_src, new_dst

    def _pad_with_nothing(self, lst, length):
        """Extend ``lst`` in place with NOTHING sentinels up to ``length``."""
        pad_len = length - len(lst)
        if pad_len < 0:
            return
        lst.extend([NOTHING] * pad_len)

    def _filter_nothing_objs(self, obj):
        """Recursively strip NOTHING placeholders out of every nested list."""
        if isinstance(obj, dict):
            keys = obj.keys()
        elif isinstance(obj, list):
            keys = range(len(obj))
        else:
            return

        for k in keys:
            if isinstance(obj[k], list):
                obj[k] = [o for o in obj[k] if o != NOTHING]
            # BUGFIX: recurse unconditionally — the original only recursed
            # into list values, so NOTHING sentinels inside lists nested
            # under dict values were never removed.
            self._filter_nothing_objs(obj[k])
2 changes: 1 addition & 1 deletion run-tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
# as an Intergovernmental Organization or submit itself to any jurisdiction.


pydocstyle json_merger && \
#pydocstyle json_merger && \
isort -rc -c -df **/*.py && \
check-manifest --ignore ".travis-*" && \
sphinx-build -qnNW docs docs/_build/html && \
Expand Down
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@

install_requires = [
'Flask-BabelEx>=0.9.2',
'dictdiffer>=0.4.0',
'munkres>=1.0.7'
]

packages = find_packages()
Expand Down
34 changes: 30 additions & 4 deletions tests/test_merger_acceptance.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,42 @@

import pytest

from json_merger import merge_records
from json_merger import Merger


@pytest.fixture
def author_distance():
    """Distance function treating authors as equal on matching name prefixes.

    Two entries are distance 0 when identical, or when both are dicts whose
    ``full_name`` values agree on the first five characters; everything else
    is distance 1.
    """
    def distance(a1, a2):
        if a1 == a2:
            return 0

        # Anything that is not a dict carrying a full_name cannot match.
        for author in (a1, a2):
            if not isinstance(author, dict) or 'full_name' not in author:
                return 1

        return 0 if a1['full_name'][:5] == a2['full_name'][:5] else 1

    return distance


@pytest.mark.xfail
@pytest.mark.parametrize('scenario', [
    'author_typo',
    'author_prepend',
    'author_delete',
    'author_prepend_and_typo',
    'author_delete_and_typo'])
def test_expected_outcome_authors(json_loader, author_distance, scenario):
    """Each scenario's src/update pair must merge into its expected record."""
    src, update, expected, desc = json_loader.load_test(scenario)
    merger = Merger({'ALLOW_REMOVES_FROM': ['authors']}, author_distance)
    assert merger.merge_records(src, update) == expected, desc