Skip to content

Commit

Permalink
merger: list alignment stats
Browse files Browse the repository at this point in the history
Signed-off-by: Mihai Bivol <mm.bivol@gmail.com>
  • Loading branch information
mihaibivol committed May 10, 2016
1 parent 3a8647a commit c60f269
Show file tree
Hide file tree
Showing 8 changed files with 101 additions and 29 deletions.
8 changes: 5 additions & 3 deletions json_merger/dict_merger.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@

from __future__ import absolute_import, print_function

import copy
import six

from dictdiffer import ADD, CHANGE, REMOVE, patch
from dictdiffer.merge import Merger, UnresolvedConflictsException

Expand Down Expand Up @@ -88,9 +90,9 @@ class SkipListsMerger(object):

def __init__(self, root, head, update, default_op,
data_lists=None):
self.root = root
self.head = head
self.update = update
self.root = copy.deepcopy(root)
self.head = copy.deepcopy(head)
self.update = copy.deepcopy(update)
self.pick, self.raise_on_conflict = self._parse_op(default_op)
self.data_lists = set(data_lists or [])

Expand Down
23 changes: 17 additions & 6 deletions json_merger/graph_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,9 @@ def __init__(self, lst, root):
self.in_result_idx = set()
self.not_in_result_idx = set(range(len(lst)))
self.not_in_result_root_match_idx = set()
self.next_root_match_uid = 0
self.root_matches = {}

self.next_root_match_uid = 0
# For a given index in the initial list retrieve the match uid.
self.match_uids = {}
# For a given index in the initial list retrieve root match uid.
Expand All @@ -74,14 +75,13 @@ def move_to_result(self, lst_idx, match_uid):
self.not_in_result_idx.remove(lst_idx)
self.match_uids[lst_idx] = match_uid

def add_root_match(self, lst_idx, root_idx):
self.lst_root_match_uids[lst_idx] = self.next_root_match_uid
self.root_root_match_uids[root_idx] = self.next_root_match_uid
self.next_root_match_uid += 1
if lst_idx in self.not_in_result_root_match_idx:
self.not_in_result_root_match_idx.remove(lst_idx)

def add_root_match(self, lst_idx, root_idx):
self.root_matches[lst_idx] = root_idx
if lst_idx in self.in_result_idx:
return

self.not_in_result_root_match_idx.add(lst_idx)

@property
Expand All @@ -105,6 +105,17 @@ def not_in_result_root_match(self):
def not_in_result_not_root_match(self):
return [self.lst[e] for e in self.not_in_result_not_root_match_idx]

@property
def not_in_result_root_match_pairs(self):
    """Return ``(element, root_element)`` tuples for pending root matches.

    For every index in ``not_in_result_root_match_idx`` the element of
    ``lst`` at that index is paired with the element of ``root`` that
    ``root_matches`` maps the index to.
    """
    pairs = []
    for lst_idx in self.not_in_result_root_match_idx:
        pairs.append((self.lst[lst_idx],
                      self.root[self.root_matches[lst_idx]]))
    return pairs

@property
def not_matched_root_objects(self):
    """Return the objects of ``root`` that no list element is matched to.

    A root object counts as matched when its index appears among the
    values of ``root_matches``; the remaining objects are returned in
    their original ``root`` order.
    """
    taken = set(self.root_matches.values())
    leftovers = []
    for root_idx, root_obj in enumerate(self.root):
        if root_idx not in taken:
            leftovers.append(root_obj)
    return leftovers


class ListMatchGraphBuilder(object):

Expand Down
2 changes: 1 addition & 1 deletion json_merger/list_unify.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def unify(self):
self.unified.append(nodes[node])
self.match_uids.append(graph_builder.match_uids[node])
if (self.raise_on_head_delete and
self.head_stats.not_in_result_not_root_match_idx):
self.head_stats.not_in_result_not_root_match):
removed = self.head_stats.not_in_result_not_root_match
raise MergeError(
'Some elements might need to go back to HEAD',
Expand Down
39 changes: 31 additions & 8 deletions json_merger/merger.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@
set_obj_at_key_path
)

PLACEHOLDER_STR = "#$PLACEHOLDER$#"


class ListAlignMerger(object):

Expand All @@ -60,6 +62,10 @@ def __init__(self, root, head, update,
self.conflicts = []
self.merged_root = None

self.aligned_root = copy.deepcopy(root)
self.aligned_head = copy.deepcopy(head)
self.aligned_update = copy.deepcopy(update)

def merge(self):
self.merged_root = self._recursive_merge(self.root, self.head,
self.update)
Expand All @@ -77,6 +83,29 @@ def _merge_objects(self, root, head, update, key_path):
self.conflicts.extend(c.with_prefix(key_path) for c in e.content)
return object_merger

def _build_aligned_lists_and_stats(self, list_unifier, key_path):
    """Store the aligned lists and the match statistics for ``key_path``.

    Each entry of ``list_unifier.unified`` is a ``(root, head, update)``
    triple. The three columns are split into separate lists, written into
    ``aligned_root`` / ``aligned_head`` / ``aligned_update`` at
    ``key_path``, and the unifier's ``head_stats``, ``update_stats`` and
    ``match_uids`` are recorded under the same key path.

    :param list_unifier: unifier whose ``unified``, ``head_stats``,
        ``update_stats`` and ``match_uids`` attributes are read.
    :param key_path: key path of the list that was unified.
    """
    # Function-scope import keeps this block independent of the module's
    # import section.
    from .nothing import NOTHING

    def _serializable(obj):
        # Only the NOTHING marker is cast to the placeholder so the result
        # can be reserialized to JSON. Using ``obj or PLACEHOLDER_STR``
        # would also clobber legitimate falsy values (0, "", [], {}, None).
        return PLACEHOLDER_STR if obj == NOTHING else obj

    root_list = []
    head_list = []
    update_list = []
    for root_obj, head_obj, update_obj in list_unifier.unified:
        root_list.append(_serializable(root_obj))
        head_list.append(_serializable(head_obj))
        update_list.append(_serializable(update_obj))

    # If we can't set that key path, a list to be merged wasn't there in
    # the first place; passing False asks the setter not to raise then.
    self.aligned_root = set_obj_at_key_path(self.aligned_root,
                                            key_path, root_list, False)
    self.aligned_head = set_obj_at_key_path(self.aligned_head,
                                            key_path, head_list, False)
    self.aligned_update = set_obj_at_key_path(self.aligned_update,
                                              key_path, update_list, False)
    self.head_stats[key_path] = list_unifier.head_stats
    self.update_stats[key_path] = list_unifier.update_stats
    self.match_uids[key_path] = list_unifier.match_uids

def _unify_lists(self, root, head, update, key_path):
dotted_key_path = get_dotted_key_path(key_path, True)

Expand Down Expand Up @@ -123,14 +152,8 @@ def _recursive_merge(self, root, head, update, key_path=()):
absolute_key_path + (idx, ))
new_list.append(new_obj)

self.head_stats[absolute_key_path] = unifier.head_stats
self.update_stats[absolute_key_path] = unifier.update_stats
self.match_uids[absolute_key_path] = unifier.match_uids

if list_field == ():
root = new_list
else:
set_obj_at_key_path(root, list_field, new_list)
root = set_obj_at_key_path(root, list_field, new_list)
self._build_aligned_lists_and_stats(unifier, absolute_key_path)

return root

Expand Down
6 changes: 6 additions & 0 deletions json_merger/nothing.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,12 @@ def __ne__(self, other):
return False
return True

# Python 2 truth-value hook: instances of this class are always falsy.
def __nonzero__(self):
return False

# Python 3 truth-value hook: instances of this class are always falsy.
def __bool__(self):
return False

# The string form is always the fixed text 'NOTHING'.
def __str__(self):
return 'NOTHING'

Expand Down
29 changes: 22 additions & 7 deletions json_merger/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,6 @@

from __future__ import absolute_import, print_function

import six

from .nothing import NOTHING


Expand All @@ -39,25 +37,42 @@ def get_obj_at_key_path(obj, key_path, default=None):
return current


def set_obj_at_key_path(obj, key_path, value):
obj = get_obj_at_key_path(obj, key_path[:-1], NOTHING)
if obj == NOTHING:
def set_obj_at_key_path(obj, key_path, value, raise_key_error=True):
    """Set ``value`` at ``key_path`` inside ``obj`` and return the result.

    The actual work is delegated to ``_set_obj_at_key_path``.

    :param obj: container to update.
    :param key_path: sequence of keys/indices leading to the target slot.
    :param value: value to store at the target slot.
    :param raise_key_error: when true (the default) a ``KeyError`` raised
        by the setter propagates to the caller; when false the error is
        swallowed and ``obj`` is returned as given.
    :returns: whatever ``_set_obj_at_key_path`` returns, or ``obj`` when
        the key path could not be set and ``raise_key_error`` is false.
    :raises KeyError: if the key path cannot be set and
        ``raise_key_error`` is true.
    """
    try:
        return _set_obj_at_key_path(obj, key_path, value)
    except KeyError:
        if raise_key_error:
            # A bare ``raise`` re-raises the active exception with its
            # original traceback intact (``raise e`` loses it on Python 2).
            raise
        return obj


def _set_obj_at_key_path(obj, key_path, value):
    """Store ``value`` at ``key_path`` inside ``obj`` and return the root.

    :param obj: root container.
    :param key_path: sequence of keys/indices; an empty path designates
        ``obj`` itself.
    :param value: value to store.
    :returns: ``value`` when ``key_path`` is empty, otherwise ``obj`` after
        its nested parent container has been updated in place.
    :raises KeyError: if the parent of the target cannot be found, or the
        parent rejects the final key/index.
    """
    # We are setting the obj param itself to another value.
    if len(key_path) == 0:
        return value

    # Try to get the parent of the object to be set.
    parent = get_obj_at_key_path(obj, key_path[:-1], NOTHING)
    if parent == NOTHING:
        raise KeyError(key_path)
    try:
        # Write only into the parent container. Writing the last key into
        # ``obj`` as well would corrupt the root for nested key paths.
        parent[key_path[-1]] = value
    except (KeyError, IndexError, TypeError):
        raise KeyError(key_path)
    return obj


def del_obj_at_key_path(obj, key_path, raise_key_error=True):
    """Delete the entry addressed by ``key_path`` from inside ``obj``.

    :param obj: root container.
    :param key_path: sequence of keys/indices leading to the entry.
    :param raise_key_error: when true (the default) a missing parent or a
        failing delete raises ``KeyError``; when false such failures are
        ignored.
    :raises KeyError: if the entry cannot be deleted and
        ``raise_key_error`` is true.
    """
    # Look up the container holding the entry without rebinding ``obj``.
    parent = get_obj_at_key_path(obj, key_path[:-1], NOTHING)
    if parent != NOTHING:
        try:
            del parent[key_path[-1]]
        except (KeyError, IndexError, TypeError):
            if raise_key_error:
                raise KeyError(key_path)
    elif raise_key_error:
        raise KeyError(key_path)


def has_prefix(key_path, prefix):
Expand Down
4 changes: 4 additions & 0 deletions tests/unit/test_nothing.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,7 @@ def test_nothing_neq():
def test_nothing_repr():
    """Both text conversions of NOTHING yield the literal 'NOTHING'."""
    for render in (str, repr):
        assert render(NOTHING) == 'NOTHING'


def test_nothing_bool():
    """NOTHING evaluates as false in a boolean context."""
    assert bool(NOTHING) is False
19 changes: 15 additions & 4 deletions tests/unit/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,22 +78,33 @@ def test_get_obj_at_key_path():

def test_set_obj_at_key_path():
    """Exercise set_obj_at_key_path on failing and succeeding key paths."""
    o = {'a': [{'a': [1, 2, 3]}]}
    original_o = {'a': [{'a': [1, 2, 3]}]}

    # Unreachable key paths: raise by default, and leave the object equal
    # to the original when raise_key_error is passed as False.
    unreachable_paths = (
        ['a', '1234'],
        ['a', 1234],
        ['a', 0, 'd', 10, 11, 12],
    )
    for bad_path in unreachable_paths:
        with pytest.raises(KeyError):
            set_obj_at_key_path(o, bad_path, 42)
        o = set_obj_at_key_path(o, bad_path, 42, False)
        assert o == original_o

    # Reachable key paths, from the deepest slot up to the root itself.
    # Each call is issued once and its return value is what gets checked.
    o = set_obj_at_key_path(o, ['a', 0, 'a', 0], 42)
    assert o['a'][0]['a'][0] == 42
    o = set_obj_at_key_path(o, ['a', 0, 'a'], 42)
    assert o['a'][0]['a'] == 42
    o = set_obj_at_key_path(o, ['a', 0], 42)
    assert o['a'][0] == 42
    o = set_obj_at_key_path(o, ['a'], 42)
    assert o['a'] == 42
    # An empty key path replaces the object itself.
    o = set_obj_at_key_path(o, [], 42)
    assert o == 42


def test_get_conf_set_for_key_path():
Expand Down

0 comments on commit c60f269

Please sign in to comment.