Skip to content

Commit

Permalink
matcher: Add uids for all the matches
Browse files Browse the repository at this point in the history
Signed-off-by: Mihai Bivol <mm.bivol@gmail.com>
  • Loading branch information
mihaibivol committed May 10, 2016
1 parent 7fddfe9 commit caf82a7
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 26 deletions.
39 changes: 29 additions & 10 deletions json_merger/graph_builder.py
Expand Up @@ -27,7 +27,6 @@
import six

from .comparator import DefaultComparator
from .conflict import Conflict, ConflictType
from .nothing import NOTHING

FIRST = 'first'
Expand Down Expand Up @@ -61,14 +60,28 @@ def __init__(self, lst, root):
self.in_result_idx = set()
self.not_in_result_idx = set(range(len(lst)))
self.not_in_result_root_match_idx = set()
self.next_root_match_uid = 0

def move_to_result(self, lst_idx):
# For a given index in the initial list retrieve the match uid.
self.match_uids = {}
# For a given index in the initial list retrieve root match uid.
self.lst_root_match_uids = {}
# For a given index in the root list retrieve root match uid.
self.root_root_match_uids = {}

def move_to_result(self, lst_idx, match_uid):
self.in_result_idx.add(lst_idx)
self.not_in_result_idx.remove(lst_idx)
self.match_uids[lst_idx] = match_uid

def add_root_match(self, lst_idx, root_idx):
self.lst_root_match_uids[lst_idx] = self.next_root_match_uid
self.root_root_match_uids[root_idx] = self.next_root_match_uid
self.next_root_match_uid += 1

def add_root_match(self, lst_idx):
if lst_idx in self.in_result_idx:
return

self.not_in_result_root_match_idx.add(lst_idx)

@property
Expand Down Expand Up @@ -127,6 +140,7 @@ def __init__(self, root, head, update, sources,
self._update_idx_to_node = {}

self._next_node_id = 0
self.match_uids = {}

def _new_node_id(self):
node_id = self._next_node_id
Expand Down Expand Up @@ -178,21 +192,26 @@ def _populate_nodes(self):
self._push_node(root_elem, head_elem, update_elem)

def _build_stats(self):
for root_idx, head_idx, update_idx in self._node_src_indices.values():
match_uid = 0
for node_id, indices in self._node_src_indices.items():
root_idx, head_idx, update_idx = indices
match_uid += 1

if head_idx >= 0:
self.head_stats.move_to_result(head_idx)
self.head_stats.move_to_result(head_idx, match_uid)
if update_idx >= 0:
self.update_stats.move_to_result(update_idx)
self.update_stats.move_to_result(update_idx, match_uid)
self.match_uids[node_id] = match_uid

for idx in self.head_stats.not_in_result_idx:
for idx in range(len(self.head)):
root_idx, root = self._get_match('root', 'head', idx)
if root_idx >= 0:
self.head_stats.add_root_match(idx)
self.head_stats.add_root_match(idx, root_idx)

for idx in self.update_stats.not_in_result_idx:
for idx in range(len(self.update)):
root_idx, root = self._get_match('root', 'update', idx)
if root_idx >= 0:
self.head_stats.add_root_match(idx)
self.update_stats.add_root_match(idx, root_idx)

def build_graph(self):
self._populate_nodes()
Expand Down
11 changes: 6 additions & 5 deletions json_merger/list_unify.py
Expand Up @@ -33,7 +33,6 @@
GraphBuilderError, ListMatchGraphBuilder, sort_cyclic_graph_best_effort,
toposort
)
from .nothing import NOTHING

_OPERATIONS = [
'KEEP_ONLY_HEAD_ENTITIES',
Expand Down Expand Up @@ -87,7 +86,6 @@ def __init__(self, root, head, update, operation,
self.update_stats = None

# Whether to raise error on deleting a head entity.
# TODO implement this
self.raise_on_head_delete = operation in _RAISE_ERROR_OPS
# Sources from which to keep entities.
self.sources = _SOURCES[operation]
Expand All @@ -96,6 +94,7 @@ def __init__(self, root, head, update, operation,
self.pick_first = _PICK_FIRST[operation]

self.unified = []
self.match_uids = []

def unify(self):
graph_builder = ListMatchGraphBuilder(
Expand All @@ -106,13 +105,14 @@ def unify(self):
except GraphBuilderError as e:
# Can't partially recover from this, just keep self.head and call
# for manual alignment with self.update.
# TODO better fallback from this
self.unified = [(h, h, h) for h in self.head]
raise MergeError(e.message,
[Conflict(ConflictType.MANUAL_MERGE, (),
self.update)])

self.head_stats = graph_builder.head_stats
self.update_stats = graph_builder.update_stats
finally:
self.head_stats = graph_builder.head_stats
self.update_stats = graph_builder.update_stats

try:
node_order = toposort(graph, self.pick_first)
Expand All @@ -123,6 +123,7 @@ def unify(self):
finally:
for node in node_order:
self.unified.append(nodes[node])
self.match_uids.append(graph_builder.match_uids[node])
if (self.raise_on_head_delete and
self.head_stats.not_in_result_not_root_match_idx):
removed = self.head_stats.not_in_result_not_root_match
Expand Down
30 changes: 19 additions & 11 deletions json_merger/merger.py
Expand Up @@ -53,6 +53,9 @@ def __init__(self, root, head, update,
self.root = copy.deepcopy(root)
self.head = copy.deepcopy(head)
self.update = copy.deepcopy(update)
self.match_uids = {}
self.head_stats = {}
self.update_stats = {}

self.conflicts = []
self.merged_root = None
Expand All @@ -74,7 +77,7 @@ def _merge_objects(self, root, head, update, key_path):
self.conflicts.extend(c.with_prefix(key_path) for c in e.content)
return object_merger

def _merge_lists(self, root, head, update, key_path):
def _unify_lists(self, root, head, update, key_path):
dotted_key_path = get_dotted_key_path(key_path, True)

operation = self.list_merge_ops.get(dotted_key_path,
Expand All @@ -88,14 +91,7 @@ def _merge_lists(self, root, head, update, key_path):
except MergeError as e:
self.conflicts.extend(c.with_prefix(key_path) for c in e.content)

new_root_list = []
for idx, objects in enumerate(list_unifier.unified):
root_obj, head_obj, update_obj = objects
new_obj = self._recursive_merge(root_obj, head_obj, update_obj,
key_path + (idx, ))
new_root_list.append(new_obj)

return new_root_list
return list_unifier

def _recursive_merge(self, root, head, update, key_path=()):
dotted_key_path = get_dotted_key_path(key_path, True)
Expand All @@ -117,8 +113,20 @@ def _recursive_merge(self, root, head, update, key_path=()):
head_l = get_obj_at_key_path(head, list_field, [])
update_l = get_obj_at_key_path(update, list_field, [])

new_list = self._merge_lists(root_l, head_l, update_l,
absolute_key_path)
unifier = self._unify_lists(root_l, head_l, update_l,
absolute_key_path)

new_list = []
for idx, objects in enumerate(unifier.unified):
root_obj, head_obj, update_obj = objects
new_obj = self._recursive_merge(root_obj, head_obj, update_obj,
absolute_key_path + (idx, ))
new_list.append(new_obj)

self.head_stats[absolute_key_path] = unifier.head_stats
self.update_stats[absolute_key_path] = unifier.update_stats
self.match_uids[absolute_key_path] = unifier.match_uids

if list_field == ():
root = new_list
else:
Expand Down

0 comments on commit caf82a7

Please sign in to comment.