From b10ee9ac73637f528978fd412a679d5cebaa34e8 Mon Sep 17 00:00:00 2001 From: Akshat Gupta Date: Wed, 13 Aug 2025 00:03:30 +0530 Subject: [PATCH] Detect moved items in ordered iterables --- README.md | 12 ++++++ deepdiff/diff.py | 90 ++++++++++++++++++++++++++++++++--------- docs/basics.rst | 8 ++++ tests/test_diff_text.py | 19 +++++---- 4 files changed, 102 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index 5c28ee1..de4f7a5 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,18 @@ Tested on Python 3.9+ and PyPy3. +### Detect moved items in lists + +DeepDiff reports items that swap positions within an ordered iterable under +the ``iterable_item_moved`` key: + +```python +>>> from deepdiff import DeepDiff +>>> DeepDiff([1, 2, 3, 4], [4, 2, 3, 1], verbose_level=2) +{'iterable_item_moved': {'root[0]': {'new_path': 'root[3]', 'value': 1}, + 'root[3]': {'new_path': 'root[0]', 'value': 4}}} +``` + - **[Documentation](https://zepworks.com/deepdiff/8.6.0/)** ## What is new? diff --git a/deepdiff/diff.py b/deepdiff/diff.py index 43ccd00..eb34247 100755 --- a/deepdiff/diff.py +++ b/deepdiff/diff.py @@ -1015,32 +1015,28 @@ def _diff_ordered_iterable_by_difflib( opcodes = seq.get_opcodes() opcodes_with_values = [] + replace_opcodes: List[Opcode] = [] - # TODO: this logic should be revisted so we detect reverse operations - # like when a replacement happens at index X and a reverse replacement happens at index Y - # in those cases we have a "iterable_item_moved" operation. 
for tag, t1_from_index, t1_to_index, t2_from_index, t2_to_index in opcodes: if tag == 'equal': - opcodes_with_values.append(Opcode( - tag, t1_from_index, t1_to_index, t2_from_index, t2_to_index, - )) + opcodes_with_values.append( + Opcode(tag, t1_from_index, t1_to_index, t2_from_index, t2_to_index) + ) continue - # print('{:7} t1[{}:{}] --> t2[{}:{}] {!r:>8} --> {!r}'.format( - # tag, t1_from_index, t1_to_index, t2_from_index, t2_to_index, level.t1[t1_from_index:t1_to_index], level.t2[t2_from_index:t2_to_index])) - opcodes_with_values.append(Opcode( - tag, t1_from_index, t1_to_index, t2_from_index, t2_to_index, - old_values = level.t1[t1_from_index: t1_to_index], - new_values = level.t2[t2_from_index: t2_to_index], - )) + opcode = Opcode( + tag, + t1_from_index, + t1_to_index, + t2_from_index, + t2_to_index, + old_values=level.t1[t1_from_index:t1_to_index], + new_values=level.t2[t2_from_index:t2_to_index], + ) + opcodes_with_values.append(opcode) if tag == 'replace': - self._diff_by_forming_pairs_and_comparing_one_by_one( - level, local_tree=local_tree, parents_ids=parents_ids, - _original_type=_original_type, child_relationship_class=child_relationship_class, - t1_from_index=t1_from_index, t1_to_index=t1_to_index, - t2_from_index=t2_from_index, t2_to_index=t2_to_index, - ) + replace_opcodes.append(opcode) elif tag == 'delete': for index, x in enumerate(level.t1[t1_from_index:t1_to_index]): change_level = level.branch_deeper( @@ -1061,6 +1057,62 @@ def _diff_ordered_iterable_by_difflib( child_relationship_param2=index + t2_from_index, ) self._report_result('iterable_item_added', change_level, local_tree=local_tree) + + used: Set[int] = set() + for i, opcode_a in enumerate(replace_opcodes): + if i in used: + continue + for j in range(i + 1, len(replace_opcodes)): + opcode_b = replace_opcodes[j] + if j in used: + continue + if ( + opcode_a.old_values == opcode_b.new_values + and opcode_a.new_values == opcode_b.old_values + and len(opcode_a.old_values or []) == 
len(opcode_b.old_values or []) + ): + length = len(opcode_a.old_values or []) + for offset in range(length): + val_a = opcode_a.old_values[offset] + new_index_a = opcode_b.t2_from_index + offset + change_level = level.branch_deeper( + val_a, + val_a, + child_relationship_class=child_relationship_class, + child_relationship_param=opcode_a.t1_from_index + offset, + child_relationship_param2=new_index_a, + ) + self._report_result('iterable_item_moved', change_level, local_tree=local_tree) + + val_b = opcode_b.old_values[offset] + new_index_b = opcode_a.t2_from_index + offset + change_level = level.branch_deeper( + val_b, + val_b, + child_relationship_class=child_relationship_class, + child_relationship_param=opcode_b.t1_from_index + offset, + child_relationship_param2=new_index_b, + ) + self._report_result('iterable_item_moved', change_level, local_tree=local_tree) + + used.update({i, j}) + break + + for idx, opcode in enumerate(replace_opcodes): + if idx in used: + continue + self._diff_by_forming_pairs_and_comparing_one_by_one( + level, + local_tree=local_tree, + parents_ids=parents_ids, + _original_type=_original_type, + child_relationship_class=child_relationship_class, + t1_from_index=opcode.t1_from_index, + t1_to_index=opcode.t1_to_index, + t2_from_index=opcode.t2_from_index, + t2_to_index=opcode.t2_to_index, + ) + return opcodes_with_values diff --git a/docs/basics.rst b/docs/basics.rst index c944d28..0897d0e 100644 --- a/docs/basics.rst +++ b/docs/basics.rst @@ -106,6 +106,14 @@ List difference >>> pprint (ddiff, indent = 2) {'iterable_item_removed': {"root[4]['b'][2]": 3, "root[4]['b'][3]": 4}} +List item moved + >>> t1 = [1, 2, 3, 4] + >>> t2 = [4, 2, 3, 1] + >>> pprint(DeepDiff(t1, t2, verbose_level=2), indent=2) + { 'iterable_item_moved': { + 'root[0]': {'new_path': 'root[3]', 'value': 1}, + 'root[3]': {'new_path': 'root[0]', 'value': 4}}} + List that contains dictionary: >>> t1 = {1:1, 2:2, 3:3, 4:{"a":"hello", "b":[1, 2, {1:1, 2:2}]}} >>> t2 = {1:1, 2:2, 
3:3, 4:{"a":"hello", "b":[1, 2, {1:3}]}} diff --git a/tests/test_diff_text.py b/tests/test_diff_text.py index fb0087b..7eb9a0a 100755 --- a/tests/test_diff_text.py +++ b/tests/test_diff_text.py @@ -1819,14 +1819,17 @@ def test_list_item_removed_from_the_middle(self): assert {"root[4]"} == diff.affected_paths assert {4} == diff.affected_root_keys - # TODO: we need to support reporting that items have been swapped - # def test_item_moved(self): - # # currently all the items in the list need to be hashables - # t1 = [1, 2, 3, 4] - # t2 = [4, 2, 3, 1] - # diff = DeepDiff(t1, t2) - # result = {} # it should show that those items are swapped. - # assert result == diff + def test_item_moved(self): + t1 = [1, 2, 3, 4] + t2 = [4, 2, 3, 1] + diff = DeepDiff(t1, t2, verbose_level=2) + result = { + 'iterable_item_moved': { + 'root[0]': {'new_path': 'root[3]', 'value': 1}, + 'root[3]': {'new_path': 'root[0]', 'value': 4}, + } + } + assert result == diff def test_list_item_values_replace_in_the_middle(self): t1 = [0, 1, 2, 3, 'bye', 5, 6, 7, 8, 'a', 'b', 'c']