From 822075b312216242424462e42915585ede522a81 Mon Sep 17 00:00:00 2001 From: Gordon Guthrie Date: Sat, 24 Feb 2018 19:37:38 +0000 Subject: [PATCH 1/3] Fix of bugs documented in the new regression tests. Change also: * refactors the code to make it more readable (necessary to debug it) * adds a fuzz test that generates 1,000 sample random tests --- lib/diff.ex | 123 ++++++++++++++++++++++++-------------- test/diff_string_test.exs | 99 ++++++++++++++++++++++++++++-- 2 files changed, 172 insertions(+), 50 deletions(-) diff --git a/lib/diff.ex b/lib/diff.ex index 815a8bd..b62761b 100644 --- a/lib/diff.ex +++ b/lib/diff.ex @@ -41,16 +41,23 @@ defmodule Diff do defp do_patch(original, %Diff.Insert{element: element, index: index}) do { left, right } = Enum.split(original, index) - left ++ element ++ right + _return = left ++ element ++ right end - defp do_patch(original, %Diff.Delete{ element: _, index: index, length: length }) do + defp do_patch(original, %Diff.Delete{ element: element, index: index, + length: length }) do { left, deleted } = Enum.split(original, index) - { _, right } = Enum.split(deleted, length) - left ++ right + { actuallydeleted, right } = Enum.split(deleted, length) + case element do + ^actuallydeleted -> + _return = left ++ right + other -> + exit("failed delete") + end end - defp do_patch(original, %Diff.Modified{element: element, old_element: _, index: index, length: length}) do + defp do_patch(original, %Diff.Modified{element: element, old_element: _, + index: index, length: length}) do { left, deleted } = Enum.split(original, index) { _, right } = Enum.split(deleted, length) left ++ element ++ right @@ -85,12 +92,15 @@ defmodule Diff do end defp longest_common_subsequence(x, y, x_length, y_length) do - matrix = Matrix.new(x_length + 1, y_length + 1) - matrix = Enum.reduce(1..x_length, matrix, fn(i, matrix) -> + matrix = Matrix.new(x_length + 1, y_length + 1) - Enum.reduce(1..y_length, matrix, fn(j, matrix) -> + # a reduction over a 2D array requires a closure inside an anonymous function + # sorry but there is nothing to be done about that + rowreductionFn = fn(i, matrix) -> + # setup the second closure + columnreductionFn = fn(j, matrix) -> if Enum.fetch!(x, i-1) == Enum.fetch!(y, j-1) do value = Matrix.get(matrix, i-1, j-1) Matrix.put(matrix, i, j, value + 1) @@ -100,89 +110,114 @@ defmodule Diff do Matrix.put(matrix, i, j, max(original_value, changed_value)) end + end - end) + Enum.reduce(1..y_length, matrix, columnreductionFn) - end) + end + + _matrix = Enum.reduce(1..x_length, matrix, rowreductionFn) - matrix end defp build_diff(matrix, x, y, i, j, edits, options) do cond do i > 0 and j > 0 and Enum.fetch!(x, i-1) == Enum.fetch!(y, j-1) -> - if Dict.get(options, :keep_unchanged, false) do - edits = edits ++ [{:unchanged, Enum.fetch!(x, i-1), i-1}] - end - - build_diff(matrix, x, y, i-1, j-1, edits, options) + newedits = if Dict.get(options, :keep_unchanged, false) do + edits ++ [{:unchanged, Enum.fetch!(x, i-1), i-1}] + else + edits + end + build_diff(matrix, x, y, i-1, j-1, newedits, options) j > 0 and (i == 0 or Matrix.get(matrix, i, j-1) >= Matrix.get(matrix,i-1, j)) -> - build_diff(matrix, x, y, i, j-1, edits ++ [{:insert, Enum.fetch!(y, j-1), j-1}], options) + newedit = {:insert, Enum.fetch!(y, j-1), j-1} + build_diff(matrix, x, y, i, j-1, edits ++ [newedit], options) i > 0 and (j == 0 or Matrix.get(matrix, i, j-1) < Matrix.get(matrix, i-1, j)) -> - build_diff(matrix, x, y, i-1, j, edits ++ [{:delete, Enum.fetch!(x, i-1), i-1}], options) + newdelete = {:delete, Enum.fetch!(x, i-1), j} + build_diff(matrix, x, y, i-1, j, edits ++ [newdelete], options) true -> edits |> Enum.reverse end end defp build_changes(edits, options) do - Enum.reduce(edits, [], fn({type, char, index}, changes) -> + + # we now have a set of individual letter changes + # but if there is a series of inserts or deletes then + # we need to reduce them into single multichar changes + mergeindividualchangesFn = fn({type, char, index} = params, changes) -> if changes == [] do - changes ++ [change(type, char, index)] + changes ++ [make_change(type, char, index)] else change = List.last(changes) regex = Dict.get(options, :ignore) - cond do regex && Regex.match?(regex, char) -> - changes ++ [change(:ignored, char, index)] - is_type(change, type) && index == (change.index + change.length) -> - change = %{change | element: change.element ++ [char], length: change.length + 1 } - - if regex && Regex.match?(regex, Enum.join(change.element)) do - change = %Ignored{ element: change.element, index: change.index, length: change.length } - end - + changes ++ [make_change(:ignored, char, index)] + # one branch for deletes + is_type(change, type) && type == :delete && index == change.index -> + change = if regex && Regex.match?(regex, Enum.join(change.element)) do + %Ignored{ element: change.element, index: change.index, + length: change.length } + else + %{change | element: change.element ++ [char], length: + change.length + 1 } + end + List.replace_at(changes, length(changes)-1, change) + # a different branch for everyone else + is_type(change, type) && type != :delete && index == (change.index + change.length) -> + change = if regex && Regex.match?(regex, Enum.join(change.element)) do + %Ignored{ element: change.element, index: change.index, + length: change.length } + else + %{change | element: change.element ++ [char], length: + change.length + 1 } + end List.replace_at(changes, length(changes)-1, change) true -> - changes ++ [change(type, char, index)] - + changes ++ [make_change(type, char, index)] end end + end - - end) - - |> Enum.reduce([], fn(x, changes) -> + # if we change a single letter it will be a consecutive delete/insert + # this reduction merges them into a single modified statement + makemodifiedFn = fn(x, changes) -> if changes == [] do [x] else last_change = List.last(changes) - - if is_type(last_change, :delete) and is_type(x, :insert) and last_change.index == x.index and last_change.length == x.length do - last_change = %Modified{ element: x.element, old_element: last_change.element, index: x.index, length: x.length } + if is_type(last_change, :delete) + and is_type(x, :insert) + and last_change.index == x.index + and last_change.length == x.length do + last_change = %Modified{ element: x.element, old_element: last_change.element, + index: x.index, length: x.length } List.replace_at(changes, length(changes) - 1, last_change) else changes ++ [x] end end - end) - end + end + # Now do both these sets of reduction on the edits + Enum.reduce(edits, [], mergeindividualchangesFn) + |> Enum.reduce([], makemodifiedFn) + end - defp change(:insert, char, index) do + defp make_change(:insert, char, index) do %Insert{ element: [char], index: index, length: 1 } end - defp change(:delete, char, index) do + defp make_change(:delete, char, index) do %Delete{ element: [char], index: index, length: 1 } end - defp change(:unchanged, char, index) do + defp make_change(:unchanged, char, index) do %Unchanged{ element: [char], index: index, length: 1 } end - defp change(:ignored, char, index) do + defp make_change(:ignored, char, index) do %Ignored{ element: [char], index: index, length: 1 } end diff --git a/test/diff_string_test.exs b/test/diff_string_test.exs index 374380e..2daff72 100644 --- a/test/diff_string_test.exs +++ b/test/diff_string_test.exs @@ -26,20 +26,107 @@ defmodule Diff.String.Test do assert results == [%Diff.Modified{index: 1, length: 2, old_element: ["e", "s"], element: ["a", "r"]}] end - test "applies patches correctly" do + test "ignores properly" do original = "test" - changed = "taste" + changed = "test test" + + patches = Diff.diff(original, changed, ignore: ~r/^\s+$/) + assert List.last(patches) == %Diff.Ignored{index: 4, length: 1, element: [" "]} + end + + # rest of the tests are models tests + # take two strings and generate a set of patches + # apply the patches to the original and assert that it matches + # the changed string + + test "applies patches when strings match" do + original = "test" + changed = "test" patches = Diff.diff(original, changed) assert Diff.patch(original, patches, &Enum.join/1) == changed end - test "ignores properly" do + + test "applies patches when one character is added" do original = "test" - changed = "test test" + changed = "tests" - patches = Diff.diff(original, changed, ignore: ~r/^\s+$/) - assert List.last(patches) == %Diff.Ignored{index: 4, length: 1, element: [" "]} + patches = Diff.diff(original, changed) + assert Diff.patch(original, patches, &Enum.join/1) == changed + end + + test "applies patches correctly when one character is removed" do + original = "test" + changed = "tes" + + patches = Diff.diff(original, changed) + assert Diff.patch(original, patches, &Enum.join/1) == changed + end + + test "applies patches correctly when one character is modified" do + original = "test" + changed = "tesr" + + patches = Diff.diff(original, changed) + assert Diff.patch(original, patches, &Enum.join/1) == changed + end + + # regression tests + test "applies patches correctly with mixed inserts and deletes" do + original = "xab" + changed = "x1a2" + + patches = Diff.diff(original, changed) + assert Diff.patch(original, patches, &Enum.join/1) == changed + end + + test "applies patches correctly with mixed inserts and deletes (2)" do + original = "abcc" + changed = "bd" + + patches = Diff.diff(original, changed) + assert Diff.patch(original, patches, &Enum.join/1) == changed + end + + test "applies patches correctly with mixed inserts and deletes (3)" do + original = "aabc" + changed = "be" + + patches = Diff.diff(original, changed) + assert Diff.patch(original, patches, &Enum.join/1) == changed + end + + # bang a random test in see how she goes + test "random stuff going on" do + + original = "sfjksfjk324m, b0 sdlkaj kdsf " + changed = "iuw ewjnpjenjew o90ufdh ewnm2320y" + + patches = Diff.diff(original, changed) + assert Diff.patch(original, patches, &Enum.join/1) == changed + end + + # Fuzz test - generate random strings and check they all work + + test "fuzz test" do + fuzz(1000) + end + + defp fuzz(0) do + :ok + end + + defp fuzz(n) when is_integer(n) and n > 0 do + + no_of_chars = 100 + offset = :crypto.rand_uniform(0, no_of_chars) + sign = :crypto.rand_uniform(0, 2) -1 + original = :crypto.strong_rand_bytes(no_of_chars) + changed = :crypto.strong_rand_bytes(no_of_chars + (offset * sign)) + patches = Diff.diff(original, changed) + assert Diff.patch(original, patches, &Enum.join/1) == changed + fuzz(n - 1) end end From d3ef76371e550968260ab55638c3c6a0d6afb1d7 Mon Sep 17 00:00:00 2001 From: Gordon Guthrie Date: Sun, 25 Feb 2018 11:36:23 +0000 Subject: [PATCH 2/3] minor tidy up --- test/diff_string_test.exs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/diff_string_test.exs b/test/diff_string_test.exs index 2daff72..f1f97b8 100644 --- a/test/diff_string_test.exs +++ b/test/diff_string_test.exs @@ -18,7 +18,8 @@ defmodule Diff.String.Test do test "returns changes when one character is modified" do results = Diff.diff("test", "tesr") - assert results == [%Diff.Modified{index: 3, length: 1, old_element: ["t"], element: ["r"]}] + assert results == [%Diff.Modified{index: 3, length: 1, old_element: ["t"], + element: ["r"]}] end test "returns changes when modified in the middle" do @@ -47,7 +48,6 @@ defmodule Diff.String.Test do assert Diff.patch(original, patches, &Enum.join/1) == changed end - test "applies patches when one character is added" do original = "test" changed = "tests" From 9b8b1fde494be6a82502bdf8bd18c64552b31805 Mon Sep 17 00:00:00 2001 From: Gordon Guthrie Date: Sun, 25 Feb 2018 11:37:01 +0000 Subject: [PATCH 3/3] Added a annotated patch function that annotates changes when apply the patches Updated the README with an example Added a new test suite --- README.md | 27 ++++++++++- lib/diff.ex | 91 ++++++++++++++++++++++++++--------- test/diff_annotation_test.exs | 70 +++++++++++++++++++++++++++ 3 files changed, 165 insertions(+), 23 deletions(-) create mode 100644 test/diff_annotation_test.exs diff --git a/README.md b/README.md index b361707..c5c7ffc 100644 --- a/README.md +++ b/README.md @@ -48,6 +48,31 @@ iex(2)> Diff.patch("test", patches, &Enum.join/1) "taste" ``` +## Diff.annotated_patch -`Diff.diff` and `Diff.patch` both take as a first parameter a term that has an implementation of the `Diff.Diffable` protocol. +`Diff.annotated_patch` takes a a data structure of annotations, and uses them when applying the patch. + +Usage: +```elixir +iex(1)> annotations = [ +...(1)> %{delete: %{before: "", +...(1)> after: ""}}, +...(1)> %{insert: %{before: "", +...(1)> after: ""}}, +...(1)> %{modified: %{before: "", +...(1)> after: ""}} +...(1)> ] +[%{delete: %{after: "", before: ""}}, + %{insert: %{after: "", before: ""}}, + %{modified: %{after: "", before: ""}}] +iex(2)> patches = Diff.diff("test", "tast") +[%Diff.Modified{element: ["a"], index: 1, length: 1, old_element: ["e"]}] +iex(3)> Diff.annotated_patch("test", patches, annotations) +iex(3)> Diff.annotated_patch("test", patches, annotations) +["t", "", "a", "", "s", "t"] +``` + +It takes the same optional join function as `Diff.patch` as you would expect + +`Diff.diff`, `Diff.patch` and `Diff.annotated_patch` all take as a first parameter a term that has an implementation of the `Diff.Diffable` protocol. By default one exist for `BitString` and `List` diff --git a/lib/diff.ex b/lib/diff.ex index b62761b..e29547c 100644 --- a/lib/diff.ex +++ b/lib/diff.ex @@ -25,51 +25,79 @@ defmodule Diff do defstruct [:element, :index, :length] end + @doc""" + Applies with patches with supplied annotation (top and tail) + This is used to generate visual diffs, etc + Shares the same code as patch + """ + def annotated_patch(original, patches, annotations, from_list_fn \\ fn(list) -> list end) do + apply_patches(original, patches, annotations, from_list_fn) + end + @doc """ Applies the patches from a previous diff to the given string. Will return the patched version as a list unless a from_list_fn/1 is supplied. This function will takes the patched list as input and outputs the result. """ def patch(original, patches, from_list_fn \\ fn(list) -> list end) do + apply_patches(original, patches, [], from_list_fn) + end + + defp apply_patches(original, patches, annotations, from_list_fn) do original = Diffable.to_list(original) - Enum.reduce(patches, original, fn(patch, changed) -> - do_patch(changed, patch) - end) - |> from_list_fn.() + patchfn = fn(patch, {increment, changed}) -> + do_patch({increment, changed}, patch, annotations) + end + + increment = 0 + {_, returnlist} = Enum.reduce(patches, {increment, original}, patchfn) + from_list_fn.(returnlist) end - defp do_patch(original, %Diff.Insert{element: element, index: index}) do - { left, right } = Enum.split(original, index) - _return = left ++ element ++ right + defp do_patch({incr, original}, %Diff.Insert{element: element, index: index}, + annotations) do + { left, right } = Enum.split(original, index + incr) + {newelement, newincr} = annotate(element, :insert, annotations, incr) + return = left ++ newelement ++ right + {newincr, return} end - defp do_patch(original, %Diff.Delete{ element: element, index: index, - length: length }) do - { left, deleted } = Enum.split(original, index) + defp do_patch({incr, original}, %Diff.Delete{ element: element, index: index, + length: length }, annotations) do + { left, deleted } = Enum.split(original, index + incr) { actuallydeleted, right } = Enum.split(deleted, length) case element do ^actuallydeleted -> - _return = left ++ right - other -> + {newelement, newincr} = annotate(element, :deleted, annotations, incr) + return = left ++ newelement ++ right + {newincr, return} + _other -> exit("failed delete") end end - defp do_patch(original, %Diff.Modified{element: element, old_element: _, - index: index, length: length}) do - { left, deleted } = Enum.split(original, index) + defp do_patch({incr, original}, + %Diff.Modified{ element: element, old_element: _, + index: index, length: length}, + annotations) do + { left, deleted } = Enum.split(original, index + incr) { _, right } = Enum.split(deleted, length) - left ++ element ++ right + {newelement, newincr} = annotate(element, :modified, annotations, incr) + return = left ++ newelement ++ right + {newincr, return} end - defp do_patch(original, %Diff.Unchanged{}) do - original + defp do_patch({incr, original}, %Diff.Unchanged{}, _annotations) do + {incr, original} end - defp do_patch(original, %Diff.Ignored{element: element, index: index}) do - { left, right } = Enum.split(original, index) - left ++ element ++ right + defp do_patch({incr, original}, %Diff.Ignored{element: element, index: index}, + annotations) do + { left, right } = Enum.split(original, index + incr) + {newelement, newincr} = annotate(element, :ignored, annotations, incr) + return = left ++ newelement ++ right + {newincr, return} end @doc""" @@ -145,7 +173,7 @@ defmodule Diff do # we now have a set of individual letter changes # but if there is a series of inserts or deletes then # we need to reduce them into single multichar changes - mergeindividualchangesFn = fn({type, char, index} = params, changes) -> + mergeindividualchangesFn = fn({type, char, index}, changes) -> if changes == [] do changes ++ [make_change(type, char, index)] else @@ -241,4 +269,23 @@ defmodule Diff do false end + defp annotate(list, type, annotations, increment) do + annotation = for a <- annotations, + Map.get(a, type) != nil, do: Map.get(a, type) + case {type, annotation} do + {:deleted, []} -> {[], increment} + {_, []} -> {list, increment} + {:deleted, [annotation]} -> apply_deletion(list, annotation, increment) + {_, [annotation]} -> apply_annotation(list, annotation, increment) + end + end + + defp apply_deletion(list, annotation, increment) do + {[annotation.before] ++ list ++ [annotation.after], increment + 2} + end + + defp apply_annotation(list, annotation, increment) do + {[annotation.before] ++ list ++ [annotation.after], increment + 2} + end + end diff --git a/test/diff_annotation_test.exs b/test/diff_annotation_test.exs new file mode 100644 index 0000000..dee97ba --- /dev/null +++ b/test/diff_annotation_test.exs @@ -0,0 +1,70 @@ +defmodule Diff.Annotation.Test do + use ExUnit.Case + + test "no results when strings match" do + original = "test" + changed = "test" + patches = Diff.diff(original, changed) + assert Diff.annotated_patch(original, patches, get_annotations(), + &Enum.join/1) == changed + end + + test "do a modification" do + original = "test" + changed = "tast" + patches = Diff.diff(original, changed) + final = Diff.annotated_patch(original, patches, get_annotations(), + &Enum.join/1) + assert final, "tast" + end + + test "do a deletion" do + original = "test" + changed = "tst" + patches = Diff.diff(original, changed) + final = Diff.annotated_patch(original, patches, get_annotations(), + &Enum.join/1) + assert final, "tast" + end + + test "do an insertion" do + original = "test" + changed = "teast" + patches = Diff.diff(original, changed) + final = Diff.annotated_patch(original, patches, get_annotations(), + &Enum.join/1) + assert final, "teast" + end + + test "do a mixed test" do + original = "abcdefghijklmnopqrst" + changed = "abcefgh1ikkmnqrxxst" + patches = Diff.diff(original, changed) + final = Diff.annotated_patch(original, patches, get_annotations(), + &Enum.join/1) + assert final, """ + abcd + fgh + 1 + ik + k + mn + abcop + qr + xx + st + """ + end + + defp get_annotations() do + [ + %{delete: %{before: "", + after: ""}}, + %{insert: %{before: "", + after: ""}}, + %{modified: %{before: "", + after: ""}} + ] + end + +end