Skip to content

Commit

Permalink
Added optimisations for strings.
Browse files Browse the repository at this point in the history
  • Loading branch information
brentonashworth committed Nov 13, 2010
1 parent 68f4144 commit 93e8be9
Show file tree
Hide file tree
Showing 11 changed files with 91 additions and 30 deletions.
1 change: 1 addition & 0 deletions .cljrc.clj
@@ -1,4 +1,5 @@
(set! *print-length* 103)
(set! *print-level* 15)
#_(set! *warn-on-reflection* true)

(defn exit [] (. System exit 0))
Binary file removed charts/length_10.png
Binary file not shown.
Binary file removed charts/length_5.png
Binary file not shown.
Binary file removed charts/length_50.png
Binary file not shown.
Binary file removed charts/length_add_in_middle.png
Binary file not shown.
Binary file removed charts/length_move_first_to_end.png
Binary file not shown.
Binary file removed charts/mutations_100.png
Binary file not shown.
Binary file removed charts/mutations_1000.png
Binary file not shown.
93 changes: 67 additions & 26 deletions src/clj_diff/optimisations.clj
Expand Up @@ -7,23 +7,52 @@
;; Use protocols to provide different implementations for strings and
;; sequences.

(defn common-prefix [a b]
(let [a (seq a)
b (seq b)]
(let [common (count (take-while true? (map #(= %1 %2) a b)))
a (drop common a)
b (drop common b)]
[common a b])))

(defn common-suffix [a b]
(let [a (vec (seq a))
b (vec (seq b))]
(let [common (count (take-while true? (map #(= %1 %2)
(rseq a)
(rseq b))))
a (take (inc (- (count a) common)) a)
b (take (inc (- (count b) common)) b)]
[common a b])))
(defprotocol Common
(common-prefix [a b])
(common-suffix [a b]))

(extend-protocol Common

clojure.lang.Sequential
(common-prefix [a b]
(let [a (seq a)
b (seq b)
common (count (take-while true? (map #(= %1 %2) a b)))]
[common (drop common a) (drop common b)]))
(common-suffix [a b]
(let [a (vec (seq a))
b (vec (seq b))
common (count (take-while true? (map #(= %1 %2)
(rseq a)
(rseq b))))]
[common
(take (- (count a) common) a)
(take (- (count b) common) b)]))

java.lang.String
(common-prefix [^String a ^String b]
(let [n (Math/min (.length a) (.length b))
i (loop [i 0]
(if (< i n)
(if (not= (.charAt a i) (.charAt b i))
i
(recur (inc i)))
n))]
[i (.substring a i) (.substring b i)]))
(common-suffix [^String a ^String b]
(let [a-length (.length a)
b-length (.length b)
n (Math/min a-length b-length)
i (loop [i 1]
(if (<= i n)
(if (not= (.charAt a (- a-length i))
(.charAt b (- b-length i)))
(dec i)
(recur (inc i)))
n))]
[i
(.substring a 0 (- a-length i))
(.substring b 0 (- b-length i))])))

(defn shortcut [a b]
(cond (or (nil? a) (nil? b))
Expand All @@ -35,15 +64,27 @@
:- (vec (range 0 (count a)))}
:else nil))

(defn- diff* [a b f]
(or (cond (= (count a) 0) {:+ [(vec (concat [-1] (seq b)))]
:- []}
(= (count b) 0) {:+ []
:- (vec (range 0 (count a)))}
:else nil)
(f a b)))

(defn diff
"Wrap the diff function f in pre and post optimisations."
[a b f]
(or (shortcut a b)
(let [[prefix a b] (common-prefix a b)
[suffix a b] (common-suffix a b)
diffs (f a b)]
(if (> prefix 0)
{:+ (vec (map #(apply vector
(+ prefix (first %)) (rest %)) (:+ diffs)))
:- (vec (map #(+ prefix %) (:- diffs)))}
diffs))))
(let [diffs (cond (or (nil? a) (nil? b))
(throw (IllegalArgumentException. "Cannot diff nil."))
(= a b) {:+ [] :- []}
:else nil)]
(or diffs
(let [[prefix a b] (common-prefix a b)
[suffix a b] (common-suffix a b)
diffs (diff* a b f)]
(if (> prefix 0)
{:+ (vec (map #(apply vector
(+ prefix (first %)) (rest %)) (:+ diffs)))
:- (vec (map #(+ prefix %) (:- diffs)))}
diffs)))))
8 changes: 4 additions & 4 deletions test/clj_diff/test/compare.clj
Expand Up @@ -44,14 +44,14 @@
(deftest correct-diff-miller-test
(random-diff->patch-test miller/diff))

;; In some curcumstances, the edit distance is slightly higher for
;; In some circumstances, the edit distance is slightly higher for
;; myers and miller than it is for fraser. Use the output of these
;; errors to track down the problem.
(deftest same-edit-distance
#_(deftest same-edit-distance
(dotimes [_ 100]
(let [a (random-string (random-between 20 60))
(let [a (random-string (random-between 10 20))
size (count a)
mutations (random-between 10 (quot size 2))
mutations (random-between 5 (quot size 2))
groups (random-between 1 (quot mutations 2))
b (mutate a mutations groups)
myers-dist (edit-distance (myers/diff a b))
Expand Down
19 changes: 19 additions & 0 deletions test/clj_diff/test/optimisations.clj
@@ -0,0 +1,19 @@
(ns clj-diff.test.optimisations
(:use [clj-diff.optimisations])
(:use [clojure.test]))

(deftest common-prefix-test
(is (= (common-prefix "abcdef" "abcxyz")
[3 "def" "xyz"]))
(is (= (common-prefix "xy" "ab")
[0 "xy" "ab"]))
(is (= (common-prefix "ab" "ab")
[2 "" ""])))

(deftest common-suffix-test
(is (= (common-suffix "defabc" "xyzabc")
[3 "def" "xyz"]))
(is (= (common-suffix "xy" "ab")
[0 "xy" "ab"]))
(is (= (common-suffix "ab" "ab")
[2 "" ""])))

0 comments on commit 93e8be9

Please sign in to comment.