Skip to content

Commit

Permalink
Add performance tests for levenshtein distance in preparation for att…
Browse files Browse the repository at this point in the history
…empting to implement a fast algorithm.
  • Loading branch information
brentonashworth committed Apr 15, 2011
1 parent 60ebef8 commit eca2adf
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 70 deletions.
63 changes: 0 additions & 63 deletions src/clj_diff/core.clj

This file was deleted.

4 changes: 4 additions & 0 deletions src/clj_diff/ld.clj
@@ -0,0 +1,4 @@
(ns clj-diff.ld
"Fast implementation of Levenshtein Distance."
(:require [clj-diff [optimizations :as opt]]))

20 changes: 20 additions & 0 deletions src/clj_diff/other_ld.clj
@@ -0,0 +1,20 @@
(ns clj-diff.other-ld
  "Other Levenshtein Distance functions for use as a comparison.")

(defn- new-row
  "Computes one row of an edit-distance table from the row before it.

  prev-row holds the previous row's values (one more entry than t has
  elements), row-elem is the element being compared on this row, and t is
  the sequence whose elements index the columns. Returns a vector with the
  same number of entries as prev-row."
  [prev-row row-elem t]
  (loop [row   [(inc (first prev-row))] ; first column: one more than the cell above
         cells (seq (map vector prev-row (next prev-row) t))]
    (if cells
      (let [[diagonal above t-elem] (first cells)
            cell (if (= row-elem t-elem)
                   ;; elements match: carry the diagonal value over unchanged
                   diagonal
                   ;; otherwise one more than the cheapest neighbour
                   ;; (left = last cell appended to this row)
                   (inc (min (peek row) above diagonal)))]
        (recur (conj row cell) (next cells)))
      row)))

(defn laurent-levenshtein
  "Levenshtein distance between the sequences s and t. Very compact
  implementation from Laurent PETIT.

  Folds over the elements of s, building each row of the edit-distance
  table from the previous one with new-row; the result is the last entry
  of the final row. When s is empty the result is (count t)."
  [s t]
  (peek (reduce (fn [prev-row s-elem]
                  (new-row prev-row s-elem t))
                ;; Seed with a vector, not a bare range: when s is empty
                ;; reduce returns the seed untouched, and peek only accepts
                ;; stacks (vectors/lists) -- it throws ClassCastException on
                ;; the seq that range returns.
                (vec (range (inc (count t))))
                s)))
39 changes: 32 additions & 7 deletions src/clj_diff/performance.clj
Expand Up @@ -4,6 +4,8 @@
(:require [clj-diff [core :as core]] (:require [clj-diff [core :as core]]
[clj-diff [myers :as myers]] [clj-diff [myers :as myers]]
[clj-diff [miller :as miller]] [clj-diff [miller :as miller]]
[clj-diff [ld :as ld]]
[clj-diff [other-ld :as other-ld]]
[incanter [stats :as stats]]) [incanter [stats :as stats]])
(:import name.fraser.neil.plaintext.diff_match_patch)) (:import name.fraser.neil.plaintext.diff_match_patch))


Expand Down Expand Up @@ -60,7 +62,7 @@
(+ lo (Math/abs (mod (. r nextInt) n))))) (+ lo (Math/abs (mod (. r nextInt) n)))))


(defn random-string (defn random-string
"Generage a random string composed of upper and lower case letters and the "Generate a random string composed of upper and lower case letters and the
numbers 0 through 9." numbers 0 through 9."
[size] [size]
(loop [length (random-between size size) (loop [length (random-between size size)
Expand Down Expand Up @@ -117,11 +119,12 @@
"For strings a and b, run each diff algorithm 'total-runs' times and then "For strings a and b, run each diff algorithm 'total-runs' times and then
calculate stats based on the fastest 'take-top' runs." calculate stats based on the fastest 'take-top' runs."
[fns a b take-top total-runs] [fns a b take-top total-runs]
(map #(let [[alg f] % (map #(let [[alg f result-fn] %
result-fn (or result-fn edit-distance)
d (take take-top d (take take-top
(sort-by :time (time* total-runs (sort-by :time (time* total-runs
(fn [] (f a b)) (fn [] (f a b))
edit-distance))) result-fn)))
times (map :time d) times (map :time d)
distances (distinct (map :result d)) distances (distinct (map :result d))
mean (stats/mean times) mean (stats/mean times)
Expand Down Expand Up @@ -184,9 +187,12 @@
(view :width width) (view :width width)
(save (str "charts/" file-name ".png") :width width)))))) (save (str "charts/" file-name ".png") :width width))))))


(defn test-range [size points] (defn test-range
"Return a sequence of n numbers between 0 and size which are evenly
distributed throughout that range."
[size n]
(let [mutations (quot (* size 9) 10) (let [mutations (quot (* size 9) 10)
step (quot mutations points)] step (quot mutations n)]
(range 1 (inc mutations) step))) (range 1 (inc mutations) step)))


(defn- move-first-to-end* [a] (defn- move-first-to-end* [a]
Expand All @@ -205,7 +211,10 @@
(.substring a (- (count a) (quot half 2))))] (.substring a (- (count a) (quot half 2))))]
(mutate b (quot (count b) 10) 2))) (mutate b (quot (count b) 10) 2)))


(defn vary-mutation-100 [fns x n] (defn vary-mutation-100
"Given the functions to test (fns), the number of tests to run (x) and the
maximum size of a group of mutations (n), display a chart of test results."
[fns x n]
(let [d (vary-mutations fns 100 (test-range 100 x) (let [d (vary-mutations fns 100 (test-range 100 x)
5 5
(quot (* n 2) 3) (quot (* n 2) 3)
Expand Down Expand Up @@ -276,3 +285,19 @@
"Run the standard performance tests." "Run the standard performance tests."
[] []
(suite 15)) (suite 15))

;;
;; Performance tests for Levenshtein distance
;; ==========================================

(defn levenshtein-suite
  "Runs percent-change twice over the Levenshtein implementations being
  compared, passing x through to both runs, and returns the value of the
  second run.

  Each entry in the function list is [label function result-fn]; identity
  is supplied as the result-fn for both entries.
  NOTE(review): the meaning of x and of the numeric arguments
  (1000/10/3 and 5000/50/3) is defined by percent-change, which is not
  visible here -- confirm against its signature."
  [x]
  (let [candidates [["Laurent" other-ld/laurent-levenshtein identity]
                    ["Estimated" core/levenshtein-distance identity]]]
    (percent-change candidates 1000 10 x 3)
    (percent-change candidates 5000 50 x 3)))

(defn ld-performance-tests
  "Run the standard Levenshtein distance performance tests: calls
  levenshtein-suite with an argument of 10."
  []
  (levenshtein-suite 10))

0 comments on commit eca2adf

Please sign in to comment.