Skip to content

Commit

Permalink
trying out different costs for edit distance
Browse files (browse the repository at this point in the history)
  • Loading branch information
harrah committed Apr 27, 2011
1 parent 515386d commit 6c6ecce
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 11 deletions.
9 changes: 2 additions & 7 deletions main/Command.scala
Original file line number Diff line number Diff line change
Expand Up @@ -127,14 +127,9 @@ object Command
if(suggested.isEmpty) "" else suggested.mkString(" (similar: ", ", ", ")")
}
def suggestions(a: String, bs: Seq[String], maxDistance: Int = 3, maxSuggestions: Int = 3): Seq[String] =
bs.map { b => (b, distance(a, b) ) } filter withinDistance(maxDistance, a) sortBy(_._2) take(maxSuggestions) map(_._1)
bs.map { b => (b, distance(a, b) ) } filter (_._2 <= maxDistance) sortBy(_._2) take(maxSuggestions) map(_._1)
def distance(a: String, b: String): Int =
EditDistance.levenshtein(a, b, insertCost = 1, deleteCost = 1, subCost = 2, transposeCost = 1, true)
/** Decides whether a scored candidate is close enough to `a` to be suggested.
 *
 * The allowed distance is the smaller of `dist` and a length-based limit:
 * one less than the length of the shorter of the two strings, but never below 2.
 *
 * @param dist the caller-supplied maximum distance
 * @param a    the string the candidate is being compared against
 * @param ai   a pair of (candidate string, its computed distance)
 * @return true when the candidate's distance does not exceed the allowed distance
 */
def withinDistance(dist: Int, a: String)(ai: (String, Int)): Boolean =
{
	val (candidate, candidateDistance) = ai
	// Length of the shorter string bounds how much difference is meaningful.
	val shorterLength = math.min(candidate.length, a.length)
	// Never let the length-based limit drop below 2.
	val lengthLimit = math.max(shorterLength - 1, 2)
	candidateDistance <= math.min(dist, lengthLimit)
}
EditDistance.levenshtein(a, b, insertCost = 1, deleteCost = 1, subCost = 2, transposeCost = 1, matchCost = -1, true)
}

trait Help
Expand Down
10 changes: 6 additions & 4 deletions util/complete/EditDistance.scala
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ object EditDistance {
* http://www.merriampark.com/ld.htm
* which is declared to be public domain.
*/
def levenshtein(s: String, t: String, insertCost: Int, deleteCost: Int, subCost: Int, transposeCost: Int, transpositions: Boolean = false): Int = {
def levenshtein(s: String, t: String, insertCost: Int = 1, deleteCost: Int = 1, subCost: Int = 1, transposeCost: Int = 1, matchCost: Int = 0, transpositions: Boolean = false): Int = {
val n = s.length
val m = t.length
if (n == 0) return m
Expand All @@ -18,17 +18,19 @@ object EditDistance {

for (i <- 1 to n ; val s_i = s(i - 1) ; j <- 1 to m) {
val t_j = t(j - 1)
val cost = if (s_i == t_j) 0 else 1
val cost = if (s_i == t_j) matchCost else subCost
val tcost = if (s_i == t_j) matchCost else transposeCost


val c1 = d(i - 1)(j) + deleteCost
val c2 = d(i)(j - 1) + insertCost
val c3 = d(i - 1)(j - 1) + cost*subCost
val c3 = d(i - 1)(j - 1) + cost

d(i)(j) = c1 min c2 min c3

if (transpositions) {
if (i > 1 && j > 1 && s(i - 1) == t(j - 2) && s(i - 2) == t(j - 1))
d(i)(j) = d(i)(j) min (d(i - 2)(j - 2) + cost*transposeCost)
d(i)(j) = d(i)(j) min (d(i - 2)(j - 2) + cost)
}
}

Expand Down

0 comments on commit 6c6ecce

Please sign in to comment.