Skip to content

Commit

Permalink
Improving edit distance string extension
Browse files Browse the repository at this point in the history
  • Loading branch information
LucianoPAlmeida committed May 14, 2022
1 parent 060d523 commit b959d6d
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 19 deletions.
94 changes: 75 additions & 19 deletions Sources/ArgumentParser/Utilities/StringExtensions.swift
Expand Up @@ -141,30 +141,86 @@ extension StringProtocol where SubSequence == Substring {
return Swift.max(rows, columns)
}

var matrix = Array(repeating: Array(repeating: 0, count: columns + 1), count: rows + 1)

for row in 1...rows {
matrix[row][0] = row
// Trim common prefix and suffix
var selfStartTrim = self.startIndex
var targetStartTrim = target.startIndex
while selfStartTrim < self.endIndex &&
targetStartTrim < target.endIndex &&
self[selfStartTrim] == target[targetStartTrim] {
self.formIndex(after: &selfStartTrim)
target.formIndex(after: &targetStartTrim)
}

var selfEndTrim = self.endIndex
var targetEndTrim = target.endIndex

while selfEndTrim > selfStartTrim &&
targetEndTrim > targetStartTrim {
let selfIdx = self.index(before: selfEndTrim)
let targetIdx = target.index(before: targetEndTrim)

guard self[selfIdx] == target[targetIdx] else {
break
}

selfEndTrim = selfIdx
targetEndTrim = targetIdx
}
for column in 1...columns {
matrix[0][column] = column

// Equal strings
guard !(selfStartTrim == self.endIndex &&
targetStartTrim == target.endIndex) else {
return 0
}

for row in 1...rows {
for column in 1...columns {
let source = self[self.index(self.startIndex, offsetBy: row - 1)]
let target = target[target.index(target.startIndex, offsetBy: column - 1)]
let cost = source == target ? 0 : 1

matrix[row][column] = Swift.min(
matrix[row - 1][column] + 1,
matrix[row][column - 1] + 1,
matrix[row - 1][column - 1] + cost
)
// After trimming common prefix and suffix, self is empty.
guard selfStartTrim < selfEndTrim else {
return target.distance(from: targetStartTrim,
to: targetEndTrim)
}

// After trimming common prefix and suffix, target is empty.
guard targetStartTrim < targetEndTrim else {
return distance(from: selfStartTrim,
to: selfEndTrim)
}

let newSelf = self[selfStartTrim..<selfEndTrim]
let newTarget = target[targetStartTrim..<targetEndTrim]

let m = newSelf.count
let n = newTarget.count

// Initialize the levenshtein matrix with only two rows
// current and previous.
var previousRow = [Int](repeating: 0, count: n + 1)
var currentRow = [Int](0...n)

var sourceIdx = newSelf.startIndex
for i in 1...m {
swap(&previousRow, &currentRow)
currentRow[0] = i

var targetIdx = newTarget.startIndex
for j in 1...n {
// If characteres are equal for the levenshtein algorithm the
// minimum will always be the substitution cost, so we can fast
// path here in order to avoid min calls.
if newSelf[sourceIdx] == newTarget[targetIdx] {
currentRow[j] = previousRow[j - 1]
} else {
let deletion = previousRow[j]
let insertion = currentRow[j - 1]
let substitution = previousRow[j - 1]
currentRow[j] = Swift.min(deletion, Swift.min(insertion, substitution)) + 1
}
// j += 1
newTarget.formIndex(after: &targetIdx)
}
// i += 1
newSelf.formIndex(after: &sourceIdx)
}

return matrix.last!.last!
return currentRow[n]
}

func indentingEachLine(by n: Int) -> String {
Expand Down
5 changes: 5 additions & 0 deletions Tests/ArgumentParserUnitTests/StringEditDistanceTests.swift
Expand Up @@ -23,5 +23,10 @@ extension StringEditDistanceTests {
XCTAssertEqual("bar".editDistance(to: "foo"), 3)
XCTAssertEqual("bar".editDistance(to: "baz"), 1)
XCTAssertEqual("baz".editDistance(to: "bar"), 1)
XCTAssertEqual("friend".editDistance(to: "fresh"), 3)
XCTAssertEqual("friend".editDistance(to: "friend"), 0)
XCTAssertEqual("friend".editDistance(to: "fried"), 1)
XCTAssertEqual("friend".editDistance(to: "friendly"), 2)
XCTAssertEqual("friendly".editDistance(to: "friend"), 2)
}
}

0 comments on commit b959d6d

Please sign in to comment.