Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[NFC] Improving edit distance string extension #446

Merged
merged 2 commits into from
Jun 5, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 75 additions & 19 deletions Sources/ArgumentParser/Utilities/StringExtensions.swift
Original file line number Diff line number Diff line change
Expand Up @@ -141,30 +141,86 @@ extension StringProtocol where SubSequence == Substring {
return Swift.max(rows, columns)
}

var matrix = Array(repeating: Array(repeating: 0, count: columns + 1), count: rows + 1)

for row in 1...rows {
matrix[row][0] = row
// Trim common prefix and suffix
var selfStartTrim = self.startIndex
var targetStartTrim = target.startIndex
while selfStartTrim < self.endIndex &&
targetStartTrim < target.endIndex &&
self[selfStartTrim] == target[targetStartTrim] {
self.formIndex(after: &selfStartTrim)
target.formIndex(after: &targetStartTrim)
}

var selfEndTrim = self.endIndex
var targetEndTrim = target.endIndex

while selfEndTrim > selfStartTrim &&
targetEndTrim > targetStartTrim {
let selfIdx = self.index(before: selfEndTrim)
let targetIdx = target.index(before: targetEndTrim)

guard self[selfIdx] == target[targetIdx] else {
break
}

selfEndTrim = selfIdx
targetEndTrim = targetIdx
}
for column in 1...columns {
matrix[0][column] = column

// Equal strings
guard !(selfStartTrim == self.endIndex &&
targetStartTrim == target.endIndex) else {
return 0
}

for row in 1...rows {
for column in 1...columns {
let source = self[self.index(self.startIndex, offsetBy: row - 1)]
let target = target[target.index(target.startIndex, offsetBy: column - 1)]
let cost = source == target ? 0 : 1

matrix[row][column] = Swift.min(
matrix[row - 1][column] + 1,
matrix[row][column - 1] + 1,
matrix[row - 1][column - 1] + cost
)
// After trimming common prefix and suffix, self is empty.
guard selfStartTrim < selfEndTrim else {
return target.distance(from: targetStartTrim,
to: targetEndTrim)
}

// After trimming common prefix and suffix, target is empty.
guard targetStartTrim < targetEndTrim else {
return distance(from: selfStartTrim,
to: selfEndTrim)
}

let newSelf = self[selfStartTrim..<selfEndTrim]
let newTarget = target[targetStartTrim..<targetEndTrim]

let m = newSelf.count
let n = newTarget.count

// Initialize the Levenshtein matrix with only two rows:
// the current row and the previous row.
var previousRow = [Int](repeating: 0, count: n + 1)
var currentRow = [Int](0...n)

var sourceIdx = newSelf.startIndex
for i in 1...m {
swap(&previousRow, &currentRow)
currentRow[0] = i

var targetIdx = newTarget.startIndex
for j in 1...n {
// If the characters are equal, the Levenshtein minimum is always
// the substitution (diagonal) cost, so we can take a fast path
// here and avoid the min calls.
if newSelf[sourceIdx] == newTarget[targetIdx] {
currentRow[j] = previousRow[j - 1]
} else {
let deletion = previousRow[j]
let insertion = currentRow[j - 1]
let substitution = previousRow[j - 1]
currentRow[j] = Swift.min(deletion, Swift.min(insertion, substitution)) + 1
}
// j += 1
newTarget.formIndex(after: &targetIdx)
}
// i += 1
newSelf.formIndex(after: &sourceIdx)
}

return matrix.last!.last!
return currentRow[n]
}

func indentingEachLine(by n: Int) -> String {
Expand Down
5 changes: 5 additions & 0 deletions Tests/ArgumentParserUnitTests/StringEditDistanceTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,10 @@ extension StringEditDistanceTests {
XCTAssertEqual("bar".editDistance(to: "foo"), 3)
XCTAssertEqual("bar".editDistance(to: "baz"), 1)
XCTAssertEqual("baz".editDistance(to: "bar"), 1)
XCTAssertEqual("friend".editDistance(to: "fresh"), 3)
XCTAssertEqual("friend".editDistance(to: "friend"), 0)
XCTAssertEqual("friend".editDistance(to: "fried"), 1)
XCTAssertEqual("friend".editDistance(to: "friendly"), 2)
XCTAssertEqual("friendly".editDistance(to: "friend"), 2)
}
}