Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #25808 from numist/numist/diffing-performance-master
Performance improvements and availability updates for Collection.difference(from:using:)
- Loading branch information
Showing
8 changed files
with
511 additions
and
643 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
//===--- Diffing.swift ----------------------------------------------------===// | ||
// | ||
// This source file is part of the Swift.org open source project | ||
// | ||
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors | ||
// Licensed under Apache License v2.0 with Runtime Library Exception | ||
// | ||
// See https://swift.org/LICENSE.txt for license information | ||
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
import TestsUtils | ||
|
||
let t: [BenchmarkCategory] = [.api] | ||
public let Diffing = [ | ||
BenchmarkInfo( | ||
name: "DiffSame", | ||
runFunction: run_DiffSame, | ||
tags: t, | ||
legacyFactor: 10), | ||
BenchmarkInfo( | ||
name: "DiffPangramToAlphabet", | ||
runFunction: run_DiffPangramToAlphabet, | ||
tags: t, | ||
legacyFactor: 10), | ||
BenchmarkInfo( | ||
name: "DiffPangrams", | ||
runFunction: run_DiffPangrams, | ||
tags: t, | ||
legacyFactor: 10), | ||
BenchmarkInfo( | ||
name: "DiffReversedAlphabets", | ||
runFunction: run_DiffReversedAlphabets, | ||
tags: t, | ||
legacyFactor: 10), | ||
BenchmarkInfo( | ||
name: "DiffReversedLorem", | ||
runFunction: run_DiffReversedLorem, | ||
tags: t, | ||
legacyFactor: 10), | ||
BenchmarkInfo( | ||
name: "DiffDisparate", | ||
runFunction: run_DiffDisparate, | ||
tags: t, | ||
legacyFactor: 10), | ||
BenchmarkInfo( | ||
name: "DiffSimilar", | ||
runFunction: run_DiffSimilar, | ||
tags: t, | ||
legacyFactor: 10), | ||
] | ||
|
||
let numbersAndSymbols = Array("0123456789`~!@#$%^&*()+=_-\"'?/<,>.\\{}'") | ||
let alphabets = Array("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ") | ||
let alphabetsReversed = Array("ZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjihgfedcba") | ||
let longPangram = Array("This pangram contains four As, one B, two Cs, one D, thirty Es, six Fs, five Gs, seven Hs, eleven Is, one J, one K, two Ls, two Ms, eighteen Ns, fifteen Os, two Ps, one Q, five Rs, twenty-seven Ss, eighteen Ts, two Us, seven Vs, eight Ws, two Xs, three Ys, & one Z") | ||
let typingPangram = Array("The quick brown fox jumps over the lazy dog") | ||
let loremIpsum = Array("Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.") | ||
let unabridgedLorem = Array("Lorem ipsum, quia dolor sit amet consectetur adipisci[ng] velit, sed quia non-numquam [do] eius modi tempora inci[di]dunt, ut labore et dolore magnam aliqua.") | ||
let loremReverse = Array(".auqila angam erolod te erobal tu tnudidicni ropmet domsuie od des ,tile gnicsipida rutetcesnoc ,tema tis rolod muspi meroL") | ||
|
||
|
||
@inline(never) | ||
public func run_DiffSame(_ N: Int) { | ||
if #available(macOS 10.15, iOS 13, tvOS 13, watchOS 6, *) { | ||
for _ in 1...N { | ||
let _ = longPangram.difference(from: longPangram) | ||
} | ||
} | ||
} | ||
|
||
@inline(never) | ||
public func run_DiffPangramToAlphabet(_ N: Int) { | ||
if #available(macOS 10.15, iOS 13, tvOS 13, watchOS 6, *) { | ||
for _ in 1...N { | ||
let _ = longPangram.difference(from: alphabets) | ||
} | ||
} | ||
} | ||
|
||
@inline(never) | ||
public func run_DiffPangrams(_ N: Int) { | ||
if #available(macOS 10.15, iOS 13, tvOS 13, watchOS 6, *) { | ||
for _ in 1...N { | ||
let _ = longPangram.difference(from: typingPangram) | ||
} | ||
} | ||
} | ||
|
||
@inline(never) | ||
public func run_DiffReversedAlphabets(_ N: Int) { | ||
if #available(macOS 10.15, iOS 13, tvOS 13, watchOS 6, *) { | ||
for _ in 1...N { | ||
let _ = alphabets.difference(from: alphabetsReversed) | ||
} | ||
} | ||
} | ||
|
||
@inline(never) | ||
public func run_DiffReversedLorem(_ N: Int) { | ||
if #available(macOS 10.15, iOS 13, tvOS 13, watchOS 6, *) { | ||
for _ in 1...N { | ||
let _ = loremIpsum.difference(from: loremReverse) | ||
} | ||
} | ||
} | ||
|
||
@inline(never) | ||
public func run_DiffDisparate(_ N: Int) { | ||
if #available(macOS 10.15, iOS 13, tvOS 13, watchOS 6, *) { | ||
for _ in 1...N { | ||
let _ = alphabets.difference(from: numbersAndSymbols) | ||
} | ||
} | ||
} | ||
|
||
@inline(never) | ||
public func run_DiffSimilar(_ N: Int) { | ||
if #available(macOS 10.15, iOS 13, tvOS 13, watchOS 6, *) { | ||
for _ in 1...N { | ||
let _ = loremIpsum.difference(from: unabridgedLorem) | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,196 @@ | ||
//===--- Myers.swift -------------------------------------------===// | ||
// | ||
// This source file is part of the Swift.org open source project | ||
// | ||
// Copyright (c) 2014 - 2019 Apple Inc. and the Swift project authors | ||
// Licensed under Apache License v2.0 with Runtime Library Exception | ||
// | ||
// See https://swift.org/LICENSE.txt for license information | ||
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
import TestsUtils | ||
|
||
public let Myers = [ | ||
BenchmarkInfo(name: "Myers", runFunction: run_Myers, tags: [.algorithm]), | ||
] | ||
|
||
let loremShort = Array("Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.") | ||
let loremLong = Array("Sed ut perspiciatis, unde omnis iste natus error sit voluptatem accusantium doloremque laudantium, totam rem aperiam eaque ipsa, quae ab illo inventore veritatis et quasi architecto beatae vitae dicta sunt, explicabo. Nemo enim ipsam voluptatem, quia voluptas sit, aspernatur aut odit aut fugit, sed quia consequuntur magni dolores eos, qui ratione voluptatem sequi nesciunt, neque porro quisquam est, qui dolorem ipsum, quia dolor sit amet consectetur adipisci[ng] velit, sed quia non-numquam [do] eius modi tempora inci[di]dunt, ut labore et dolore magnam aliquam quaerat voluptatem. Ut enim ad minima veniam, quis nostrum[d] exercitationem ullam corporis suscipit laboriosam, nisi ut aliquid ex ea commodi consequatur? Quis autem vel eum iure reprehenderit, qui in ea voluptate velit esse, quam nihil molestiae consequatur, vel illum, qui dolorem eum fugiat, quo voluptas nulla pariatur?") | ||
|
||
@inline(never) | ||
public func run_Myers(N: Int) { | ||
if #available(macOS 10.15, iOS 13, tvOS 13, watchOS 6, *) { | ||
for _ in 1...N { | ||
let _ = myers(from: loremShort, to: loremLong, using: ==) | ||
} | ||
} | ||
} | ||
|
||
// _V is a rudimentary type made to represent the rows of the triangular matrix type used by the Myer's algorithm | ||
// | ||
// This type is basically an array that only supports indexes in the set `stride(from: -d, through: d, by: 2)` where `d` is the depth of this row in the matrix | ||
// `d` is always known at allocation-time, and is used to preallocate the structure. | ||
fileprivate struct _V { | ||
|
||
private var a: [Int] | ||
|
||
// The way negative indexes are implemented is by interleaving them in the empty slots between the valid positive indexes | ||
@inline(__always) private static func transform(_ index: Int) -> Int { | ||
// -3, -1, 1, 3 -> 3, 1, 0, 2 -> 0...3 | ||
// -2, 0, 2 -> 2, 0, 1 -> 0...2 | ||
return (index <= 0 ? -index : index &- 1) | ||
} | ||
|
||
init(maxIndex largest: Int) { | ||
a = [Int](repeating: 0, count: largest + 1) | ||
} | ||
|
||
subscript(index: Int) -> Int { | ||
get { | ||
return a[_V.transform(index)] | ||
} | ||
set(newValue) { | ||
a[_V.transform(index)] = newValue | ||
} | ||
} | ||
} | ||
|
||
@available(macOS 10.15, iOS 13, tvOS 13, watchOS 6, *) | ||
fileprivate func myers<C,D>( | ||
from old: C, to new: D, | ||
using cmp: (C.Element, D.Element) -> Bool | ||
) -> CollectionDifference<C.Element> | ||
where | ||
C : BidirectionalCollection, | ||
D : BidirectionalCollection, | ||
C.Element == D.Element | ||
{ | ||
|
||
// Core implementation of the algorithm described at http://www.xmailserver.org/diff2.pdf | ||
// Variable names match those used in the paper as closely as possible | ||
func _descent(from a: UnsafeBufferPointer<C.Element>, to b: UnsafeBufferPointer<D.Element>) -> [_V] { | ||
let n = a.count | ||
let m = b.count | ||
let max = n + m | ||
|
||
var result = [_V]() | ||
var v = _V(maxIndex: 1) | ||
v[1] = 0 | ||
|
||
var x = 0 | ||
var y = 0 | ||
iterator: for d in 0...max { | ||
let prev_v = v | ||
result.append(v) | ||
v = _V(maxIndex: d) | ||
|
||
// The code in this loop is _very_ hot—the loop bounds increases in terms | ||
// of the iterator of the outer loop! | ||
for k in stride(from: -d, through: d, by: 2) { | ||
if k == -d { | ||
x = prev_v[k &+ 1] | ||
} else { | ||
let km = prev_v[k &- 1] | ||
|
||
if k != d { | ||
let kp = prev_v[k &+ 1] | ||
if km < kp { | ||
x = kp | ||
} else { | ||
x = km &+ 1 | ||
} | ||
} else { | ||
x = km &+ 1 | ||
} | ||
} | ||
y = x &- k | ||
|
||
while x < n && y < m { | ||
if !cmp(a[x], b[y]) { | ||
break; | ||
} | ||
x &+= 1 | ||
y &+= 1 | ||
} | ||
|
||
v[k] = x | ||
|
||
if x >= n && y >= m { | ||
break iterator | ||
} | ||
} | ||
if x >= n && y >= m { | ||
break | ||
} | ||
} | ||
|
||
return result | ||
} | ||
|
||
// Backtrack through the trace generated by the Myers descent to produce the changes that make up the diff | ||
func _formChanges( | ||
from a: UnsafeBufferPointer<C.Element>, | ||
to b: UnsafeBufferPointer<C.Element>, | ||
using trace: [_V] | ||
) -> [CollectionDifference<C.Element>.Change] { | ||
var changes = [CollectionDifference<C.Element>.Change]() | ||
|
||
var x = a.count | ||
var y = b.count | ||
for d in stride(from: trace.count &- 1, to: 0, by: -1) { | ||
let v = trace[d] | ||
let k = x &- y | ||
let prev_k = (k == -d || (k != d && v[k &- 1] < v[k &+ 1])) ? k &+ 1 : k &- 1 | ||
let prev_x = v[prev_k] | ||
let prev_y = prev_x &- prev_k | ||
|
||
while x > prev_x && y > prev_y { | ||
// No change at this position. | ||
x &-= 1 | ||
y &-= 1 | ||
} | ||
|
||
assert((x == prev_x && y > prev_y) || (y == prev_y && x > prev_x)) | ||
if y != prev_y { | ||
changes.append(.insert(offset: prev_y, element: b[prev_y], associatedWith: nil)) | ||
} else { | ||
changes.append(.remove(offset: prev_x, element: a[prev_x], associatedWith: nil)) | ||
} | ||
|
||
x = prev_x | ||
y = prev_y | ||
} | ||
|
||
return changes | ||
} | ||
|
||
/* Splatting the collections into contiguous storage has two advantages: | ||
* | ||
* 1) Subscript access is much faster | ||
* 2) Subscript index becomes Int, matching the iterator types in the algorithm | ||
* | ||
* Combined, these effects dramatically improves performance when | ||
* collections differ significantly, without unduly degrading runtime when | ||
* the parameters are very similar. | ||
* | ||
* In terms of memory use, the linear cost of creating a ContiguousArray (when | ||
* necessary) is significantly less than the worst-case n² memory use of the | ||
* descent algorithm. | ||
*/ | ||
func _withContiguousStorage<C : Collection, R>( | ||
for values: C, | ||
_ body: (UnsafeBufferPointer<C.Element>) throws -> R | ||
) rethrows -> R { | ||
if let result = try values.withContiguousStorageIfAvailable(body) { return result } | ||
let array = ContiguousArray(values) | ||
return try array.withUnsafeBufferPointer(body) | ||
} | ||
|
||
return _withContiguousStorage(for: old) { a in | ||
return _withContiguousStorage(for: new) { b in | ||
return CollectionDifference(_formChanges(from: a, to: b, using:_descent(from: a, to: b)))! | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.