Skip to content

Commit

Permalink
Merge pull request #25808 from numist/numist/diffing-performance-master
Browse files Browse the repository at this point in the history
Performance improvements and availability updates for Collection.difference(from:using:)
  • Loading branch information
numist committed Jul 1, 2019
2 parents ae065ff + 3d741f7 commit f994fc3
Show file tree
Hide file tree
Showing 8 changed files with 511 additions and 643 deletions.
2 changes: 2 additions & 0 deletions benchmark/CMakeLists.txt
Expand Up @@ -78,6 +78,7 @@ set(SWIFT_BENCH_MODULES
single-source/DictionaryRemove
single-source/DictionarySubscriptDefault
single-source/DictionarySwap
single-source/Diffing
single-source/DropFirst
single-source/DropLast
single-source/DropWhile
Expand All @@ -104,6 +105,7 @@ set(SWIFT_BENCH_MODULES
single-source/Memset
single-source/MonteCarloE
single-source/MonteCarloPi
single-source/Myers
single-source/NSDictionaryCastToSwift
single-source/NSError
single-source/NSStringConversion
Expand Down
125 changes: 125 additions & 0 deletions benchmark/single-source/Diffing.swift
@@ -0,0 +1,125 @@
//===--- Diffing.swift ----------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

import TestsUtils

// Tags shared by every benchmark registered from this file.
let t: [BenchmarkCategory] = [.api]

// Benchmarks for `difference(from:)`. Each row pairs a benchmark name with
// the function that runs it; all rows share the same tags and legacy factor.
public let Diffing: [BenchmarkInfo] = ([
  (name: "DiffSame", run: run_DiffSame),
  (name: "DiffPangramToAlphabet", run: run_DiffPangramToAlphabet),
  (name: "DiffPangrams", run: run_DiffPangrams),
  (name: "DiffReversedAlphabets", run: run_DiffReversedAlphabets),
  (name: "DiffReversedLorem", run: run_DiffReversedLorem),
  (name: "DiffDisparate", run: run_DiffDisparate),
  (name: "DiffSimilar", run: run_DiffSimilar),
] as [(name: String, run: (Int) -> ())]).map { entry in
  BenchmarkInfo(
    name: entry.name,
    runFunction: entry.run,
    tags: t,
    legacyFactor: 10)
}

// Input fixtures for the diffing benchmarks, stored as arrays of characters.

// Digits and punctuation only.
let numbersAndSymbols: [Character] = Array("0123456789`~!@#$%^&*()+=_-\"'?/<,>.\\{}'")

// Lowercase alphabet followed by the uppercase alphabet, and the same
// sequence back to front.
let alphabets: [Character] = Array("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
let alphabetsReversed: [Character] = Array("ZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjihgfedcba")

// Two pangrams of very different length.
let longPangram: [Character] = Array("This pangram contains four As, one B, two Cs, one D, thirty Es, six Fs, five Gs, seven Hs, eleven Is, one J, one K, two Ls, two Ms, eighteen Ns, fifteen Os, two Ps, one Q, five Rs, twenty-seven Ss, eighteen Ts, two Us, seven Vs, eight Ws, two Xs, three Ys, & one Z")
let typingPangram: [Character] = Array("The quick brown fox jumps over the lazy dog")

// A lorem ipsum sentence, a longer variant of it, and the first sentence
// back to front.
let loremIpsum: [Character] = Array("Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.")
let unabridgedLorem: [Character] = Array("Lorem ipsum, quia dolor sit amet consectetur adipisci[ng] velit, sed quia non-numquam [do] eius modi tempora inci[di]dunt, ut labore et dolore magnam aliqua.")
let loremReverse: [Character] = Array(".auqila angam erolod te erobal tu tnudidicni ropmet domsuie od des ,tile gnicsipida rutetcesnoc ,tema tis rolod muspi meroL")


/// Diffs `longPangram` against itself, `N` times.
///
/// Does nothing on OS versions older than those named in the availability
/// check.
@inline(never)
public func run_DiffSame(_ N: Int) {
  guard #available(macOS 10.15, iOS 13, tvOS 13, watchOS 6, *) else { return }
  for _ in 1...N {
    _ = longPangram.difference(from: longPangram)
  }
}

/// Diffs `longPangram` against `alphabets`, `N` times.
///
/// Does nothing on OS versions older than those named in the availability
/// check.
@inline(never)
public func run_DiffPangramToAlphabet(_ N: Int) {
  guard #available(macOS 10.15, iOS 13, tvOS 13, watchOS 6, *) else { return }
  for _ in 1...N {
    _ = longPangram.difference(from: alphabets)
  }
}

/// Diffs `longPangram` against `typingPangram`, `N` times.
///
/// Does nothing on OS versions older than those named in the availability
/// check.
@inline(never)
public func run_DiffPangrams(_ N: Int) {
  guard #available(macOS 10.15, iOS 13, tvOS 13, watchOS 6, *) else { return }
  for _ in 1...N {
    _ = longPangram.difference(from: typingPangram)
  }
}

/// Diffs `alphabets` against `alphabetsReversed`, `N` times.
///
/// Does nothing on OS versions older than those named in the availability
/// check.
@inline(never)
public func run_DiffReversedAlphabets(_ N: Int) {
  guard #available(macOS 10.15, iOS 13, tvOS 13, watchOS 6, *) else { return }
  for _ in 1...N {
    _ = alphabets.difference(from: alphabetsReversed)
  }
}

/// Diffs `loremIpsum` against `loremReverse`, `N` times.
///
/// Does nothing on OS versions older than those named in the availability
/// check.
@inline(never)
public func run_DiffReversedLorem(_ N: Int) {
  guard #available(macOS 10.15, iOS 13, tvOS 13, watchOS 6, *) else { return }
  for _ in 1...N {
    _ = loremIpsum.difference(from: loremReverse)
  }
}

/// Diffs `alphabets` against `numbersAndSymbols`, `N` times.
///
/// Does nothing on OS versions older than those named in the availability
/// check.
@inline(never)
public func run_DiffDisparate(_ N: Int) {
  guard #available(macOS 10.15, iOS 13, tvOS 13, watchOS 6, *) else { return }
  for _ in 1...N {
    _ = alphabets.difference(from: numbersAndSymbols)
  }
}

/// Diffs `loremIpsum` against `unabridgedLorem`, `N` times.
///
/// Does nothing on OS versions older than those named in the availability
/// check.
@inline(never)
public func run_DiffSimilar(_ N: Int) {
  guard #available(macOS 10.15, iOS 13, tvOS 13, watchOS 6, *) else { return }
  for _ in 1...N {
    _ = loremIpsum.difference(from: unabridgedLorem)
  }
}
196 changes: 196 additions & 0 deletions benchmark/single-source/Myers.swift
@@ -0,0 +1,196 @@
//===--- Myers.swift -------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2019 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

import TestsUtils

// The single benchmark registered from this file.
public let Myers: [BenchmarkInfo] = [
  BenchmarkInfo(
    name: "Myers",
    runFunction: run_Myers,
    tags: [.algorithm]),
]

// Input fixtures for the Myers benchmark, stored as arrays of characters:
// a shorter lorem ipsum passage and a longer one.
let loremShort: [Character] = Array("Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.")
let loremLong: [Character] = Array("Sed ut perspiciatis, unde omnis iste natus error sit voluptatem accusantium doloremque laudantium, totam rem aperiam eaque ipsa, quae ab illo inventore veritatis et quasi architecto beatae vitae dicta sunt, explicabo. Nemo enim ipsam voluptatem, quia voluptas sit, aspernatur aut odit aut fugit, sed quia consequuntur magni dolores eos, qui ratione voluptatem sequi nesciunt, neque porro quisquam est, qui dolorem ipsum, quia dolor sit amet consectetur adipisci[ng] velit, sed quia non-numquam [do] eius modi tempora inci[di]dunt, ut labore et dolore magnam aliquam quaerat voluptatem. Ut enim ad minima veniam, quis nostrum[d] exercitationem ullam corporis suscipit laboriosam, nisi ut aliquid ex ea commodi consequatur? Quis autem vel eum iure reprehenderit, qui in ea voluptate velit esse, quam nihil molestiae consequatur, vel illum, qui dolorem eum fugiat, quo voluptas nulla pariatur?")

/// Runs the file-local `myers` diff from `loremShort` to `loremLong`, `N`
/// times, comparing elements with `==`.
///
/// Does nothing on OS versions older than those named in the availability
/// check.
@inline(never)
public func run_Myers(N: Int) {
  guard #available(macOS 10.15, iOS 13, tvOS 13, watchOS 6, *) else { return }
  for _ in 1...N {
    _ = myers(from: loremShort, to: loremLong, using: ==)
  }
}

// `_V` is a minimal type representing one row of the triangular matrix used
// by the Myers algorithm.
//
// It is essentially an array that only supports the indexes in
// `stride(from: -d, through: d, by: 2)`, where `d` is the depth of the row in
// the matrix. `d` is known when the row is created and is used to size the
// backing storage up front.
fileprivate struct _V {

  private var storage: [Int]

  // Negative indexes are stored in the slots left empty between the valid
  // positive indexes:
  //   -3, -1, 1, 3 -> 3, 1, 0, 2 -> 0...3
  //   -2,  0, 2    -> 2, 0, 1    -> 0...2
  @inline(__always) private static func slot(for index: Int) -> Int {
    if index <= 0 {
      return -index
    }
    return index &- 1
  }

  // Creates a zero-filled row able to hold indexes up to `largest`.
  init(maxIndex largest: Int) {
    storage = Array(repeating: 0, count: largest + 1)
  }

  subscript(index: Int) -> Int {
    get {
      let position = _V.slot(for: index)
      return storage[position]
    }
    set {
      let position = _V.slot(for: index)
      storage[position] = newValue
    }
  }
}

/// Computes the difference that turns `old` into `new` with the Myers
/// algorithm, using `cmp` to decide whether two elements are equal.
///
/// Both collections are first exposed as contiguous buffers (copying them
/// into a `ContiguousArray` when they do not offer contiguous storage), then
/// the edit trace is built by `_descent` and turned into changes by
/// `_formChanges`.
///
/// - Parameters:
///   - old: The collection the difference starts from; `remove` offsets
///     index into it.
///   - new: The collection the difference leads to; `insert` offsets index
///     into it.
///   - cmp: Returns `true` when an element of `old` and an element of `new`
///     should be treated as equal.
/// - Returns: A `CollectionDifference` made of unassociated inserts and
///   removes. The result of the failable initializer is force-unwrapped.
@available(macOS 10.15, iOS 13, tvOS 13, watchOS 6, *)
fileprivate func myers<C,D>(
from old: C, to new: D,
using cmp: (C.Element, D.Element) -> Bool
) -> CollectionDifference<C.Element>
where
C : BidirectionalCollection,
D : BidirectionalCollection,
C.Element == D.Element
{

// Core implementation of the algorithm described at http://www.xmailserver.org/diff2.pdf
// Variable names match those used in the paper as closely as possible
//
// Returns the trace of the search: one `_V` row per outer iteration. Each
// row is appended *before* the row for the current depth is computed, so
// `result[d]` holds the row produced at depth `d - 1` (`result[0]` is the
// seed row). The row computed by the final iteration is not appended.
func _descent(from a: UnsafeBufferPointer<C.Element>, to b: UnsafeBufferPointer<D.Element>) -> [_V] {
let n = a.count
let m = b.count
let max = n + m

var result = [_V]()
// Seed row: makes the `k == -d` branch read x = 0 when d == 0.
var v = _V(maxIndex: 1)
v[1] = 0

var x = 0
var y = 0
iterator: for d in 0...max {
let prev_v = v
result.append(v)
v = _V(maxIndex: d)

// The code in this loop is _very_ hot—the loop bounds increase in terms
// of the iterator of the outer loop!
for k in stride(from: -d, through: d, by: 2) {
// Choose the starting x on diagonal k from the neighbouring diagonals of
// the previous row. Coming from k + 1 keeps x (so y grows by one);
// coming from k - 1 advances x by one (so y is unchanged).
if k == -d {
// Lowest diagonal: only k + 1 exists in the previous row.
x = prev_v[k &+ 1]
} else {
let km = prev_v[k &- 1]

if k != d {
let kp = prev_v[k &+ 1]
if km < kp {
x = kp
} else {
x = km &+ 1
}
} else {
// Highest diagonal: only k - 1 exists in the previous row.
x = km &+ 1
}
}
y = x &- k

// Follow the diagonal for as long as the elements compare equal.
while x < n && y < m {
if !cmp(a[x], b[y]) {
break;
}
x &+= 1
y &+= 1
}

v[k] = x

// Both inputs are fully consumed: the search is complete.
if x >= n && y >= m {
break iterator
}
}
// NOTE(review): the inner loop leaves through `break iterator` whenever
// this condition holds, so this check appears to be redundant.
if x >= n && y >= m {
break
}
}

return result
}

// Backtrack through the trace generated by the Myers descent to produce the changes that make up the diff
//
// Starts at the end of both buffers and walks the trace from its last row
// down to index 1, emitting exactly one change per row visited. Changes are
// therefore appended from the end of the inputs towards the start.
func _formChanges(
from a: UnsafeBufferPointer<C.Element>,
to b: UnsafeBufferPointer<C.Element>,
using trace: [_V]
) -> [CollectionDifference<C.Element>.Change] {
var changes = [CollectionDifference<C.Element>.Change]()

var x = a.count
var y = b.count
for d in stride(from: trace.count &- 1, to: 0, by: -1) {
// `trace[d]` is the row from the depth before the step being undone.
let v = trace[d]
let k = x &- y
// Same neighbour selection as in `_descent`, used to find the diagonal
// the step at depth `d` came from.
let prev_k = (k == -d || (k != d && v[k &- 1] < v[k &+ 1])) ? k &+ 1 : k &- 1
let prev_x = v[prev_k]
let prev_y = prev_x &- prev_k

while x > prev_x && y > prev_y {
// No change at this position.
x &-= 1
y &-= 1
}

// After unwinding the matching run, exactly one coordinate still differs.
assert((x == prev_x && y > prev_y) || (y == prev_y && x > prev_x))
if y != prev_y {
changes.append(.insert(offset: prev_y, element: b[prev_y], associatedWith: nil))
} else {
changes.append(.remove(offset: prev_x, element: a[prev_x], associatedWith: nil))
}

x = prev_x
y = prev_y
}

return changes
}

/* Splatting the collections into contiguous storage has two advantages:
*
* 1) Subscript access is much faster
* 2) Subscript index becomes Int, matching the iterator types in the algorithm
*
* Combined, these effects dramatically improve performance when
* collections differ significantly, without unduly degrading runtime when
* the parameters are very similar.
*
* In terms of memory use, the linear cost of creating a ContiguousArray (when
* necessary) is significantly less than the worst-case n² memory use of the
* descent algorithm.
*/
// Calls `body` with a buffer over the elements of `values`: the collection's
// own contiguous storage when it offers one, otherwise a temporary
// `ContiguousArray` copy.
func _withContiguousStorage<C : Collection, R>(
for values: C,
_ body: (UnsafeBufferPointer<C.Element>) throws -> R
) rethrows -> R {
if let result = try values.withContiguousStorageIfAvailable(body) { return result }
let array = ContiguousArray(values)
return try array.withUnsafeBufferPointer(body)
}

// The initializer's optional result is force-unwrapped, so a change list it
// rejects would trap here.
return _withContiguousStorage(for: old) { a in
return _withContiguousStorage(for: new) { b in
return CollectionDifference(_formChanges(from: a, to: b, using:_descent(from: a, to: b)))!
}
}
}
4 changes: 4 additions & 0 deletions benchmark/utils/main.swift
Expand Up @@ -66,6 +66,7 @@ import DictionaryOfAnyHashableStrings
import DictionaryRemove
import DictionarySubscriptDefault
import DictionarySwap
import Diffing
import DropFirst
import DropLast
import DropWhile
Expand All @@ -92,6 +93,7 @@ import MapReduce
import Memset
import MonteCarloE
import MonteCarloPi
import Myers
import NibbleSort
import NIOChannelPipeline
import NSDictionaryCastToSwift
Expand Down Expand Up @@ -240,6 +242,7 @@ registerBenchmark(DictionaryOfAnyHashableStrings)
registerBenchmark(DictionaryRemove)
registerBenchmark(DictionarySubscriptDefault)
registerBenchmark(DictionarySwap)
registerBenchmark(Diffing)
registerBenchmark(DropFirst)
registerBenchmark(DropLast)
registerBenchmark(DropWhile)
Expand Down Expand Up @@ -267,6 +270,7 @@ registerBenchmark(MapReduce)
registerBenchmark(Memset)
registerBenchmark(MonteCarloE)
registerBenchmark(MonteCarloPi)
registerBenchmark(Myers)
registerBenchmark(NSDictionaryCastToSwift)
registerBenchmark(NSErrorTest)
#if os(macOS) || os(iOS) || os(watchOS) || os(tvOS)
Expand Down
Expand Up @@ -13,7 +13,7 @@
@_exported import Foundation // Clang module

// CollectionDifference<ChangeElement>.Change is conditionally bridged to NSOrderedCollectionChange
@available(iOS 9999, macOS 9999, tvOS 9999, watchOS 9999, *) // FIXME(availability-5.1)
@available(macOS 10.15, iOS 13, tvOS 13, watchOS 6, *)
extension CollectionDifference.Change : _ObjectiveCBridgeable {
@_semantics("convertToObjectiveC")
public func _bridgeToObjectiveC() -> NSOrderedCollectionChange {
Expand Down Expand Up @@ -66,7 +66,7 @@ extension CollectionDifference.Change : _ObjectiveCBridgeable {
}

// CollectionDifference<ChangeElement> is conditionally bridged to NSOrderedCollectionDifference
@available(iOS 9999, macOS 9999, tvOS 9999, watchOS 9999, *) // FIXME(availability-5.1)
@available(macOS 10.15, iOS 13, tvOS 13, watchOS 6, *)
extension CollectionDifference : _ObjectiveCBridgeable {
@_semantics("convertToObjectiveC")
public func _bridgeToObjectiveC() -> NSOrderedCollectionDifference {
Expand Down Expand Up @@ -101,6 +101,6 @@ extension CollectionDifference : _ObjectiveCBridgeable {

@_effects(readonly)
public static func _unconditionallyBridgeFromObjectiveC(_ s: NSOrderedCollectionDifference?) -> CollectionDifference {
return _formDifference(from: s!) { $0 as! Change }!
return _formDifference(from: s!) { ($0 as! Change) }!
}
}

0 comments on commit f994fc3

Please sign in to comment.