Skip to content

Commit

Permalink
[stdlib] String index interchange, part II (UTF16)
Browse files Browse the repository at this point in the history
  • Loading branch information
Dave Abrahams committed May 21, 2017
1 parent e68b601 commit bb2424f
Show file tree
Hide file tree
Showing 12 changed files with 69 additions and 267 deletions.
19 changes: 0 additions & 19 deletions stdlib/public/SDK/Foundation/ExtraStringAPIs.swift
Expand Up @@ -10,25 +10,6 @@
//
//===----------------------------------------------------------------------===//

// Random access for String.UTF16View, only when Foundation is
// imported. Making this API dependent on Foundation decouples the
// Swift core from a UTF16 representation.
extension String.UTF16View.Index : Strideable {
  /// Creates a UTF-16 view index from an integer offset.
  ///
  /// - Parameter offset: The offset to wrap. A negative value fails the
  ///   precondition below.
  public init(_ offset: Int) {
    _precondition(offset >= 0, "Negative UTF16 index offset not allowed")
    self.init(_offset: offset)
  }

  /// Returns the signed distance from this index to `other`, measured on
  /// the wrapped integer offsets.
  public func distance(to other: String.UTF16View.Index) -> Int {
    let origin = _offset
    let target = other._offset
    return origin.distance(to: target)
  }

  /// Returns the index whose offset is this index's offset advanced by `n`.
  ///
  /// The result is constructed through `init(_:)`, so a negative resulting
  /// offset fails that initializer's precondition.
  public func advanced(by n: Int) -> String.UTF16View.Index {
    let shifted = _offset.advanced(by: n)
    return String.UTF16View.Index(shifted)
  }
}

// Conformance-only extensions: both declare `RandomAccessCollection`
// conformance and add no members of their own.
extension String.UTF16View : RandomAccessCollection {}
extension String.UTF16View.Indices : RandomAccessCollection {}

4 changes: 2 additions & 2 deletions stdlib/public/SDK/Foundation/URLComponents.swift
Expand Up @@ -194,8 +194,8 @@ public struct URLComponents : ReferenceConvertible, Hashable, Equatable, _Mutabl
private func _toStringRange(_ r : NSRange) -> Range<String.Index>? {
guard r.location != NSNotFound else { return nil }

let utf16Start = String.UTF16View.Index(_offset: r.location)
let utf16End = String.UTF16View.Index(_offset: r.location + r.length)
let utf16Start = String.UTF16View.Index(encodedOffset: r.location)
let utf16End = String.UTF16View.Index(encodedOffset: r.location + r.length)

guard let s = self.string else { return nil }
guard let start = String.Index(utf16Start, within: s) else { return nil }
Expand Down
55 changes: 2 additions & 53 deletions stdlib/public/core/StringIndexConversions.swift
Expand Up @@ -54,57 +54,6 @@ extension String.Index {
self = unicodeScalarIndex
}

/// Creates an index in the given string that corresponds exactly to the
/// specified `UTF16View` position.
///
/// The following example finds the position of a space in a string's `utf16`
/// view and then converts that position to an index in the string. The
/// value `32` is the UTF-16 encoded value of a space character.
///
/// let cafe = "Café 🍵"
///
/// let utf16Index = cafe.utf16.index(of: 32)!
/// let stringIndex = String.Index(utf16Index, within: cafe)!
///
/// print(cafe[..<stringIndex])
/// // Prints "Café"
///
/// If the position passed in `utf16Index` doesn't have an exact
/// corresponding position in `other`, the result of the initializer is
/// `nil`. For example, an attempt to convert the position of the trailing
/// surrogate of a UTF-16 surrogate pair fails.
///
/// The next example attempts to convert the indices of the two UTF-16 code
/// units that represent the teacup emoji (`"🍵"`). The index of the lead
/// surrogate is successfully converted to a position in `other`, but the
/// index of the trailing surrogate is not.
///
/// let emojiHigh = cafe.utf16.index(after: utf16Index)
/// print(String.Index(emojiHigh, within: cafe))
/// // Prints "Optional(String.Index(...))"
///
/// let emojiLow = cafe.utf16.index(after: emojiHigh)
/// print(String.Index(emojiLow, within: cafe))
/// // Prints "nil"
///
/// - Parameters:
/// - utf16Index: A position in the `utf16` view of the `other` parameter.
/// - other: The string referenced by both `utf16Index` and the resulting
/// index.
public init?(_ utf16Index: String.UTF16Index, within other: String) {
  // Convert in two hops: UTF-16 position -> unicode scalar position ->
  // string position. If either hop has no exact counterpart the chain
  // yields nil and the initializer fails.
  guard
    let converted = utf16Index
      .samePosition(in: other.unicodeScalars)?
      .samePosition(in: other)
  else { return nil }

  self = converted
}

/// Creates an index in the given string that corresponds exactly to the
/// specified `UTF8View` position.
///
Expand Down Expand Up @@ -150,7 +99,7 @@ extension String.Index {
/// - Returns: The position in `utf8` that corresponds exactly to this index.
public func samePosition(
in utf8: String.UTF8View
// NOTE(review): two return-type lines appear here (non-optional, then
// optional); presumably the before/after of this change — confirm which one
// is current.
) -> String.UTF8View.Index {
) -> String.UTF8View.Index? {
// Forwards to `String.UTF8View.Index(_:within:)` and returns its result
// unchanged.
return String.UTF8View.Index(self, within: utf8)
}

Expand Down Expand Up @@ -199,7 +148,7 @@ extension String.Index {
public func samePosition(
in unicodeScalars: String.UnicodeScalarView
) -> String.UnicodeScalarView.Index {
// Forwards to `String.UnicodeScalarView.Index(_:within:)`.
// NOTE(review): two return lines appear here; the second force-unwraps the
// initializer's result, so a nil result would trap at run time instead of
// being reported to the caller — confirm that this index can never lack a
// corresponding position in `unicodeScalars`.
return String.UnicodeScalarView.Index(self, within: unicodeScalars)
return String.UnicodeScalarView.Index(self, within: unicodeScalars)!
}
}

8 changes: 4 additions & 4 deletions stdlib/public/core/StringRangeReplaceableCollection.swift.gyb
Expand Up @@ -188,7 +188,7 @@ extension String {
///
/// - Parameter characters: A sequence of characters.
public init<S : Sequence>(_ characters: S)
// NOTE(review): two `where` clauses appear here (`S.Element` and
// `S.Iterator.Element`); presumably the before/after of this change —
// confirm which one is current.
where S.Element == Character {
where S.Iterator.Element == Character {
// Build a `CharacterView` from the sequence and use its `_core` as this
// string's `_core`.
self._core = CharacterView(characters)._core
}

Expand Down Expand Up @@ -230,7 +230,7 @@ extension String {
///
/// - Parameter newElements: A sequence of characters.
public mutating func append<S : Sequence>(contentsOf newElements: S)
where S.Element == Character {
where S.Iterator.Element == Character {
withMutableCharacters {
(v: inout CharacterView) in v.append(contentsOf: newElements)
}
Expand All @@ -253,7 +253,7 @@ extension String {
public mutating func replaceSubrange<C>(
_ bounds: Range<Index>,
with newElements: C
) where C : Collection, C.Element == Character {
) where C : Collection, C.Iterator.Element == Character {
withMutableCharacters {
(v: inout CharacterView)
in v.replaceSubrange(bounds, with: newElements)
Expand Down Expand Up @@ -293,7 +293,7 @@ extension String {
/// `newElements`.
public mutating func insert<S : Collection>(
contentsOf newElements: S, at i: Index
) where S.Element == Character {
) where S.Iterator.Element == Character {
withMutableCharacters {
(v: inout CharacterView) in v.insert(contentsOf: newElements, at: i)
}
Expand Down
126 changes: 16 additions & 110 deletions stdlib/public/core/StringUTF16.swift
Expand Up @@ -115,45 +115,21 @@ extension String {
CustomStringConvertible,
CustomDebugStringConvertible {

/// A position in a string's collection of UTF-16 code units.
///
/// You can convert between indices of the different string views by using
/// conversion initializers and the `samePosition(in:)` method overloads.
/// For example, the following code sample finds the index of the first
/// space in a string and then converts that to the same
/// position in the UTF-16 view.
///
/// let hearts = "Hearts <3 ♥︎ 💘"
/// if let i = hearts.index(of: " ") {
/// let j = i.samePosition(in: hearts.utf16)
/// print(Array(hearts.utf16[j...]))
/// print(hearts.utf16[j...])
/// }
/// // Prints "[32, 60, 51, 32, 9829, 65038, 32, 55357, 56472]"
/// // Prints " <3 ♥︎ 💘"
public struct Index {
  /// The integer offset wrapped by this index.
  public let _offset: Int

  // Foundation needs access to these fields so it can expose
  // random access
  public // SPI(Foundation)
  init(_offset: Int) {
    self._offset = _offset
  }
}

public typealias Index = String.Index
public typealias IndexDistance = Int

/// The position of the first code unit if the `String` is
/// nonempty; identical to `endIndex` otherwise.
public var startIndex: Index {
// The view's own `_offset` is used directly as the offset of the first
// index.
return Index(_offset: _offset)
return Index(encodedOffset: _offset)
}

/// The "past the end" position---that is, the position one greater than
/// the last valid subscript argument.
///
/// In an empty UTF-16 view, `endIndex` is equal to `startIndex`.
public var endIndex: Index {
// The view's `_offset` plus its `_length` gives the offset of the
// past-the-end index.
return Index(_offset: _offset + _length)
return Index(encodedOffset: _offset + _length)
}

public struct Indices {
Expand All @@ -170,37 +146,37 @@ extension String {
// TODO: swift-3-indexing-model - add docs
/// Returns the index whose offset is one greater than the offset of `i`.
///
/// No range check is performed on `i` (see the FIXME in the body).
///
/// - Parameter i: The index to advance.
/// - Returns: An index built from the offset of `i` plus one.
public func index(after i: Index) -> Index {
// FIXME: swift-3-indexing-model: range check i?
// NOTE(review): `_unsafePlus` presumably adds without overflow checking —
// confirm against its definition.
return Index(_offset: _unsafePlus(i._offset, 1))
return Index(encodedOffset: _unsafePlus(i.encodedOffset, 1))
}

// TODO: swift-3-indexing-model - add docs
/// Returns the index whose offset is one less than the offset of `i`.
///
/// No range check is performed on `i` (see the FIXME in the body).
///
/// - Parameter i: The index to move back.
/// - Returns: An index built from the offset of `i` minus one.
public func index(before i: Index) -> Index {
// FIXME: swift-3-indexing-model: range check i?
// NOTE(review): `_unsafeMinus` presumably subtracts without overflow
// checking — confirm against its definition.
return Index(_offset: _unsafeMinus(i._offset, 1))
return Index(encodedOffset: _unsafeMinus(i.encodedOffset, 1))
}

// TODO: swift-3-indexing-model - add docs
/// Returns the index whose offset is the offset of `i` advanced by `n`.
///
/// No range check is performed on `i` or on the result (see the FIXME in
/// the body).
///
/// - Parameters:
///   - i: The starting index.
///   - n: The signed distance to move; negative values move backward.
/// - Returns: An index built from the offset of `i` advanced by `n`.
public func index(_ i: Index, offsetBy n: IndexDistance) -> Index {
// FIXME: swift-3-indexing-model: range check i?
return Index(_offset: i._offset.advanced(by: n))
return Index(encodedOffset: i.encodedOffset.advanced(by: n))
}

// TODO: swift-3-indexing-model - add docs
/// Returns the index whose offset is the offset of `i` advanced by `n`, or
/// `nil` when the limit test in the body decides that `limit` would be
/// passed.
///
/// No range check is performed on `i` (see the FIXME in the body).
///
/// - Parameters:
///   - i: The starting index.
///   - n: The signed distance to move; negative values move backward.
///   - limit: The index used as the bound for the move.
/// - Returns: The offset index, or `nil` if the limit test fails.
public func index(
_ i: Index, offsetBy n: IndexDistance, limitedBy limit: Index
) -> Index? {
// FIXME: swift-3-indexing-model: range check i?
// `d` is the signed distance from `i` to `limit`.
let d = i._offset.distance(to: limit._offset)
let d = i.encodedOffset.distance(to: limit.encodedOffset)
// The branch is chosen by the sign of `d`, not of `n`:
//   d > 0  (limit ahead of `i`):     nil when d < n
//   d <= 0 (limit at or behind `i`): nil when d > n
// NOTE(review): when `limit == i` (d == 0) and `n > 0`, the test reduces to
// `0 > n`, which is false, so the offset index is returned rather than
// `nil` — confirm whether this is intended.
if (d > 0) ? (d < n) : (d > n) {
return nil
}
return Index(_offset: i._offset.advanced(by: n))
return Index(encodedOffset: i.encodedOffset.advanced(by: n))
}

// TODO: swift-3-indexing-model - add docs
/// Returns the signed distance between two indices, computed on their
/// integer offsets.
///
/// No range check is performed on either index (see the FIXME in the body).
///
/// - Parameters:
///   - start: The index to measure from.
///   - end: The index to measure to.
/// - Returns: The distance from the offset of `start` to the offset of
///   `end`; negative when `end` has the smaller offset.
public func distance(from start: Index, to end: Index) -> IndexDistance {
// FIXME: swift-3-indexing-model: range check start and end?
return start._offset.distance(to: end._offset)
return start.encodedOffset.distance(to: end.encodedOffset)
}

func _internalIndex(at i: Int) -> Int {
Expand All @@ -223,7 +199,7 @@ extension String {
_precondition(i >= startIndex && i < endIndex,
"out-of-range access on a UTF16View")

let index = _internalIndex(at: i._offset)
let index = _internalIndex(at: i.encodedOffset)
let u = _core[index]
if _fastPath((u &>> 11) != 0b1101_1) {
// Neither high-surrogate, nor low-surrogate -- well-formed sequence
Expand Down Expand Up @@ -276,8 +252,8 @@ extension String {
public subscript(bounds: Range<Index>) -> UTF16View {
// The slice is a new `UTF16View` over the same `_core`. Its offset is the
// lower bound's offset passed through `_internalIndex(at:)`, and its length
// is the difference between the two bounds' offsets.
return UTF16View(
_core,
offset: _internalIndex(at: bounds.lowerBound._offset),
length: bounds.upperBound._offset - bounds.lowerBound._offset)
offset: _internalIndex(at: bounds.lowerBound.encodedOffset),
length: bounds.upperBound.encodedOffset - bounds.lowerBound.encodedOffset)
}

internal init(_ _core: _StringCore) {
Expand Down Expand Up @@ -337,9 +313,9 @@ extension String {
let wholeString = String(utf16._core)

guard
let start = UTF16Index(_offset: utf16._offset)
let start = UTF16Index(encodedOffset: utf16._offset)
.samePosition(in: wholeString),
let end = UTF16Index(_offset: utf16._offset + utf16._length)
let end = UTF16Index(encodedOffset: utf16._offset + utf16._length)
.samePosition(in: wholeString)
else
{
Expand All @@ -357,24 +333,6 @@ extension String.UTF16View : _SwiftStringView {
var _persistentContent : String { return String(self._core) }
}

extension String.UTF16View.Index : Comparable {
  // FIXME: swift-3-indexing-model: add complete set of forwards for Comparable
  // assuming String.UTF8View.Index continues to exist

  /// Returns `true` when both indices wrap the same offset.
  public static func == (lhs: String.UTF16View.Index, rhs: String.UTF16View.Index) -> Bool {
    let (left, right) = (lhs._offset, rhs._offset)
    return left == right
  }

  /// Returns `true` when the offset of `lhs` is strictly smaller than the
  /// offset of `rhs`.
  public static func < (lhs: String.UTF16View.Index, rhs: String.UTF16View.Index) -> Bool {
    let (left, right) = (lhs._offset, rhs._offset)
    return left < right
  }
}

// Index conversions
extension String.UTF16View.Index {
/// Creates an index in the given UTF-16 view that corresponds exactly to the
Expand Down Expand Up @@ -415,7 +373,7 @@ extension String.UTF16View.Index {
if !utf8Index._isOnUnicodeScalarBoundary(in: core) {
return nil
}
_offset = utf8Index._coreIndex
self = String.Index(encodedOffset: utf8Index._coreIndex)
}

/// Creates an index in the given UTF-16 view that corresponds exactly to the
Expand All @@ -437,33 +395,7 @@ extension String.UTF16View.Index {
/// `String(utf16).indices`.
/// - utf16: The `UTF16View` in which to find the new position.
public init(_ index: String.Index, within utf16: String.UTF16View) {
_offset = index.encodedOffset
}

/// Returns the position in the given UTF-8 view that corresponds exactly to
/// this index.
///
/// The index must be a valid index of `String(utf8).utf16`.
///
/// This example first finds the position of a space (UTF-16 code point `32`)
/// in a string's `utf16` view and then uses this method to find the same
/// position in the string's `utf8` view.
///
/// let cafe = "Café 🍵"
/// let i = cafe.utf16.index(of: 32)!
/// let j = i.samePosition(in: cafe.utf8)!
/// print(Array(cafe.utf8[..<j]))
/// // Prints "[67, 97, 102, 195, 169]"
///
/// - Parameter utf8: The view to use for the index conversion.
/// - Returns: The position in `utf8` that corresponds exactly to this index.
/// If this index does not have an exact corresponding position in `utf8`,
/// this method returns `nil`. For example, an attempt to convert the
/// position of a UTF-16 trailing surrogate returns `nil`.
public func samePosition(
in utf8: String.UTF8View
) -> String.UTF8View.Index? {
return String.UTF8View.Index(self, within: utf8)
self = index
}

/// Returns the position in the given view of Unicode scalars that
Expand Down Expand Up @@ -492,32 +424,6 @@ extension String.UTF16View.Index {
) -> String.UnicodeScalarIndex? {
return String.UnicodeScalarIndex(self, within: unicodeScalars)
}

/// Returns the position in the given string that corresponds exactly to this
/// index.
///
/// This index must be a valid index of `characters.utf16`.
///
/// This example first finds the position of a space (UTF-16 code point `32`)
/// in a string's `utf16` view and then uses this method to find the same position
/// in the string.
///
/// let cafe = "Café 🍵"
/// let i = cafe.utf16.index(of: 32)!
/// let j = i.samePosition(in: cafe)!
/// print(cafe[..<j])
/// // Prints "Café"
///
/// - Parameter characters: The string to use for the index conversion.
/// - Returns: The position in `characters` that corresponds exactly to this
/// index. If this index does not have an exact corresponding position in
/// `characters`, this method returns `nil`. For example, an attempt to
/// convert the position of a UTF-16 trailing surrogate returns `nil`.
public func samePosition(in characters: String) -> String.Index? {
  // Delegates to the failable `String.Index(_:within:)` initializer; a nil
  // result is handed back to the caller unchanged.
  let converted = String.Index(self, within: characters)
  return converted
}
}

// Reflection
Expand Down

0 comments on commit bb2424f

Please sign in to comment.