Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SE-0163] Add Latin1 to Unicode codecs #32782

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 2 additions & 1 deletion stdlib/public/core/CMakeLists.txt
Expand Up @@ -2,7 +2,7 @@
#
# This source file is part of the Swift.org open source project
#
# Copyright (c) 2014 - 2019 Apple Inc. and the Swift project authors
# Copyright (c) 2014 - 2020 Apple Inc. and the Swift project authors
# Licensed under Apache License v2.0 with Runtime Library Exception
#
# See https://swift.org/LICENSE.txt for license information
Expand Down Expand Up @@ -219,6 +219,7 @@ set(SWIFTLIB_SOURCES
SliceBuffer.swift
SIMDVector.swift
UnfoldSequence.swift
UnicodeLatin1.swift
VarArgs.swift
Zip.swift
"${SWIFT_SOURCE_DIR}/stdlib/linker-support/magic-symbols-for-install-name.c"
Expand Down
1 change: 1 addition & 0 deletions stdlib/public/core/GroupInfo.json
Expand Up @@ -43,6 +43,7 @@
"Unicode.swift",
"UnicodeEncoding.swift",
"UnicodeHelpers.swift",
"UnicodeLatin1.swift",
"UnicodeParser.swift",
"UnicodeScalar.swift",
"UnicodeScalarProperties.swift",
Expand Down
66 changes: 66 additions & 0 deletions stdlib/public/core/UnicodeLatin1.swift
@@ -0,0 +1,66 @@
//===--- UnicodeLatin1.swift ----------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2020 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

@available(macOS 9999, iOS 9999, tvOS 9999, watchOS 9999, *)
extension Unicode {
  /// The Latin-1 codec.
  ///
  /// This enum has no cases, so it cannot be instantiated; it exists only as
  /// a type that static members and conformances can be attached to.
  @frozen public enum Latin1 {}
}

@available(macOS 9999, iOS 9999, tvOS 9999, watchOS 9999, *)
extension Unicode.Latin1: Unicode.Encoding {

  /// The basic unit of this encoding: one 8-bit value.
  public typealias CodeUnit = UInt8

  /// An encoded scalar, which is always exactly one code unit.
  public typealias EncodedScalar = CollectionOfOne<CodeUnit>

  /// The parser type used for both forward and reverse parsing.
  public typealias ForwardParser = Parser
  public typealias ReverseParser = Parser

  /// A parser with no stored state.
  @frozen
  public struct Parser {
    /// Creates a parser.
    @inlinable
    public init() {}
  }

  /// The encoded form substituted for scalars that cannot be represented.
  @inlinable
  public static var encodedReplacementCharacter: EncodedScalar {
    // U+001A SUBSTITUTE
    return EncodedScalar(0x1A)
  }

  /// Returns the Unicode scalar whose numeric value equals the single code
  /// unit stored in `encodedScalar`.
  @inlinable
  public static func decode(_ encodedScalar: EncodedScalar) -> Unicode.Scalar {
    let codeUnit = encodedScalar[0]
    return Unicode.Scalar(codeUnit)
  }

  /// Returns the one-code-unit encoding of `unicodeScalar`, or `nil` when the
  /// scalar's value cannot be represented exactly as a `CodeUnit`.
  @inlinable
  public static func encode(_ unicodeScalar: Unicode.Scalar) -> EncodedScalar? {
    guard let codeUnit = CodeUnit(exactly: unicodeScalar.value) else {
      return nil
    }
    return EncodedScalar(codeUnit)
  }

  /// Always returns `true`, regardless of the code unit passed in.
  @inlinable
  public static func _isScalar(_: CodeUnit) -> Bool {
    return true
  }
}

@available(macOS 9999, iOS 9999, tvOS 9999, watchOS 9999, *)
extension Unicode.Latin1.Parser: Unicode.Parser {

  /// The encoding whose code units this parser consumes.
  public typealias Encoding = Unicode.Latin1

  /// Pulls at most one code unit from `codeUnits` and wraps it as an encoded
  /// scalar.
  ///
  /// - Parameter codeUnits: The iterator to read from; it is advanced by one
  ///   element when an element is available.
  /// - Returns: `.valid` containing the code unit that was read, or
  ///   `.emptyInput` when the iterator is exhausted. This implementation
  ///   never produces `.error`.
  @inlinable
  public mutating func parseScalar<I: IteratorProtocol>(
    from codeUnits: inout I
  ) -> Unicode.ParseResult<Encoding.EncodedScalar>
  where I.Element == Encoding.CodeUnit {
    guard let codeUnit = codeUnits.next() else {
      return .emptyInput
    }
    return .valid(Encoding.EncodedScalar(codeUnit))
  }
}
79 changes: 78 additions & 1 deletion test/stdlib/Unicode.swift
Expand Up @@ -2,7 +2,7 @@
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Copyright (c) 2014 - 2020 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
Expand Down Expand Up @@ -110,4 +110,81 @@ UnicodeAPIs.test("UTF-8 and UTF-16 queries") {
}
}

if #available(macOS 9999, iOS 9999, tvOS 9999, watchOS 9999, *) {
  var UnicodeLatin1 = TestSuite("UnicodeLatin1")

  // Every scalar in U+0000 ... U+00FF must survive an encode/decode round
  // trip; scalars above that range must be rejected by `encode`.
  UnicodeLatin1.test("Encoding") {
    let allScalars = (UInt8.min ... UInt8.max).map { codeUnit in
      Unicode.Scalar(codeUnit)
    }
    let encoded = allScalars.compactMap { scalar in
      Unicode.Latin1.encode(scalar)
    }
    let roundTripped = encoded.map { scalar in
      Unicode.Latin1.decode(scalar)
    }
    expectEqualSequence(allScalars, roundTripped)
    expectNil(Unicode.Latin1.encode("\u{0100}"))
    expectNil(Unicode.Latin1.encode("\u{10FFFF}"))
  }

  // Feeding every possible code unit through the forward parser must yield
  // each one back, in order, without ever reporting an error.
  UnicodeLatin1.test("Parser") {
    let allCodeUnits = UInt8.min ... UInt8.max
    var parser = Unicode.Latin1.ForwardParser()
    var input = allCodeUnits.makeIterator()
    var parsed: [Unicode.Latin1.EncodedScalar] = []
    var finished = false
    while !finished {
      switch parser.parseScalar(from: &input) {
      case .valid(let scalar):
        parsed.append(scalar)
      case .emptyInput:
        expectEqualSequence(allCodeUnits, parsed.joined())
        finished = true
      case .error:
        expectUnreachable()
        finished = true
      }
    }
  }

  UnicodeLatin1.test("Transcode") {
    // Transcodes the UTF-8 view of `text` to Latin-1 without stopping on
    // errors, returning the emitted code units and the reported error flag.
    func latin1CodeUnits(
      transcoding text: String
    ) -> (codeUnits: [UInt8], hadError: Bool) {
      var output: [UInt8] = []
      let hadError = transcode(
        text.utf8.makeIterator(),
        from: Unicode.UTF8.self,
        to: Unicode.Latin1.self,
        stoppingOnError: false,
        into: { output.append($0) }
      )
      return (output, hadError)
    }

    // Printable ranges mapped to the text they are expected to decode to.
    let samples: [ClosedRange<UInt8>: String] = [
      UInt8(0x20) ... UInt8(0x7E):
        """
        \u{20}!"#$%&'()*+,-./0123456789:;<=>?\
        @ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_\
        `abcdefghijklmnopqrstuvwxyz{|}~
        """,
      UInt8(0xA0) ... UInt8(0xFF):
        """
        \u{A0}¡¢£¤¥¦§¨©ª«¬\u{AD}®¯°±²³´µ¶·¸¹º»¼½¾¿\
        ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß\
        àáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ
        """,
    ]

    // Latin-1 code units -> String.
    for (codeUnits, expectedText) in samples {
      let decodedText = String(decoding: codeUnits, as: Unicode.Latin1.self)
      expectEqual(expectedText, decodedText)
    }

    // String (as UTF-8) -> Latin-1 code units.
    for (expectedCodeUnits, text) in samples {
      let result = latin1CodeUnits(transcoding: text)
      expectEqualSequence(expectedCodeUnits, result.codeUnits)
      expectFalse(result.hadError)
    }

    // Scalars with no Latin-1 representation come out as 0x1A, and the
    // transcode call is still expected to report no error.
    do {
      let result = latin1CodeUnits(transcoding: "A\u{0100}B\u{10FFFF}C")
      expectEqualSequence([0x41, 0x1A, 0x42, 0x1A, 0x43], result.codeUnits)
      expectFalse(result.hadError)
    }
  }
}

runAllTests()