Add a `Unicode.Latin1` encoding to the standard library.

* Registers the new `UnicodeLatin1.swift` in `CMakeLists.txt` and `GroupInfo.json`.
* `Unicode.Latin1` conforms to `Unicode.Encoding`: one `UInt8` code unit per
  scalar, held in a `CollectionOfOne`; `encode` returns nil for scalars above
  U+00FF, and the replacement character is U+001A SUBSTITUTE.
* `Unicode.Latin1.Parser` serves as both the forward and the reverse parser and
  yields `.valid` for every code unit, `.emptyInput` once the iterator is done.
* Adds "Encoding", "Parser" and "Transcode" tests to `test/stdlib/Unicode.swift`.

diff --git a/stdlib/public/core/CMakeLists.txt b/stdlib/public/core/CMakeLists.txt
index 2d33bc1769fbe..875c65f99a6f7 100644
--- a/stdlib/public/core/CMakeLists.txt
+++ b/stdlib/public/core/CMakeLists.txt
@@ -2,7 +2,7 @@
 #
 # This source file is part of the Swift.org open source project
 #
-# Copyright (c) 2014 - 2019 Apple Inc. and the Swift project authors
+# Copyright (c) 2014 - 2020 Apple Inc. and the Swift project authors
 # Licensed under Apache License v2.0 with Runtime Library Exception
 #
 # See https://swift.org/LICENSE.txt for license information
@@ -219,6 +219,7 @@ set(SWIFTLIB_SOURCES
   SliceBuffer.swift
   SIMDVector.swift
   UnfoldSequence.swift
+  UnicodeLatin1.swift
   VarArgs.swift
   Zip.swift
   "${SWIFT_SOURCE_DIR}/stdlib/linker-support/magic-symbols-for-install-name.c"
diff --git a/stdlib/public/core/GroupInfo.json b/stdlib/public/core/GroupInfo.json
index 289703802610a..77009867bea97 100644
--- a/stdlib/public/core/GroupInfo.json
+++ b/stdlib/public/core/GroupInfo.json
@@ -43,6 +43,7 @@
     "Unicode.swift",
     "UnicodeEncoding.swift",
     "UnicodeHelpers.swift",
+    "UnicodeLatin1.swift",
     "UnicodeParser.swift",
     "UnicodeScalar.swift",
     "UnicodeScalarProperties.swift",
diff --git a/stdlib/public/core/UnicodeLatin1.swift b/stdlib/public/core/UnicodeLatin1.swift
new file mode 100644
index 0000000000000..7628bd657c608
--- /dev/null
+++ b/stdlib/public/core/UnicodeLatin1.swift
@@ -0,0 +1,66 @@
+//===--- UnicodeLatin1.swift ----------------------------------------------===//
+//
+// This source file is part of the Swift.org open source project
+//
+// Copyright (c) 2020 Apple Inc. and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
+//
+//===----------------------------------------------------------------------===//
+
+@available(macOS 9999, iOS 9999, tvOS 9999, watchOS 9999, *)
+extension Unicode {
+
+  @frozen
+  public enum Latin1 {}
+}
+
+@available(macOS 9999, iOS 9999, tvOS 9999, watchOS 9999, *)
+extension Unicode.Latin1: Unicode.Encoding {
+
+  @frozen
+  public struct Parser {
+
+    @inlinable
+    public init() {}
+  }
+
+  public typealias CodeUnit = UInt8
+  public typealias EncodedScalar = CollectionOfOne<CodeUnit>
+  public typealias ForwardParser = Parser
+  public typealias ReverseParser = Parser
+
+  @inlinable
+  public static var encodedReplacementCharacter: EncodedScalar {
+    EncodedScalar(0x1A) // U+001A SUBSTITUTE
+  }
+
+  @inlinable
+  public static func decode(_ encodedScalar: EncodedScalar) -> Unicode.Scalar {
+    Unicode.Scalar(encodedScalar[0])
+  }
+
+  @inlinable
+  public static func encode(_ unicodeScalar: Unicode.Scalar) -> EncodedScalar? {
+    CodeUnit(exactly: unicodeScalar.value).map { EncodedScalar($0) }
+  }
+
+  @inlinable
+  public static func _isScalar(_: CodeUnit) -> Bool { true }
+}
+
+@available(macOS 9999, iOS 9999, tvOS 9999, watchOS 9999, *)
+extension Unicode.Latin1.Parser: Unicode.Parser {
+
+  public typealias Encoding = Unicode.Latin1
+
+  @inlinable
+  public mutating func parseScalar<I: IteratorProtocol>(
+    from codeUnits: inout I
+  ) -> Unicode.ParseResult<Encoding.EncodedScalar>
+  where I.Element == Encoding.CodeUnit {
+    codeUnits.next().map { .valid(Encoding.EncodedScalar($0)) } ?? .emptyInput
+  }
+}
diff --git a/test/stdlib/Unicode.swift b/test/stdlib/Unicode.swift
index d3c77fd2060a7..0af16ac820247 100644
--- a/test/stdlib/Unicode.swift
+++ b/test/stdlib/Unicode.swift
@@ -2,7 +2,7 @@
 //
 // This source file is part of the Swift.org open source project
 //
-// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
+// Copyright (c) 2014 - 2020 Apple Inc. and the Swift project authors
 // Licensed under Apache License v2.0 with Runtime Library Exception
 //
 // See https://swift.org/LICENSE.txt for license information
@@ -110,4 +110,81 @@ UnicodeAPIs.test("UTF-8 and UTF-16 queries") {
   }
 }
 
+if #available(macOS 9999, iOS 9999, tvOS 9999, watchOS 9999, *) {
+  var UnicodeLatin1 = TestSuite("UnicodeLatin1")
+
+  UnicodeLatin1.test("Encoding") {
+    let unicodeScalars = (UInt8.min ... UInt8.max).map { Unicode.Scalar($0) }
+    let encodedScalars = unicodeScalars.compactMap { Unicode.Latin1.encode($0) }
+    let decodedScalars = encodedScalars.map { Unicode.Latin1.decode($0) }
+    expectEqualSequence(unicodeScalars, decodedScalars)
+    expectNil(Unicode.Latin1.encode("\u{0100}"))
+    expectNil(Unicode.Latin1.encode("\u{10FFFF}"))
+  }
+
+  UnicodeLatin1.test("Parser") {
+    let codeUnits = UInt8.min ... UInt8.max
+    var codeUnitsIterator = codeUnits.makeIterator()
+    var encodedScalars: [Unicode.Latin1.EncodedScalar] = []
+    var forwardParser = Unicode.Latin1.ForwardParser()
+    loop: while true {
+      switch forwardParser.parseScalar(from: &codeUnitsIterator) {
+      case .valid(let encodedScalar):
+        encodedScalars.append(encodedScalar)
+      case .emptyInput:
+        expectEqualSequence(codeUnits, encodedScalars.joined())
+        break loop
+      case .error:
+        expectUnreachable()
+        break loop
+      }
+    }
+  }
+
+  UnicodeLatin1.test("Transcode") {
+    let codeUnitsAndText: [ClosedRange<UInt8>: String] = [
+      UInt8(0x20) ... UInt8(0x7E):
+        """
+        \u{20}!"#$%&'()*+,-./0123456789:;<=>?\
+        @ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_\
+        `abcdefghijklmnopqrstuvwxyz{|}~
+        """,
+      UInt8(0xA0) ... UInt8(0xFF):
+        """
+        \u{A0}¡¢£¤¥¦§¨©ª«¬\u{AD}®¯°±²³´µ¶·¸¹º»¼½¾¿\
+        ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß\
+        àáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ
+        """,
+    ]
+    for (codeUnits, expectedText) in codeUnitsAndText {
+      let actualText = String(decoding: codeUnits, as: Unicode.Latin1.self)
+      expectEqual(expectedText, actualText)
+    }
+    for (expectedCodeUnits, text) in codeUnitsAndText {
+      var actualCodeUnits: [UInt8] = []
+      let hadError = transcode(
+        text.utf8.makeIterator(),
+        from: Unicode.UTF8.self,
+        to: Unicode.Latin1.self,
+        stoppingOnError: false,
+        into: { actualCodeUnits.append($0) }
+      )
+      expectEqualSequence(expectedCodeUnits, actualCodeUnits)
+      expectFalse(hadError)
+    }
+    do {
+      var actualCodeUnits: [UInt8] = []
+      let hadError = transcode(
+        "A\u{0100}B\u{10FFFF}C".utf8.makeIterator(),
+        from: Unicode.UTF8.self,
+        to: Unicode.Latin1.self,
+        stoppingOnError: false,
+        into: { actualCodeUnits.append($0) }
+      )
+      expectEqualSequence([0x41, 0x1A, 0x42, 0x1A, 0x43], actualCodeUnits)
+      expectFalse(hadError)
+    }
+  }
+}
+
 runAllTests()