-
Notifications
You must be signed in to change notification settings - Fork 33
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Multipart] Introduce a bytes -> frames parser (#72)
[Multipart] Introduce a bytes -> frames parser ### Motivation Start landing the parts of the multipart machinery that are unlikely to change as part of the multipart proposal that's finishing review tomorrow. ### Modifications Introduce a bytes -> frames parser and an async sequence that wraps it. A "frame" is either the full header fields section or a single chunk of a part body. ### Result We can now frame bytes of a multipart body. ### Test Plan Added unit tests for the state machine, the parser, and the async sequence. Reviewed by: simonjbeaumont Builds: ✔︎ pull request validation (5.10) - Build finished. ✔︎ pull request validation (5.8) - Build finished. ✔︎ pull request validation (5.9) - Build finished. ✔︎ pull request validation (api breakage) - Build finished. ✔︎ pull request validation (docc test) - Build finished. ✔︎ pull request validation (integration test) - Build finished. ✔︎ pull request validation (nightly) - Build finished. ✔︎ pull request validation (soundness) - Build finished. #72
- Loading branch information
Showing
8 changed files
with
799 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,121 @@ | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// This source file is part of the SwiftOpenAPIGenerator open source project | ||
// | ||
// Copyright (c) 2023 Apple Inc. and the SwiftOpenAPIGenerator project authors | ||
// Licensed under Apache License v2.0 | ||
// | ||
// See LICENSE.txt for license information | ||
// See CONTRIBUTORS.txt for the list of SwiftOpenAPIGenerator project authors | ||
// | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
/// A namespace of ASCII byte constants and helpers for byte parsers and serializers.
///
/// The enum has no cases, so it cannot be instantiated; it only groups static members.
enum ASCII {

    /// The dash `-` character.
    static let dash: UInt8 = 0x2d

    /// The carriage return `<CR>` character.
    static let cr: UInt8 = 0x0d

    /// The line feed `<LF>` character.
    static let lf: UInt8 = 0x0a

    /// The colon `:` character.
    static let colon: UInt8 = 0x3a

    /// The space ` ` character.
    static let space: UInt8 = 0x20

    /// The horizontal tab `<TAB>` character.
    static let tab: UInt8 = 0x09

    /// Two dash characters.
    static let dashes: [UInt8] = [dash, dash]

    /// The `<CR>` character followed by the `<LF>` character.
    static let crlf: [UInt8] = [cr, lf]

    /// The characters that represent optional whitespace (OWS): space and horizontal tab.
    static let optionalWhitespace: Set<UInt8> = [space, tab]

    /// Checks whether the provided byte can appear in a header field name.
    /// - Parameter byte: The byte to check.
    /// - Returns: A Boolean value; `true` if the byte is valid in a header field
    ///   name, `false` otherwise.
    static func isValidHeaderFieldNameByte(_ byte: UInt8) -> Bool {
        // Copied from swift-http-types, because we create HTTPField.Name from these anyway later.
        switch byte {
        // The punctuation bytes: ! # $ % & ' * + - . ^ _ ` | ~
        case 0x21, 0x23, 0x24, 0x25, 0x26, 0x27, 0x2A, 0x2B, 0x2D, 0x2E, 0x5E, 0x5F, 0x60, 0x7C, 0x7E: return true
        case 0x30...0x39, 0x41...0x5A, 0x61...0x7A:  // DIGIT, ALPHA
            return true
        default: return false
        }
    }
}
|
||
/// A value returned by the `firstIndexAfterPrefix` method.
enum FirstIndexAfterPrefixResult<C: RandomAccessCollection> {

    /// The index after the end of the prefix match.
    case index(C.Index)

    /// Matched all characters so far, but reached the end of self before matching all.
    /// When more data is fetched, it's possible this will fully match.
    case reachedEndOfSelf

    /// The character at the provided index does not match the expected character.
    case unexpectedPrefix(C.Index)
}

extension RandomAccessCollection where Element: Equatable {

    /// Verifies that the elements match the provided sequence and returns the first index past the match.
    ///
    /// Matching always starts at `startIndex`. If `expectedElements` is empty,
    /// the result is `.index(startIndex)`.
    /// - Parameter expectedElements: The elements to match against.
    /// - Returns: `.index` with the first index past the matched prefix when every
    ///   expected element matched, `.reachedEndOfSelf` when self ran out of
    ///   elements before a mismatch was found, or `.unexpectedPrefix` with the
    ///   index of the first mismatching element.
    func firstIndexAfterPrefix(_ expectedElements: some Sequence<Element>) -> FirstIndexAfterPrefixResult<Self> {
        var index = startIndex
        for expectedElement in expectedElements {
            // Running out of our own elements is reported before comparing, so a
            // partial match at the end of self is never reported as a mismatch.
            guard index < endIndex else { return .reachedEndOfSelf }
            guard self[index] == expectedElement else { return .unexpectedPrefix(index) }
            formIndex(after: &index)
        }
        return .index(index)
    }
}
|
||
/// A value returned by the `longestMatch` method.
enum LongestMatchResult<C: RandomAccessCollection> {

    /// No match found at any position in self.
    case noMatch

    /// Found a prefix match but reached the end of self.
    /// Provides the index of the first matching character.
    /// When more data is fetched, this might become a full match.
    case prefixMatch(fromIndex: C.Index)

    /// Found a full match within self at the provided range.
    case fullMatch(Range<C.Index>)
}

extension RandomAccessCollection where Element: Equatable {

    /// Searches self for the provided elements and returns the earliest match.
    ///
    /// Candidate start positions are tried in order from `startIndex`. The first
    /// position that yields either a full match, or a partial match that runs
    /// into the end of self, determines the result.
    /// - Parameter expectedElements: The elements to match in the sequence. The
    ///   sequence is iterated once per candidate start position.
    /// - Returns: `.fullMatch` with the matched range, `.prefixMatch` with the
    ///   index where a partial match that reaches the end of self begins, or
    ///   `.noMatch` if no position matches (including when self is empty).
    func longestMatch(_ expectedElements: some Sequence<Element>) -> LongestMatchResult<Self> {
        var index = startIndex
        while index < endIndex {
            // Try to match the expected elements against the remainder of self.
            switch self[index...].firstIndexAfterPrefix(expectedElements) {
            case .index(let end): return .fullMatch(index..<end)
            case .reachedEndOfSelf: return .prefixMatch(fromIndex: index)
            case .unexpectedPrefix: formIndex(after: &index)  // Mismatch, retry one element later.
            }
        }
        return .noMatch
    }
}
67 changes: 67 additions & 0 deletions
67
Sources/OpenAPIRuntime/Multipart/MultipartBytesToFramesSequence.swift
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// This source file is part of the SwiftOpenAPIGenerator open source project | ||
// | ||
// Copyright (c) 2023 Apple Inc. and the SwiftOpenAPIGenerator project authors | ||
// Licensed under Apache License v2.0 | ||
// | ||
// See LICENSE.txt for license information | ||
// See CONTRIBUTORS.txt for the list of SwiftOpenAPIGenerator project authors | ||
// | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
import HTTPTypes | ||
|
||
/// A sequence that parses multipart frames from bytes.
///
/// The value only stores the upstream byte source and the boundary string.
struct MultipartBytesToFramesSequence<Upstream: AsyncSequence & Sendable>: Sendable
where Upstream.Element == ArraySlice<UInt8> {

    /// The source of byte chunks.
    var upstream: Upstream

    /// The boundary string used to separate multipart parts.
    var boundary: String
}
|
||
extension MultipartBytesToFramesSequence: AsyncSequence {

    /// The type of element produced by this asynchronous sequence.
    typealias Element = MultipartFrame

    /// Creates the asynchronous iterator that produces elements of this
    /// asynchronous sequence.
    ///
    /// - Returns: An instance of the `AsyncIterator` type used to produce
    ///   elements of the asynchronous sequence.
    func makeAsyncIterator() -> Iterator<Upstream.AsyncIterator> {
        Iterator(upstream: upstream.makeAsyncIterator(), boundary: boundary)
    }

    /// An iterator that pulls byte chunks from the upstream iterator and provides
    /// parsed multipart frames.
    struct Iterator<UpstreamIterator: AsyncIteratorProtocol>: AsyncIteratorProtocol
    where UpstreamIterator.Element == ArraySlice<UInt8> {

        /// The iterator that provides the byte chunks.
        private var upstream: UpstreamIterator

        /// The multipart frame parser.
        private var parser: MultipartParser

        /// Creates a new iterator from the provided source of byte chunks and a boundary string.
        /// - Parameters:
        ///   - upstream: The iterator that provides the byte chunks.
        ///   - boundary: The boundary separating the multipart parts.
        init(upstream: UpstreamIterator, boundary: String) {
            self.upstream = upstream
            self.parser = .init(boundary: boundary)
        }

        /// Asynchronously advances to the next element and returns it, or ends the
        /// sequence if there is no next element.
        ///
        /// The work is delegated to the parser, which is handed a closure that
        /// fetches the next byte chunk from the upstream iterator.
        ///
        /// - Returns: The next element, if it exists, or `nil` to signal the end of
        ///   the sequence.
        /// - Throws: Any error thrown by the parser or by the upstream iterator.
        mutating func next() async throws -> MultipartFrame? { try await parser.next { try await upstream.next() } }
    }
}
26 changes: 26 additions & 0 deletions
26
Sources/OpenAPIRuntime/Multipart/MultipartInternalTypes.swift
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// This source file is part of the SwiftOpenAPIGenerator open source project | ||
// | ||
// Copyright (c) 2023 Apple Inc. and the SwiftOpenAPIGenerator project authors | ||
// Licensed under Apache License v2.0 | ||
// | ||
// See LICENSE.txt for license information | ||
// See CONTRIBUTORS.txt for the list of SwiftOpenAPIGenerator project authors | ||
// | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
import HTTPTypes | ||
|
||
/// A frame of a multipart message, either the whole header fields
/// section or a chunk of the body bytes.
enum MultipartFrame: Sendable, Hashable {

    /// The header fields section.
    case headerFields(HTTPFields)

    /// One byte chunk of the part's body.
    case bodyChunk(ArraySlice<UInt8>)
}
Oops, something went wrong.