From 090a1d45c281bfc1ef37a00515fe202032905ef2 Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Wed, 10 Sep 2025 13:20:10 -0500 Subject: [PATCH] Add `extracting` APIs that reset span bounds This change adds three `ParserSpan.extracting` APIs that yield new `ParserSpan` instances that have their internal span boundaries shrunk to fit, instead of just insetting the boundaries at the `ParserSpan` level. An extracted span can be passed off to a parsing function without worry that an absolute seek will go beyond the visible boundaries of the span. --- .../Parser Types/ParserSpan.swift | 9 +- .../BinaryParsing/Parser Types/Slicing.swift | 95 +++++++- .../BinaryParsingTests/ExtractingTests.swift | 212 ++++++++++++++++++ 3 files changed, 313 insertions(+), 3 deletions(-) create mode 100644 Tests/BinaryParsingTests/ExtractingTests.swift diff --git a/Sources/BinaryParsing/Parser Types/ParserSpan.swift b/Sources/BinaryParsing/Parser Types/ParserSpan.swift index 4fa7741..34525b4 100644 --- a/Sources/BinaryParsing/Parser Types/ParserSpan.swift +++ b/Sources/BinaryParsing/Parser Types/ParserSpan.swift @@ -69,7 +69,8 @@ public struct ParserSpan: ~Escapable, ~Copyable { @inlinable @_lifetime(copy self) borrowing get { - _bytes._extracting(droppingFirst: _lowerBound)._extracting(first: count) + unsafe _bytes.extracting( + unchecked: Range(uncheckedBounds: (_lowerBound, _upperBound))) } } } @@ -130,6 +131,12 @@ extension ParserSpan { fromUncheckedByteOffset: _lowerBound &+ i, as: UInt8.self) } + + @usableFromInline + @_lifetime(copy self) + consuming func extracted() -> ParserSpan { + Self(bytes) + } } extension ParserSpan { diff --git a/Sources/BinaryParsing/Parser Types/Slicing.swift b/Sources/BinaryParsing/Parser Types/Slicing.swift index 185cd28..ba39e4a 100644 --- a/Sources/BinaryParsing/Parser Types/Slicing.swift +++ b/Sources/BinaryParsing/Parser Types/Slicing.swift @@ -9,6 +9,8 @@ // //===----------------------------------------------------------------------===// +// MARK: 
ParserSpan Slicing + extension ParserSpan { /// Returns a new parser span covering the specified number of bytes from the /// start of this parser span, shrinking this parser span by the same amount. @@ -35,7 +37,7 @@ extension ParserSpan { throw ParsingError(status: .invalidValue, location: startPosition) } guard count >= byteCount else { - throw ParsingError(status: .invalidValue, location: startPosition) + throw ParsingError(status: .insufficientData, location: startPosition) } return divide(atOffset: byteCount) } @@ -74,9 +76,10 @@ extension ParserSpan { } return try _divide(atByteOffset: byteCount) } - } +// MARK: Range Slicing + extension ParserSpan { /// Returns a parser range covering the specified number of bytes from the /// start of this parser span, shrinking this parser span by the same amount. @@ -154,6 +157,8 @@ extension ParserSpan { } } +// MARK: UTF8Span + extension ParserSpan { /// Returns a `UTF8Span` covering the specified number of bytes from the /// start of this parser span, shrinking this parser span by the same amount. @@ -188,3 +193,89 @@ extension ParserSpan { } } } + +// MARK: Extracting + +extension ParserSpan { + /// Extracts and returns a new parser span covering the specified number of + /// bytes from the start of this parser span, shrinking this parser span by + /// the same amount. + /// + /// Use `extract(byteCount:)` to retrieve a separate span for a parsing + /// sub-task when you know the size of the task. For example, each chunk in + /// the PNG format begins with an identifier and the size of the chunk, in + /// bytes. A PNG chunk parser could use this method to slice the correct size + /// for each chunk, and limit parsing to within the resulting span. + /// + /// An _extracted_ parser span doesn't retain information about the bounds of + /// the original span, unlike a _slice_. If you need to seek beyond the + /// immediate bounds of the returned span, use the ``sliceSpan(byteCount:)`` + /// method instead. 
+ /// + /// - Parameter byteCount: The number of bytes to include in the resulting + /// span. `byteCount` must be non-negative, and less than or equal to the + /// number of bytes remaining in the span. + /// - Returns: A new parser span covering `byteCount` bytes. The returned + /// parser span has a `startPosition` of zero and an `endPosition` equal + /// to `byteCount`. + /// - Throws: A `ParsingError` if `byteCount` cannot be represented as an + /// `Int`, if it's negative, or if there aren't enough bytes in the + /// original span. + @inlinable + @_lifetime(copy self) + public mutating func extract(byteCount: some FixedWidthInteger) + throws(ParsingError) -> ParserSpan + { + try sliceSpan(byteCount: byteCount).extracted() + } + + /// Extracts and returns a new parser span covering the specified number of + /// bytes calculated as the product of object count and stride from the start + /// of this parser span, shrinking this parser span by the same amount. + /// + /// Use `extract(objectStride:objectCount:)` when you need to retrieve a + /// span for parsing a collection of fixed-size objects. This is particularly + /// useful when parsing arrays of binary data with known element sizes. For + /// example, if you're parsing an array of 4-byte integers and know there are + /// 10 elements, you can use: + /// + /// let intArraySpan = try span.extract(objectStride: 4, objectCount: 10) + /// + /// An _extracted_ parser span doesn't retain information about the bounds of + /// the original span, unlike a _slice_. If you need to seek beyond the + /// immediate bounds of the returned span, use the ``sliceSpan(objectStride:objectCount:)`` + /// method instead. + /// + /// - Parameters: + /// - objectStride: The size in bytes of each object in the collection. + /// - objectCount: The number of objects to include in the resulting span. + /// - Returns: A new parser span covering `objectStride * objectCount` bytes, + /// with a `startPosition` of zero. 
+ /// - Throws: A `ParsingError` if either `objectStride` or `objectCount` + /// cannot be represented as an `Int`, if their product would overflow, or + /// if the product is not in the range `0...count`. + @inlinable + @_lifetime(copy self) + public mutating func extract( + objectStride: some FixedWidthInteger, + objectCount: some FixedWidthInteger + ) throws(ParsingError) -> ParserSpan { + try sliceSpan(objectStride: objectStride, objectCount: objectCount) + .extracted() + } + + /// Extracts and returns a parser span covering the remaining bytes in this + /// parser span. + /// + /// An _extracted_ parser span doesn't retain information about the bounds of + /// the original span, unlike a _slice_. + /// + /// - Returns: A new parser span covering the rest of the memory represented + /// by this parser span, with a `startPosition` of zero and an `endPosition` + /// equal to the remaining number of bytes. + @inlinable + @_lifetime(copy self) + public mutating func extractRemaining() -> ParserSpan { + divide(atOffset: self.count).extracted() + } +} diff --git a/Tests/BinaryParsingTests/ExtractingTests.swift b/Tests/BinaryParsingTests/ExtractingTests.swift new file mode 100644 index 0000000..ab556f6 --- /dev/null +++ b/Tests/BinaryParsingTests/ExtractingTests.swift @@ -0,0 +1,212 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift Binary Parsing open source project +// +// Copyright (c) 2025 Apple Inc. 
and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// +//===----------------------------------------------------------------------===// + +import BinaryParsing +import Testing + +private let buffer: [UInt8] = [ + 0, 1, 0, 2, 0, 3, 0, 4, + 0, 5, 0, 6, 0, 7, 0, 0, +] + +private let emptyBuffer: [UInt8] = [] + +struct ExtractingTests { + @Test func extractByteCount() throws { + try buffer.withParserSpan { input in + var firstSpan = try input.extract(byteCount: 4) + #expect(firstSpan.startPosition == 0) + #expect(firstSpan.count == 4) + + // Verify contents of the extracted span + let firstValue = try UInt16(parsingBigEndian: &firstSpan) + let secondValue = try UInt16(parsingBigEndian: &firstSpan) + #expect(firstValue == 1) + #expect(secondValue == 2) + #expect(firstSpan.count == 0) + + // Input position should advance + #expect(input.startPosition == 4) + #expect(input.count == 12) + + // Extract another span after advancing the input + _ = try input.seek(toRelativeOffset: 2) + var secondSpan = try input.extract(byteCount: 4) + #expect(secondSpan.startPosition == 0) // Extracted span starts at 0 + #expect(secondSpan.count == 4) + + // Verify the content of the second extracted span + let thirdValue = try UInt16(parsingBigEndian: &secondSpan) + let fourthValue = try UInt16(parsingBigEndian: &secondSpan) + #expect(thirdValue == 4) + #expect(fourthValue == 5) + + // Try extracting with zero byteCount + let emptySpan = try input.extract(byteCount: 0) + #expect(emptySpan.count == 0) + #expect(emptySpan.startPosition == 0) + + // Attempt to extract more than available + #expect(throws: ParsingError.self) { + _ = try input.extract(byteCount: 11) + } + + // Try with negative byteCount + #expect(throws: ParsingError.self) { + _ = try input.extract(byteCount: -1) + } + } + + // Test with empty buffer + try emptyBuffer.withParserSpan { input in + // Zero byteCount 
should succeed + let emptySpan = try input.extract(byteCount: 0) + #expect(emptySpan.count == 0) + #expect(emptySpan.startPosition == 0) + + // Any positive byteCount should fail + #expect(throws: ParsingError.self) { + _ = try input.extract(byteCount: 1) + } + } + } + + @Test func extractObjectCount() throws { + try buffer.withParserSpan { input in + // 2 objects of 2 bytes each + var firstSpan = try input.extract(objectStride: 2, objectCount: 2) + #expect(firstSpan.startPosition == 0) + #expect(firstSpan.count == 4) + + // Verify contents of the extracted span + let firstValue = try UInt16(parsingBigEndian: &firstSpan) + let secondValue = try UInt16(parsingBigEndian: &firstSpan) + #expect(firstValue == 1) + #expect(secondValue == 2) + #expect(firstSpan.count == 0) + + // 1 object of 4 bytes + var secondSpan = try input.extract(objectStride: 4, objectCount: 1) + #expect(secondSpan.startPosition == 0) // Extracted spans start at 0 + #expect(secondSpan.count == 4) + + // Verify contents of the second extract + let thirdValue = try UInt32(parsingBigEndian: &secondSpan) + #expect(thirdValue == 0x0003_0004) + #expect(secondSpan.count == 0) + + // Input position should advance + #expect(input.startPosition == 8) + #expect(input.count == 8) + + // objectCount == 0 (should create an empty extracted span) + let emptySpan = try input.extract(objectStride: 2, objectCount: 0) + #expect(emptySpan.count == 0) + #expect(emptySpan.startPosition == 0) + + // objectStride == 0 (should create an empty extracted span) + let emptySpan2 = try input.extract(objectStride: 0, objectCount: 5) + #expect(emptySpan2.count == 0) + #expect(emptySpan2.startPosition == 0) + + #expect(throws: ParsingError.self) { + _ = try input.extract(objectStride: 3, objectCount: 3) + } + #expect(input.startPosition == 8) + #expect(throws: ParsingError.self) { + _ = try input.extract(objectStride: -1, objectCount: 2) + } + #expect(throws: ParsingError.self) { + _ = try input.extract(objectStride: 2, 
objectCount: -1) + } + #expect(throws: ParsingError.self) { + _ = try input.extract(objectStride: Int.max, objectCount: 2) + } + } + + // Test with empty buffer + try emptyBuffer.withParserSpan { input in + let emptySpan = try input.extract(objectStride: 4, objectCount: 0) + #expect(emptySpan.count == 0) + #expect(emptySpan.startPosition == 0) + + #expect(throws: ParsingError.self) { + _ = try input.extract(objectStride: 1, objectCount: 1) + } + } + } + + @Test func extractRemaining() throws { + try buffer.withParserSpan { input in + // Advance to a position within the buffer + try input.seek(toRelativeOffset: 6) + + var remainingSpan = input.extractRemaining() + #expect(remainingSpan.startPosition == 0) // Extracted spans start at 0 + #expect(remainingSpan.count == 10) // 16 - 6 = 10 bytes remaining + + // Verify that original input is consumed & reset + #expect(input.count == 0) + + // Verify we can parse the extracted remaining data + let value1 = try UInt16(parsingBigEndian: &remainingSpan) + let value2 = try UInt16(parsingBigEndian: &remainingSpan) + #expect(value1 == 4) + #expect(value2 == 5) + #expect(remainingSpan.count == 6) + + // Reset to beginning and extract all + try input.seek(toAbsoluteOffset: 0) + var fullSpan = input.extractRemaining() + #expect(fullSpan.startPosition == 0) + #expect(fullSpan.count == 16) + #expect(input.count == 0) + + // Parse a few values to verify it contains the full buffer data + let fullValue1 = try UInt16(parsingBigEndian: &fullSpan) + let fullValue2 = try UInt16(parsingBigEndian: &fullSpan) + #expect(fullValue1 == 1) + #expect(fullValue2 == 2) + } + + // Test with empty buffer + emptyBuffer.withParserSpan { input in + let emptySpan = input.extractRemaining() + #expect(emptySpan.startPosition == 0) + #expect(emptySpan.count == 0) + #expect(input.count == 0) + } + } + + @Test func extractSliceSemantics() throws { + try buffer.withParserSpan { input in + // Create slice and extract of the same data + try 
input.seek(toAbsoluteOffset: 4) + var slicedSpan = try input.sliceSpan(byteCount: 4) + try input.seek(toAbsoluteOffset: 4) // Go back to same position + var extractedSpan = try input.extract(byteCount: 4) + + // Both should have same count... + #expect(slicedSpan.count == 4) + #expect(extractedSpan.count == 4) + + // ...but different start positions + #expect(slicedSpan.startPosition == 4) + #expect(extractedSpan.startPosition == 0) + + // Both should parse the same values + let sliceValue = try UInt32(parsingBigEndian: &slicedSpan) + let extractValue = try UInt32(parsingBigEndian: &extractedSpan) + #expect(sliceValue == 0x0003_0004) + #expect(sliceValue == extractValue) + } + } +}