Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion Sources/BinaryParsing/Parser Types/ParserSpan.swift
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ public struct ParserSpan: ~Escapable, ~Copyable {
@inlinable
@_lifetime(copy self)
borrowing get {
_bytes._extracting(droppingFirst: _lowerBound)._extracting(first: count)
unsafe _bytes.extracting(
unchecked: Range(uncheckedBounds: (_lowerBound, _upperBound)))
}
}
}
Expand Down Expand Up @@ -130,6 +131,12 @@ extension ParserSpan {
fromUncheckedByteOffset: _lowerBound &+ i,
as: UInt8.self)
}

/// Consumes this parser span and returns a new `ParserSpan` initialized from
/// its `bytes`.
///
/// The result is built only from the bytes this span currently covers, so it
/// carries no reference to the bounds this span had within its original
/// storage.
///
/// NOTE(review): presumably the returned span's positions are relative to the
/// extracted bytes (i.e. it starts at position zero) — confirm against the
/// initializer that `Self(bytes)` resolves to.
@usableFromInline
@_lifetime(copy self)
consuming func extracted() -> ParserSpan {
Self(bytes)
}
}

extension ParserSpan {
Expand Down
95 changes: 93 additions & 2 deletions Sources/BinaryParsing/Parser Types/Slicing.swift
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
//
//===----------------------------------------------------------------------===//

// MARK: ParserSpan Slicing

extension ParserSpan {
/// Returns a new parser span covering the specified number of bytes from the
/// start of this parser span, shrinking this parser span by the same amount.
Expand All @@ -35,7 +37,7 @@ extension ParserSpan {
throw ParsingError(status: .invalidValue, location: startPosition)
}
guard count >= byteCount else {
throw ParsingError(status: .invalidValue, location: startPosition)
throw ParsingError(status: .insufficientData, location: startPosition)
}
return divide(atOffset: byteCount)
}
Expand Down Expand Up @@ -74,9 +76,10 @@ extension ParserSpan {
}
return try _divide(atByteOffset: byteCount)
}

}

// MARK: Range Slicing

extension ParserSpan {
/// Returns a parser range covering the specified number of bytes from the
/// start of this parser span, shrinking this parser span by the same amount.
Expand Down Expand Up @@ -154,6 +157,8 @@ extension ParserSpan {
}
}

// MARK: UTF8Span

extension ParserSpan {
/// Returns a `UTF8Span` covering the specified number of bytes from the
/// start of this parser span, shrinking this parser span by the same amount.
Expand Down Expand Up @@ -188,3 +193,89 @@ extension ParserSpan {
}
}
}

// MARK: Extracting

extension ParserSpan {
/// Extracts and returns a new parser span covering the specified number of
/// bytes from the start of this parser span, shrinking this parser span by
/// the same amount.
///
/// Use `extract(byteCount:)` to retrieve a separate span for a parsing
/// sub-task when you know the size of the task. For example, each chunk in
/// the PNG format begins with an identifier and the size of the chunk, in
/// bytes. A PNG chunk parser could use this method to extract the correct
/// number of bytes for each chunk, and limit parsing to within the resulting
/// span.
///
/// An _extracted_ parser span doesn't retain information about the bounds of
/// the original span, unlike a _slice_. If you need to seek beyond the
/// immediate bounds of the returned span, use the ``sliceSpan(byteCount:)``
/// method instead.
///
/// - Parameter byteCount: The number of bytes to include in the resulting
///   span. `byteCount` must be non-negative, and less than or equal to the
///   number of bytes remaining in the span.
/// - Returns: A new parser span covering `byteCount` bytes. The returned
///   parser span has a `startPosition` of zero and an `endPosition` equal
///   to `byteCount`.
/// - Throws: A `ParsingError` if `byteCount` cannot be represented as an
///   `Int`, if it's negative, or if there aren't enough bytes in the
///   original span.
@inlinable
@_lifetime(copy self)
public mutating func extract(byteCount: some FixedWidthInteger)
throws(ParsingError) -> ParserSpan
{
// Slice first, which validates `byteCount` and advances this span, then
// convert the slice into a standalone span.
try sliceSpan(byteCount: byteCount).extracted()
}

/// Extracts and returns a new parser span covering the specified number of
/// bytes calculated as the product of object count and stride from the start
/// of this parser span, shrinking this parser span by the same amount.
///
/// Use `extract(objectStride:objectCount:)` when you need to retrieve a
/// span for parsing a collection of fixed-size objects. This is particularly
/// useful when parsing arrays of binary data with known element sizes. For
/// example, if you're parsing an array of 4-byte integers and know there are
/// 10 elements, you can use:
///
///     let intArraySpan = try span.extract(objectStride: 4, objectCount: 10)
///
/// An _extracted_ parser span doesn't retain information about the bounds of
/// the original span, unlike a _slice_. If you need to seek beyond the
/// immediate bounds of the returned span, use the
/// ``sliceSpan(objectStride:objectCount:)`` method instead.
///
/// - Parameters:
///   - objectStride: The size in bytes of each object in the collection.
///   - objectCount: The number of objects to include in the resulting span.
/// - Returns: A new parser span covering `objectStride * objectCount` bytes,
///   with a `startPosition` of zero.
/// - Throws: A `ParsingError` if either `objectStride` or `objectCount`
///   cannot be represented as an `Int`, if their product would overflow, or
///   if the product is not in the range `0...count`.
@inlinable
@_lifetime(copy self)
public mutating func extract(
objectStride: some FixedWidthInteger,
objectCount: some FixedWidthInteger
) throws(ParsingError) -> ParserSpan {
// Slice first, which validates the requested size and advances this span,
// then convert the slice into a standalone span.
try sliceSpan(objectStride: objectStride, objectCount: objectCount)
.extracted()
}

/// Extracts and returns a parser span covering the remaining bytes in this
/// parser span, leaving this parser span empty.
///
/// An _extracted_ parser span doesn't retain information about the bounds of
/// the original span, unlike a _slice_.
///
/// - Returns: A parser span covering the rest of the memory represented
///   by this parser span, with a `startPosition` of zero and `endPosition`
///   equal to the remaining number of bytes.
@inlinable
@_lifetime(copy self)
public mutating func extractRemaining() -> ParserSpan {
// Dividing at `count` hands every remaining byte to the returned span.
divide(atOffset: self.count).extracted()
}
}
212 changes: 212 additions & 0 deletions Tests/BinaryParsingTests/ExtractingTests.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift Binary Parsing open source project
//
// Copyright (c) 2025 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
//
//===----------------------------------------------------------------------===//

import BinaryParsing
import Testing

// Sixteen bytes of test input. Read as consecutive big-endian `UInt16`
// values, the contents are 1, 2, 3, 4, 5, 6, 7, 0.
private let buffer: [UInt8] = [
0, 1, 0, 2, 0, 3, 0, 4,
0, 5, 0, 6, 0, 7, 0, 0,
]

// Zero-length input used to exercise the empty-span edge cases.
private let emptyBuffer: [UInt8] = []

/// Tests for the `extract(byteCount:)`, `extract(objectStride:objectCount:)`,
/// and `extractRemaining()` methods on a parser span.
///
/// Each test walks a single parser span forward, so the expectations depend
/// on the statements running in the order written.
struct ExtractingTests {
/// Extracting by byte count yields a zero-based span with the requested
/// bytes, advances the input, and throws for oversized or negative counts.
@Test func extractByteCount() throws {
try buffer.withParserSpan { input in
var firstSpan = try input.extract(byteCount: 4)
#expect(firstSpan.startPosition == 0)
#expect(firstSpan.count == 4)

// Verify contents of the extracted span
let firstValue = try UInt16(parsingBigEndian: &firstSpan)
let secondValue = try UInt16(parsingBigEndian: &firstSpan)
#expect(firstValue == 1)
#expect(secondValue == 2)
#expect(firstSpan.count == 0)

// Input position should advance
#expect(input.startPosition == 4)
#expect(input.count == 12)

// Extract another span after advancing the input
_ = try input.seek(toRelativeOffset: 2)
var secondSpan = try input.extract(byteCount: 4)
#expect(secondSpan.startPosition == 0) // Extracted span starts at 0
#expect(secondSpan.count == 4)

// Verify the content of the second extracted span
let thirdValue = try UInt16(parsingBigEndian: &secondSpan)
let fourthValue = try UInt16(parsingBigEndian: &secondSpan)
#expect(thirdValue == 4)
#expect(fourthValue == 5)

// Try extracting with zero byteCount
let emptySpan = try input.extract(byteCount: 0)
#expect(emptySpan.count == 0)
#expect(emptySpan.startPosition == 0)

// Attempt to extract more than available (only 6 bytes remain here)
#expect(throws: ParsingError.self) {
_ = try input.extract(byteCount: 11)
}

// Try with negative byteCount
#expect(throws: ParsingError.self) {
_ = try input.extract(byteCount: -1)
}
}

// Test with empty buffer
try emptyBuffer.withParserSpan { input in
// Zero byteCount should succeed
let emptySpan = try input.extract(byteCount: 0)
#expect(emptySpan.count == 0)
#expect(emptySpan.startPosition == 0)

// Any positive byteCount should fail
#expect(throws: ParsingError.self) {
_ = try input.extract(byteCount: 1)
}
}
}

/// Extracting by stride and count yields a zero-based span of
/// `objectStride * objectCount` bytes, and throws for oversized, negative,
/// or overflowing requests.
@Test func extractObjectCount() throws {
try buffer.withParserSpan { input in
// 2 objects of 2 bytes each
var firstSpan = try input.extract(objectStride: 2, objectCount: 2)
#expect(firstSpan.startPosition == 0)
#expect(firstSpan.count == 4)

// Verify contents of the extracted span
let firstValue = try UInt16(parsingBigEndian: &firstSpan)
let secondValue = try UInt16(parsingBigEndian: &firstSpan)
#expect(firstValue == 1)
#expect(secondValue == 2)
#expect(firstSpan.count == 0)

// 1 object of 4 bytes
var secondSpan = try input.extract(objectStride: 4, objectCount: 1)
#expect(secondSpan.startPosition == 0) // Extracted spans start at 0
#expect(secondSpan.count == 4)

// Verify contents of the second extract
let thirdValue = try UInt32(parsingBigEndian: &secondSpan)
#expect(thirdValue == 0x0003_0004)
#expect(secondSpan.count == 0)

// Input position should advance
#expect(input.startPosition == 8)
#expect(input.count == 8)

// objectCount == 0 (should create an empty extracted span)
let emptySpan = try input.extract(objectStride: 2, objectCount: 0)
#expect(emptySpan.count == 0)
#expect(emptySpan.startPosition == 0)

// objectStride == 0 (should create an empty extracted span)
let emptySpan2 = try input.extract(objectStride: 0, objectCount: 5)
#expect(emptySpan2.count == 0)
#expect(emptySpan2.startPosition == 0)

// 3 * 3 = 9 bytes requested, but only 8 remain
#expect(throws: ParsingError.self) {
_ = try input.extract(objectStride: 3, objectCount: 3)
}
// A failed extract must leave the input position unchanged
#expect(input.startPosition == 8)
// Negative stride
#expect(throws: ParsingError.self) {
_ = try input.extract(objectStride: -1, objectCount: 2)
}
// Negative count
#expect(throws: ParsingError.self) {
_ = try input.extract(objectStride: 2, objectCount: -1)
}
// stride * count overflows Int
#expect(throws: ParsingError.self) {
_ = try input.extract(objectStride: Int.max, objectCount: 2)
}
}

// Test with empty buffer
try emptyBuffer.withParserSpan { input in
let emptySpan = try input.extract(objectStride: 4, objectCount: 0)
#expect(emptySpan.count == 0)
#expect(emptySpan.startPosition == 0)

#expect(throws: ParsingError.self) {
_ = try input.extract(objectStride: 1, objectCount: 1)
}
}
}

/// Extracting the remainder yields a zero-based span of all remaining bytes
/// and leaves the input empty.
@Test func extractRemaining() throws {
try buffer.withParserSpan { input in
// Advance to a position within the buffer
try input.seek(toRelativeOffset: 6)

var remainingSpan = input.extractRemaining()
#expect(remainingSpan.startPosition == 0) // Extracted spans start at 0
#expect(remainingSpan.count == 10) // 16 - 6 = 10 bytes remaining

// Verify that the original input has been fully consumed
#expect(input.count == 0)

// Verify we can parse the extracted remaining data
let value1 = try UInt16(parsingBigEndian: &remainingSpan)
let value2 = try UInt16(parsingBigEndian: &remainingSpan)
#expect(value1 == 4)
#expect(value2 == 5)
#expect(remainingSpan.count == 6)

// Reset to beginning and extract all
try input.seek(toAbsoluteOffset: 0)
var fullSpan = input.extractRemaining()
#expect(fullSpan.startPosition == 0)
#expect(fullSpan.count == 16)
#expect(input.count == 0)

// Parse a few values to verify it contains the full buffer data
let fullValue1 = try UInt16(parsingBigEndian: &fullSpan)
let fullValue2 = try UInt16(parsingBigEndian: &fullSpan)
#expect(fullValue1 == 1)
#expect(fullValue2 == 2)
}

// Test with empty buffer
emptyBuffer.withParserSpan { input in
let emptySpan = input.extractRemaining()
#expect(emptySpan.startPosition == 0)
#expect(emptySpan.count == 0)
#expect(input.count == 0)
}
}

/// A slice and an extract of the same bytes parse identically, but the slice
/// keeps its position in the original input while the extract starts at 0.
@Test func extractSliceSemantics() throws {
try buffer.withParserSpan { input in
// Create slice and extract of the same data
try input.seek(toAbsoluteOffset: 4)
var slicedSpan = try input.sliceSpan(byteCount: 4)
try input.seek(toAbsoluteOffset: 4) // Go back to same position
var extractedSpan = try input.extract(byteCount: 4)

// Both should have same count...
#expect(slicedSpan.count == 4)
#expect(extractedSpan.count == 4)

// ...but different start positions
#expect(slicedSpan.startPosition == 4)
#expect(extractedSpan.startPosition == 0)

// Both should parse the same values
let sliceValue = try UInt32(parsingBigEndian: &slicedSpan)
let extractValue = try UInt32(parsingBigEndian: &extractedSpan)
#expect(sliceValue == 0x0003_0004)
#expect(sliceValue == extractValue)
}
}
}