From 090a1d45c281bfc1ef37a00515fe202032905ef2 Mon Sep 17 00:00:00 2001
From: Nate Cook <natecook@apple.com>
Date: Wed, 10 Sep 2025 13:20:10 -0500
Subject: [PATCH] Add `extracting` APIs that reset span bounds

This change adds three `ParserSpan` extracting APIs (`extract(byteCount:)`,
`extract(objectStride:objectCount:)`, and `extractRemaining()`) that yield
new `ParserSpan` instances whose internal span boundaries are shrunk to
fit, instead of just insetting the boundaries at the `ParserSpan` level.
An extracted span can be passed off to a parsing function without worry
that an absolute seek will go beyond the visible boundaries of the span.
---
 .../Parser Types/ParserSpan.swift             |   9 +-
 .../BinaryParsing/Parser Types/Slicing.swift  |  95 +++++++-
 .../BinaryParsingTests/ExtractingTests.swift  | 212 ++++++++++++++++++
 3 files changed, 313 insertions(+), 3 deletions(-)
 create mode 100644 Tests/BinaryParsingTests/ExtractingTests.swift

diff --git a/Sources/BinaryParsing/Parser Types/ParserSpan.swift b/Sources/BinaryParsing/Parser Types/ParserSpan.swift
index 4fa7741..34525b4 100644
--- a/Sources/BinaryParsing/Parser Types/ParserSpan.swift	
+++ b/Sources/BinaryParsing/Parser Types/ParserSpan.swift	
@@ -69,7 +69,8 @@ public struct ParserSpan: ~Escapable, ~Copyable {
     @inlinable
     @_lifetime(copy self)
     borrowing get {
-      _bytes._extracting(droppingFirst: _lowerBound)._extracting(first: count)
+      unsafe _bytes.extracting(
+        unchecked: Range(uncheckedBounds: (_lowerBound, _upperBound)))
     }
   }
 }
@@ -130,6 +131,12 @@ extension ParserSpan {
       fromUncheckedByteOffset: _lowerBound &+ i,
       as: UInt8.self)
   }
+
+  @usableFromInline
+  @_lifetime(copy self)
+  consuming func extracted() -> ParserSpan {
+    Self(bytes)  // Wraps just the visible bytes, dropping the outer bounds.
+  }
 }
 
 extension ParserSpan {
diff --git a/Sources/BinaryParsing/Parser Types/Slicing.swift b/Sources/BinaryParsing/Parser Types/Slicing.swift
index 185cd28..ba39e4a 100644
--- a/Sources/BinaryParsing/Parser Types/Slicing.swift	
+++ b/Sources/BinaryParsing/Parser Types/Slicing.swift	
@@ -9,6 +9,8 @@
 //
 //===----------------------------------------------------------------------===//
 
+// MARK: ParserSpan Slicing
+
 extension ParserSpan {
   /// Returns a new parser span covering the specified number of bytes from the
   /// start of this parser span, shrinking this parser span by the same amount.
@@ -35,7 +37,7 @@ extension ParserSpan {
       throw ParsingError(status: .invalidValue, location: startPosition)
     }
     guard count >= byteCount else {
-      throw ParsingError(status: .invalidValue, location: startPosition)
+      throw ParsingError(status: .insufficientData, location: startPosition)
     }
     return divide(atOffset: byteCount)
   }
@@ -74,9 +76,10 @@ extension ParserSpan {
     }
     return try _divide(atByteOffset: byteCount)
   }
-
 }
 
+// MARK: Range Slicing
+
 extension ParserSpan {
   /// Returns a parser range covering the specified number of bytes from the
   /// start of this parser span, shrinking this parser span by the same amount.
@@ -154,6 +157,8 @@ extension ParserSpan {
   }
 }
 
+// MARK: UTF8Span
+
 extension ParserSpan {
   /// Returns a `UTF8Span` covering the specified number of bytes from the
   /// start of this parser span, shrinking this parser span by the same amount.
@@ -188,3 +193,89 @@ extension ParserSpan {
     }
   }
 }
+
+// MARK: Extracting
+
+extension ParserSpan {
+  /// Extracts and returns a new parser span covering the specified number of
+  /// bytes from the start of this parser span, shrinking this parser span by
+  /// the same amount.
+  ///
+  /// Use `extract(byteCount:)` to retrieve a separate span for a parsing
+  /// sub-task when you know the size of the task. For example, each chunk in
+  /// the PNG format begins with an identifier and the size of the chunk, in
+  /// bytes. A PNG chunk parser could use this method to extract the bytes of
+  /// each chunk, and limit parsing to within the resulting span.
+  ///
+  /// An _extracted_ parser span doesn't retain information about the bounds of
+  /// the original span, unlike a _slice_. If you need to seek beyond the
+  /// immediate bounds of the returned span, use the ``sliceSpan(byteCount:)``
+  /// method instead.
+  ///
+  /// - Parameter byteCount: The number of bytes to include in the resulting
+  ///   span. `byteCount` must be non-negative, and less than or equal to the
+  ///   number of bytes remaining in the span.
+  /// - Returns: A new parser span covering `byteCount` bytes. The returned
+  ///   parser span has a `startPosition` of zero and an `endPosition` equal
+  ///   to `byteCount`.
+  /// - Throws: A `ParsingError` if `byteCount` cannot be represented as an
+  ///   `Int`, if it's negative, or if there aren't enough bytes in the
+  ///   original span.
+  @inlinable
+  @_lifetime(copy self)
+  public mutating func extract(byteCount: some FixedWidthInteger)
+    throws(ParsingError) -> ParserSpan
+  {
+    try sliceSpan(byteCount: byteCount).extracted()
+  }
+
+  /// Extracts and returns a new parser span covering the specified number of
+  /// bytes calculated as the product of object count and stride from the start
+  /// of this parser span, shrinking this parser span by the same amount.
+  ///
+  /// Use `extract(objectStride:objectCount:)` when you need to retrieve a
+  /// span for parsing a collection of fixed-size objects. This is particularly
+  /// useful when parsing arrays of binary data with known element sizes. For
+  /// example, if you're parsing an array of 4-byte integers and know there are
+  /// 10 elements, you can use:
+  ///
+  ///     let intArraySpan = try span.extract(objectStride: 4, objectCount: 10)
+  ///
+  /// An _extracted_ parser span doesn't retain information about the bounds of
+  /// the original span, unlike a _slice_. If you need to seek beyond the
+  /// immediate bounds of the returned span, use the ``sliceSpan(objectStride:objectCount:)``
+  /// method instead.
+  ///
+  /// - Parameters:
+  ///   - objectStride: The size in bytes of each object in the collection.
+  ///   - objectCount: The number of objects to include in the resulting span.
+  /// - Returns: A new parser span covering `objectStride * objectCount` bytes,
+  ///   with a `startPosition` of zero.
+  /// - Throws: A `ParsingError` if either `objectStride` or `objectCount`
+  ///   cannot be represented as an `Int`, if their product would overflow, or
+  ///   if the product is not in the range `0...count`.
+  @inlinable
+  @_lifetime(copy self)
+  public mutating func extract(
+    objectStride: some FixedWidthInteger,
+    objectCount: some FixedWidthInteger
+  ) throws(ParsingError) -> ParserSpan {
+    try sliceSpan(objectStride: objectStride, objectCount: objectCount)
+      .extracted()
+  }
+
+  /// Extracts and returns a parser span covering the remaining bytes in this
+  /// parser span, leaving this parser span empty.
+  ///
+  /// An _extracted_ parser span doesn't retain information about the bounds of
+  /// the original span, unlike a _slice_.
+  ///
+  /// - Returns: A new parser span covering the rest of the memory represented
+  ///   by this parser span, with a `startPosition` of zero and `endPosition`
+  ///   equal to the remaining number of bytes.
+  @inlinable
+  @_lifetime(copy self)
+  public mutating func extractRemaining() -> ParserSpan {
+    divide(atOffset: self.count).extracted()
+  }
+}
diff --git a/Tests/BinaryParsingTests/ExtractingTests.swift b/Tests/BinaryParsingTests/ExtractingTests.swift
new file mode 100644
index 0000000..ab556f6
--- /dev/null
+++ b/Tests/BinaryParsingTests/ExtractingTests.swift
@@ -0,0 +1,212 @@
+//===----------------------------------------------------------------------===//
+//
+// This source file is part of the Swift Binary Parsing open source project
+//
+// Copyright (c) 2025 Apple Inc. and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+//
+//===----------------------------------------------------------------------===//
+
+import BinaryParsing
+import Testing
+
+private let buffer: [UInt8] = [  // Big-endian UInt16 values 1...7, then 0.
+  0, 1, 0, 2, 0, 3, 0, 4,
+  0, 5, 0, 6, 0, 7, 0, 0,
+]
+
+private let emptyBuffer: [UInt8] = []
+
+struct ExtractingTests {
+  @Test func extractByteCount() throws {
+    try buffer.withParserSpan { input in
+      var firstSpan = try input.extract(byteCount: 4)
+      #expect(firstSpan.startPosition == 0)
+      #expect(firstSpan.count == 4)
+
+      // Verify contents of the extracted span
+      let firstValue = try UInt16(parsingBigEndian: &firstSpan)
+      let secondValue = try UInt16(parsingBigEndian: &firstSpan)
+      #expect(firstValue == 1)
+      #expect(secondValue == 2)
+      #expect(firstSpan.count == 0)
+
+      // Input position should advance
+      #expect(input.startPosition == 4)
+      #expect(input.count == 12)
+
+      // Extract another span after advancing the input
+      _ = try input.seek(toRelativeOffset: 2)
+      var secondSpan = try input.extract(byteCount: 4)
+      #expect(secondSpan.startPosition == 0)  // Extracted span starts at 0
+      #expect(secondSpan.count == 4)
+
+      // Verify the content of the second extracted span
+      let thirdValue = try UInt16(parsingBigEndian: &secondSpan)
+      let fourthValue = try UInt16(parsingBigEndian: &secondSpan)
+      #expect(thirdValue == 4)
+      #expect(fourthValue == 5)
+
+      // Try extracting with zero byteCount
+      let emptySpan = try input.extract(byteCount: 0)
+      #expect(emptySpan.count == 0)
+      #expect(emptySpan.startPosition == 0)
+
+      // Attempt to extract more than the 6 bytes still available
+      #expect(throws: ParsingError.self) {
+        _ = try input.extract(byteCount: 11)
+      }
+
+      // Try with negative byteCount
+      #expect(throws: ParsingError.self) {
+        _ = try input.extract(byteCount: -1)
+      }
+    }
+
+    // Test with empty buffer
+    try emptyBuffer.withParserSpan { input in
+      // Zero byteCount should succeed
+      let emptySpan = try input.extract(byteCount: 0)
+      #expect(emptySpan.count == 0)
+      #expect(emptySpan.startPosition == 0)
+
+      // Any positive byteCount should fail
+      #expect(throws: ParsingError.self) {
+        _ = try input.extract(byteCount: 1)
+      }
+    }
+  }
+
+  @Test func extractObjectCount() throws {
+    try buffer.withParserSpan { input in
+      // 2 objects of 2 bytes each
+      var firstSpan = try input.extract(objectStride: 2, objectCount: 2)
+      #expect(firstSpan.startPosition == 0)
+      #expect(firstSpan.count == 4)
+
+      // Verify contents of the extracted span
+      let firstValue = try UInt16(parsingBigEndian: &firstSpan)
+      let secondValue = try UInt16(parsingBigEndian: &firstSpan)
+      #expect(firstValue == 1)
+      #expect(secondValue == 2)
+      #expect(firstSpan.count == 0)
+
+      // 1 object of 4 bytes
+      var secondSpan = try input.extract(objectStride: 4, objectCount: 1)
+      #expect(secondSpan.startPosition == 0)  // Extracted spans start at 0
+      #expect(secondSpan.count == 4)
+
+      // Verify contents of the second extract
+      let thirdValue = try UInt32(parsingBigEndian: &secondSpan)
+      #expect(thirdValue == 0x0003_0004)
+      #expect(secondSpan.count == 0)
+
+      // Input position should advance
+      #expect(input.startPosition == 8)
+      #expect(input.count == 8)
+
+      // objectCount == 0 (should create an empty extracted span)
+      let emptySpan = try input.extract(objectStride: 2, objectCount: 0)
+      #expect(emptySpan.count == 0)
+      #expect(emptySpan.startPosition == 0)
+
+      // objectStride == 0 (should create an empty extracted span)
+      let emptySpan2 = try input.extract(objectStride: 0, objectCount: 5)
+      #expect(emptySpan2.count == 0)
+      #expect(emptySpan2.startPosition == 0)
+
+      #expect(throws: ParsingError.self) {
+        _ = try input.extract(objectStride: 3, objectCount: 3)  // 9 > 8 bytes
+      }
+      #expect(input.startPosition == 8)  // Failed extract doesn't advance
+      #expect(throws: ParsingError.self) {
+        _ = try input.extract(objectStride: -1, objectCount: 2)
+      }
+      #expect(throws: ParsingError.self) {
+        _ = try input.extract(objectStride: 2, objectCount: -1)
+      }
+      #expect(throws: ParsingError.self) {
+        _ = try input.extract(objectStride: Int.max, objectCount: 2)
+      }
+    }
+
+    // Test with empty buffer
+    try emptyBuffer.withParserSpan { input in
+      let emptySpan = try input.extract(objectStride: 4, objectCount: 0)
+      #expect(emptySpan.count == 0)
+      #expect(emptySpan.startPosition == 0)
+
+      #expect(throws: ParsingError.self) {
+        _ = try input.extract(objectStride: 1, objectCount: 1)
+      }
+    }
+  }
+
+  @Test func extractRemaining() throws {
+    try buffer.withParserSpan { input in
+      // Advance to a position within the buffer
+      try input.seek(toRelativeOffset: 6)
+
+      var remainingSpan = input.extractRemaining()
+      #expect(remainingSpan.startPosition == 0)  // Extracted spans start at 0
+      #expect(remainingSpan.count == 10)  // 16 - 6 = 10 bytes remaining
+
+      // Verify that the original input is fully consumed (now empty)
+      #expect(input.count == 0)
+
+      // Verify we can parse the extracted remaining data
+      let value1 = try UInt16(parsingBigEndian: &remainingSpan)
+      let value2 = try UInt16(parsingBigEndian: &remainingSpan)
+      #expect(value1 == 4)
+      #expect(value2 == 5)
+      #expect(remainingSpan.count == 6)
+
+      // Reset to beginning and extract all
+      try input.seek(toAbsoluteOffset: 0)
+      var fullSpan = input.extractRemaining()
+      #expect(fullSpan.startPosition == 0)
+      #expect(fullSpan.count == 16)
+      #expect(input.count == 0)
+
+      // Parse a few values to verify it contains the full buffer data
+      let fullValue1 = try UInt16(parsingBigEndian: &fullSpan)
+      let fullValue2 = try UInt16(parsingBigEndian: &fullSpan)
+      #expect(fullValue1 == 1)
+      #expect(fullValue2 == 2)
+    }
+
+    // Test with empty buffer
+    emptyBuffer.withParserSpan { input in
+      let emptySpan = input.extractRemaining()
+      #expect(emptySpan.startPosition == 0)
+      #expect(emptySpan.count == 0)
+      #expect(input.count == 0)
+    }
+  }
+
+  @Test func extractSliceSemantics() throws {
+    try buffer.withParserSpan { input in
+      // Create slice and extract of the same data
+      try input.seek(toAbsoluteOffset: 4)
+      var slicedSpan = try input.sliceSpan(byteCount: 4)
+      try input.seek(toAbsoluteOffset: 4)  // Go back to same position
+      var extractedSpan = try input.extract(byteCount: 4)
+
+      // Both should have same count...
+      #expect(slicedSpan.count == 4)
+      #expect(extractedSpan.count == 4)
+
+      // ...but different start positions
+      #expect(slicedSpan.startPosition == 4)
+      #expect(extractedSpan.startPosition == 0)
+
+      // Both should parse the same values
+      let sliceValue = try UInt32(parsingBigEndian: &slicedSpan)
+      let extractValue = try UInt32(parsingBigEndian: &extractedSpan)
+      #expect(sliceValue == 0x0003_0004)
+      #expect(sliceValue == extractValue)
+    }
+  }
+}