From 20a91f7de84000d3e970bb8e5ba3cfed1c756ccc Mon Sep 17 00:00:00 2001 From: Will Temperley Date: Sun, 17 Aug 2025 17:16:36 +0800 Subject: [PATCH 1/5] Add parser for LEB128 integers. --- Sources/BinaryParsing/Parsers/Integer.swift | 68 +++++++++++++++++++ .../IntegerParsingTests.swift | 18 +++++ Tests/BinaryParsingTests/TestingSupport.swift | 29 ++++++++ 3 files changed, 115 insertions(+) diff --git a/Sources/BinaryParsing/Parsers/Integer.swift b/Sources/BinaryParsing/Parsers/Integer.swift index a15b4c5..d1d864d 100644 --- a/Sources/BinaryParsing/Parsers/Integer.swift +++ b/Sources/BinaryParsing/Parsers/Integer.swift @@ -696,6 +696,74 @@ extension FixedWidthInteger where Self: BitwiseCopyable { } self = try Self(_throwing: T(truncatingIfNeeded: result)) } + + /// Creates an integer by parsing a little-endian base 128 (LEB128) encoded value of this type's size + /// from the start of the given parser span. + /// + /// - Parameter input: The `ParserSpan` to parse from. If parsing succeeds, + /// the start position of `input` is moved forward by `ceil(bitWidth / 7)` where bitWidth is + /// the minimum number of bits required to encode this integer. + /// - Throws: A `ParsingError` if `input` overflows the max value of + /// this integer type. + @inlinable + @_lifetime(&input) + public init(parsingLEB128 input: inout ParserSpan) throws(ParsingError) { + var result: Self = 0 + var shift = 0 + var byte: UInt8 = 0 + + while true { + byte = try UInt8(parsing: &input) + let bits = Self(byte & 0x7F) + + // Check for overflow before shifting + if shift >= Self.bitWidth { + // Additional bytes must be zero (or sign extension for signed) + if Self.isSigned { + let expectedByte: UInt8 = (result < 0) ? 0xFF : 0x00 + guard (byte & 0x7F) == (expectedByte & 0x7F) else { + throw ParsingError( + status: .invalidValue, + location: input.startPosition) + } + } else { + guard (byte & 0x7F) == 0 else { + throw ParsingError( + status: .invalidValue, + location: input.startPosition) + } + } + } else { + // Check if this would overflow our target type + let availableBits = Self.bitWidth - shift + if availableBits < 7 { + // Mask of bits that can safely fit + let allowedMask: Self = (1 << availableBits) - 1 + let extraBits = bits & ~allowedMask + if extraBits != 0 { + let isValidSignExtension = + Self.isSigned && extraBits == (~allowedMask & 0x7F) + + if !isValidSignExtension { + throw ParsingError( + status: .invalidValue, + location: input.startPosition) + } + } + } + result |= bits << shift + } + shift += 7 + if (byte & 0x80) == 0 { break } + } + if Self.isSigned { + // Sign-extend if needed + if shift < Self.bitWidth && (byte & 0x40) != 0 { + result |= (~0) << shift + } + } + self = result + } } extension RawRepresentable where RawValue: MultiByteInteger { diff --git a/Tests/BinaryParsingTests/IntegerParsingTests.swift b/Tests/BinaryParsingTests/IntegerParsingTests.swift index 5ed8abf..e63c7f2 100644 --- a/Tests/BinaryParsingTests/IntegerParsingTests.swift +++ b/Tests/BinaryParsingTests/IntegerParsingTests.swift @@ -171,6 +171,12 @@ struct IntegerParsingTests { } } } + + do { + let lebEncoded = [UInt8](encodingLEB128: number) + let parsed = try lebEncoded.withParserSpan { try T(parsingLEB128: &$0) } + #expect(parsed == number) + } } try runTest(for: .zero) @@ -258,6 +264,12 @@ struct IntegerParsingTests { } } } + + do { + let lebEncoded = [UInt8](encodingLEB128: number) + let parsed = try lebEncoded.withParserSpan { try T(parsingLEB128: &$0) } + #expect(parsed == number) + } } try runTest(for: .zero) @@ -352,6 +364,12 @@ struct IntegerParsingTests { } } } + + do { + let lebEncoded = [UInt8](encodingLEB128: number) + let parsed = try lebEncoded.withParserSpan { try T(parsingLEB128: &$0) } + #expect(parsed == number) + } } try runTest(for: .zero) diff --git a/Tests/BinaryParsingTests/TestingSupport.swift b/Tests/BinaryParsingTests/TestingSupport.swift index 2aa33fb..1b48253 100644 --- a/Tests/BinaryParsingTests/TestingSupport.swift +++ b/Tests/BinaryParsingTests/TestingSupport.swift @@ -108,6 +108,35 @@ extension Array where Element == UInt8 { Swift.withUnsafeBytes(of: value.littleEndian, Array.init) + Array(repeating: paddingByte, count: paddingCount) } + + init(encodingLEB128 value: T) { + var out: [UInt8] = [] + if T.isSigned { + var v = value + while true { + var byte = UInt8(truncatingIfNeeded: v) + v >>= 6 // Keep the sign bit + let done = v == 0 || v == -1 + if done { + byte &= 0x7F + } else { + v >>= 1 + byte |= 0x80 + } + out.append(byte) + if done { break } + } + } else { + var v = value + repeat { + var byte = UInt8(truncatingIfNeeded: v) + v >>= 7 + if v != 0 { byte |= 0x80 } + out.append(byte) + } while v != 0 + } + self = out + } } /// A seeded random number generator type. From 7b38b85fcd2eda7f733b15e87999a89342e0a2f8 Mon Sep 17 00:00:00 2001 From: Will Temperley Date: Tue, 19 Aug 2025 07:55:38 +0800 Subject: [PATCH 2/5] Improve LEB128 input validation. Fix indentation. --- Sources/BinaryParsing/Parsers/Integer.swift | 23 +++++-------------- Tests/BinaryParsingTests/TestingSupport.swift | 2 +- 2 files changed, 7 insertions(+), 18 deletions(-) diff --git a/Sources/BinaryParsing/Parsers/Integer.swift b/Sources/BinaryParsing/Parsers/Integer.swift index d1d864d..d57f3e0 100644 --- a/Sources/BinaryParsing/Parsers/Integer.swift +++ b/Sources/BinaryParsing/Parsers/Integer.swift @@ -711,27 +711,17 @@ extension FixedWidthInteger where Self: BitwiseCopyable { var result: Self = 0 var shift = 0 var byte: UInt8 = 0 - while true { byte = try UInt8(parsing: &input) let bits = Self(byte & 0x7F) - // Check for overflow before shifting if shift >= Self.bitWidth { // Additional bytes must be zero (or sign extension for signed) - if Self.isSigned { - let expectedByte: UInt8 = (result < 0) ? 0xFF : 0x00 - guard (byte & 0x7F) == (expectedByte & 0x7F) else { - throw ParsingError( - status: .invalidValue, - location: input.startPosition) - } - } else { - guard (byte & 0x7F) == 0 else { - throw ParsingError( - status: .invalidValue, - location: input.startPosition) - } + let expectedByte: UInt8 = (result < 0) ? 0xFF : 0x00 + guard bits == expectedByte else { + throw ParsingError( + status: .invalidValue, + location: input.startPosition) } } else { // Check if this would overflow our target type @@ -742,8 +732,7 @@ extension FixedWidthInteger where Self: BitwiseCopyable { let extraBits = bits & ~allowedMask if extraBits != 0 { let isValidSignExtension = - Self.isSigned && extraBits == (~allowedMask & 0x7F) - + Self.isSigned && extraBits == (~allowedMask & 0x7F) if !isValidSignExtension { throw ParsingError( status: .invalidValue, diff --git a/Tests/BinaryParsingTests/TestingSupport.swift b/Tests/BinaryParsingTests/TestingSupport.swift index 1b48253..70e2652 100644 --- a/Tests/BinaryParsingTests/TestingSupport.swift +++ b/Tests/BinaryParsingTests/TestingSupport.swift @@ -115,7 +115,7 @@ extension Array where Element == UInt8 { var v = value while true { var byte = UInt8(truncatingIfNeeded: v) - v >>= 6 // Keep the sign bit + v >>= 6 // Keep the sign bit let done = v == 0 || v == -1 if done { byte &= 0x7F From 5a8ec0670bd387819cc9b84b70ad3f58878785d9 Mon Sep 17 00:00:00 2001 From: Will Temperley Date: Wed, 20 Aug 2025 22:48:37 +0800 Subject: [PATCH 3/5] Add tests for LEB128 overflow. Fixed overflow check and added a max byte count check. --- Sources/BinaryParsing/Parsers/Integer.swift | 76 ++++++++++++------- .../IntegerParsingTests.swift | 16 ++++ .../ThrowingOperationsTests.swift | 33 ++++++++ 3 files changed, 98 insertions(+), 27 deletions(-) diff --git a/Sources/BinaryParsing/Parsers/Integer.swift b/Sources/BinaryParsing/Parsers/Integer.swift index d57f3e0..35f29fd 100644 --- a/Sources/BinaryParsing/Parsers/Integer.swift +++ b/Sources/BinaryParsing/Parsers/Integer.swift @@ -701,10 +701,13 @@ extension FixedWidthInteger where Self: BitwiseCopyable { /// from the start of the given parser span. /// /// - Parameter input: The `ParserSpan` to parse from. If parsing succeeds, - /// the start position of `input` is moved forward by `ceil(bitWidth / 7)` where bitWidth is - /// the minimum number of bits required to encode this integer. - /// - Throws: A `ParsingError` if `input` overflows the max value of - /// this integer type. + /// the start position of `input` is moved forward by the number of bytes consumed. This will + /// usually be `ceil(N / 7)` where N is the minimum number of bits required to encode + /// this integer. In rare cases an encoder may produce valid but unnecessary padding bytes, + /// in which case the number of bytes consumed can be up to `ceil(bitWidth / 7)` where + /// bitWidth is the full width of this type. + /// - Throws: A `ParsingError` if `input` overflows the max value of this integer type, + /// or if the maximum byte count for this type's size has been consumed. @inlinable @_lifetime(&input) public init(parsingLEB128 input: inout ParserSpan) throws(ParsingError) { @@ -713,37 +716,56 @@ extension FixedWidthInteger where Self: BitwiseCopyable { var byte: UInt8 = 0 while true { byte = try UInt8(parsing: &input) - let bits = Self(byte & 0x7F) - // Check for overflow before shifting - if shift >= Self.bitWidth { - // Additional bytes must be zero (or sign extension for signed) - let expectedByte: UInt8 = (result < 0) ? 0xFF : 0x00 - guard bits == expectedByte else { + let lowBits = byte & 0x7F + let availableBits = Self.bitWidth - shift + let isFinalByte = (byte & 0x80) == 0 + if availableBits <= 0 { + let maxBytes = (Self.bitWidth + 6) / 7 + let byteCount = shift / 7 + 1 + if byteCount > maxBytes { throw ParsingError( status: .invalidValue, location: input.startPosition) } - } else { - // Check if this would overflow our target type - let availableBits = Self.bitWidth - shift - if availableBits < 7 { - // Mask of bits that can safely fit - let allowedMask: Self = (1 << availableBits) - 1 - let extraBits = bits & ~allowedMask - if extraBits != 0 { - let isValidSignExtension = - Self.isSigned && extraBits == (~allowedMask & 0x7F) - if !isValidSignExtension { - throw ParsingError( - status: .invalidValue, - location: input.startPosition) - } + // Allow padding bytes that do not affect the value + let expectedBits: UInt8 = (result < 0) ? 0x7F : 0x00 + guard lowBits == expectedBits else { + throw ParsingError( + status: .invalidValue, + location: input.startPosition) + } + } else if availableBits < 7 { + let allowedMask: UInt8 = (1 &<< availableBits) &- 1 + let extraBits: UInt8 = lowBits & ~allowedMask + if Self.isSigned { + let signPadding: UInt8 = (~allowedMask) & 0x7F + guard extraBits == signPadding || extraBits == 0 else { + throw ParsingError(status: .invalidValue, location: input.startPosition) + } + } else { + guard extraBits == 0 else { + throw ParsingError( + status: .invalidValue, + location: input.startPosition) } } - result |= bits << shift + let part = Self(lowBits & allowedMask) << shift + result |= part + if Self.isSigned && isFinalByte { + let finalByteNegative = (byte & 0x40) != 0 + let resultNegative = result & (1 << (Self.bitWidth - 1)) != 0 + if finalByteNegative != resultNegative { + // The value's sign has flipped - it has wrapped around. + throw ParsingError( + status: .invalidValue, + location: input.startPosition) + } + } + } else { + result |= Self(lowBits) &<< shift } shift += 7 - if (byte & 0x80) == 0 { break } + if isFinalByte { break } } if Self.isSigned { // Sign-extend if needed diff --git a/Tests/BinaryParsingTests/IntegerParsingTests.swift b/Tests/BinaryParsingTests/IntegerParsingTests.swift index e63c7f2..2e21ac4 100644 --- a/Tests/BinaryParsingTests/IntegerParsingTests.swift +++ b/Tests/BinaryParsingTests/IntegerParsingTests.swift @@ -639,4 +639,20 @@ struct IntegerParsingTests { try fuzzIntegerCasting( UInt.self, loadingFrom: UInt64.self, using: &rng) } + + /// Some LEB128 encoders output padding bytes which are considered + /// valid if the number of bytes does not exceed `ceil(bitWidth / 7)` + @Test(arguments: [ + ([0x80, 0x81, 0x80, 0x00], 0x80), + ([0xFF, 0x00], 0x7F), + ([0xFF, 0x80, 0x00], 0x7F), + ([0x80, 0x81, 0x00], 0x80), + ([0x80, 0x81, 0x80, 0x00], 0x80), + ([0xFE, 0xFF, 0x7F], -0x02), + ]) + func validPaddingLEB128(input: [Int], expected: Int) throws { + let lebEncoded = input.map(UInt8.init) + let result = try lebEncoded.withParserSpan { try Int(parsingLEB128: &$0) } + #expect(result == expected) + } } diff --git a/Tests/BinaryParsingTests/ThrowingOperationsTests.swift b/Tests/BinaryParsingTests/ThrowingOperationsTests.swift index b852868..5820e41 100644 --- a/Tests/BinaryParsingTests/ThrowingOperationsTests.swift +++ b/Tests/BinaryParsingTests/ThrowingOperationsTests.swift @@ -172,4 +172,37 @@ struct ThrowingOperationsTests { } } } + + @Test(arguments: [[0xFE, 0xFF, 0xFF, 0x7F]]) + func tooManyPaddingBytesLEB128(_ input: [Int]) throws { + let lebEncoded = input.map(UInt8.init) + #expect(throws: ParsingError.self) { + try lebEncoded.withParserSpan { try Int16(parsingLEB128: &$0) } + } + } + + @Test func overflowLEB128() async throws { + func overflowTest< + T: FixedWidthInteger & BitwiseCopyable, U: MultiByteInteger + >( + _ type: T.Type, + value: U, + ) throws { + let lebEncoded: [UInt8] = .init(encodingLEB128: value) + #expect(throws: ParsingError.self) { + try lebEncoded.withParserSpan { try T(parsingLEB128: &$0) } + } + } + for i in 1...100 { + try overflowTest(Int8.self, value: Int16(Int8.min) - Int16(i)) + try overflowTest(Int8.self, value: Int16(Int8.max) + Int16(i)) + try overflowTest(UInt8.self, value: UInt16(UInt8.max) + UInt16(i)) + try overflowTest(Int16.self, value: Int32(Int16.min) - Int32(i)) + try overflowTest(Int16.self, value: Int32(Int16.max) + Int32(i)) + try overflowTest(UInt16.self, value: UInt32(UInt16.max) + UInt32(i)) + try overflowTest(Int32.self, value: Int64(Int32.min) - Int64(i)) + try overflowTest(Int32.self, value: Int64(Int32.max) + Int64(i)) + try overflowTest(UInt32.self, value: UInt64(UInt32.max) + UInt64(i)) + } + } } From 0e78c61674e928917931f125afe5d2293ee6fb47 Mon Sep 17 00:00:00 2001 From: Will Temperley Date: Mon, 25 Aug 2025 11:08:01 +0800 Subject: [PATCH 4/5] Fix formatting. --- Tests/BinaryParsingTests/IntegerParsingTests.swift | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Tests/BinaryParsingTests/IntegerParsingTests.swift b/Tests/BinaryParsingTests/IntegerParsingTests.swift index 2e21ac4..43dded7 100644 --- a/Tests/BinaryParsingTests/IntegerParsingTests.swift +++ b/Tests/BinaryParsingTests/IntegerParsingTests.swift @@ -640,8 +640,8 @@ struct IntegerParsingTests { UInt.self, loadingFrom: UInt64.self, using: &rng) } - /// Some LEB128 encoders output padding bytes which are considered - /// valid if the number of bytes does not exceed `ceil(bitWidth / 7)` + // Some LEB128 encoders output padding bytes which are considered + // valid if the number of bytes does not exceed `ceil(bitWidth / 7)`. @Test(arguments: [ ([0x80, 0x81, 0x80, 0x00], 0x80), ([0xFF, 0x00], 0x7F), From 1b034f068bb4c61b96b1195809f1dd88b4b8d9a0 Mon Sep 17 00:00:00 2001 From: Will Temperley Date: Wed, 3 Sep 2025 13:26:19 +0800 Subject: [PATCH 5/5] Fix whitespace issues. --- Sources/BinaryParsing/Parsers/Integer.swift | 5 +++-- .../BinaryParsingTests/IntegerParsingTests.swift | 16 ++++++++-------- Tests/BinaryParsingTests/TestingSupport.swift | 2 +- .../ThrowingOperationsTests.swift | 4 ++-- 4 files changed, 14 insertions(+), 13 deletions(-) diff --git a/Sources/BinaryParsing/Parsers/Integer.swift b/Sources/BinaryParsing/Parsers/Integer.swift index 35f29fd..43f1bd6 100644 --- a/Sources/BinaryParsing/Parsers/Integer.swift +++ b/Sources/BinaryParsing/Parsers/Integer.swift @@ -696,7 +696,7 @@ extension FixedWidthInteger where Self: BitwiseCopyable { } self = try Self(_throwing: T(truncatingIfNeeded: result)) } - + /// Creates an integer by parsing a little-endian base 128 (LEB128) encoded value of this type's size /// from the start of the given parser span. /// @@ -740,7 +740,8 @@ extension FixedWidthInteger where Self: BitwiseCopyable { if Self.isSigned { let signPadding: UInt8 = (~allowedMask) & 0x7F guard extraBits == signPadding || extraBits == 0 else { - throw ParsingError(status: .invalidValue, location: input.startPosition) + throw ParsingError( + status: .invalidValue, location: input.startPosition) } } else { guard extraBits == 0 else { diff --git a/Tests/BinaryParsingTests/IntegerParsingTests.swift b/Tests/BinaryParsingTests/IntegerParsingTests.swift index 43dded7..96652a9 100644 --- a/Tests/BinaryParsingTests/IntegerParsingTests.swift +++ b/Tests/BinaryParsingTests/IntegerParsingTests.swift @@ -171,7 +171,7 @@ struct IntegerParsingTests { } } } - + do { let lebEncoded = [UInt8](encodingLEB128: number) let parsed = try lebEncoded.withParserSpan { try T(parsingLEB128: &$0) } @@ -264,7 +264,7 @@ struct IntegerParsingTests { } } } - + do { let lebEncoded = [UInt8](encodingLEB128: number) let parsed = try lebEncoded.withParserSpan { try T(parsingLEB128: &$0) } @@ -364,7 +364,7 @@ struct IntegerParsingTests { } } } - + do { let lebEncoded = [UInt8](encodingLEB128: number) let parsed = try lebEncoded.withParserSpan { try T(parsingLEB128: &$0) } @@ -639,16 +639,16 @@ struct IntegerParsingTests { try fuzzIntegerCasting( UInt.self, loadingFrom: UInt64.self, using: &rng) } - + // Some LEB128 encoders output padding bytes which are considered // valid if the number of bytes does not exceed `ceil(bitWidth / 7)`. @Test(arguments: [ ([0x80, 0x81, 0x80, 0x00], 0x80), - ([0xFF, 0x00], 0x7F), - ([0xFF, 0x80, 0x00], 0x7F), - ([0x80, 0x81, 0x00], 0x80), + ([0xFF, 0x00], 0x7F), + ([0xFF, 0x80, 0x00], 0x7F), + ([0x80, 0x81, 0x00], 0x80), ([0x80, 0x81, 0x80, 0x00], 0x80), - ([0xFE, 0xFF, 0x7F], -0x02), + ([0xFE, 0xFF, 0x7F], -0x02), ]) func validPaddingLEB128(input: [Int], expected: Int) throws { let lebEncoded = input.map(UInt8.init) diff --git a/Tests/BinaryParsingTests/TestingSupport.swift b/Tests/BinaryParsingTests/TestingSupport.swift index 70e2652..7a3d551 100644 --- a/Tests/BinaryParsingTests/TestingSupport.swift +++ b/Tests/BinaryParsingTests/TestingSupport.swift @@ -108,7 +108,7 @@ extension Array where Element == UInt8 { Swift.withUnsafeBytes(of: value.littleEndian, Array.init) + Array(repeating: paddingByte, count: paddingCount) } - + init(encodingLEB128 value: T) { var out: [UInt8] = [] if T.isSigned { diff --git a/Tests/BinaryParsingTests/ThrowingOperationsTests.swift b/Tests/BinaryParsingTests/ThrowingOperationsTests.swift index 5820e41..647888d 100644 --- a/Tests/BinaryParsingTests/ThrowingOperationsTests.swift +++ b/Tests/BinaryParsingTests/ThrowingOperationsTests.swift @@ -172,7 +172,7 @@ struct ThrowingOperationsTests { } } } - + @Test(arguments: [[0xFE, 0xFF, 0xFF, 0x7F]]) func tooManyPaddingBytesLEB128(_ input: [Int]) throws { let lebEncoded = input.map(UInt8.init) @@ -180,7 +180,7 @@ struct ThrowingOperationsTests { try lebEncoded.withParserSpan { try Int16(parsingLEB128: &$0) } } } - + @Test func overflowLEB128() async throws { func overflowTest< T: FixedWidthInteger & BitwiseCopyable, U: MultiByteInteger