diff --git a/Sources/BinaryParsing/Parsers/Integer.swift b/Sources/BinaryParsing/Parsers/Integer.swift
index a15b4c5..43f1bd6 100644
--- a/Sources/BinaryParsing/Parsers/Integer.swift
+++ b/Sources/BinaryParsing/Parsers/Integer.swift
@@ -696,6 +696,81 @@ extension FixedWidthInteger where Self: BitwiseCopyable {
     }
     self = try Self(_throwing: T(truncatingIfNeeded: result))
   }
+
+  /// Creates an integer by parsing a little-endian base 128 (LEB128) encoded value of this type's size
+  /// from the start of the given parser span.
+  ///
+  /// - Parameter input: The `ParserSpan` to parse from. If parsing succeeds,
+  ///   the start position of `input` is moved forward by the number of bytes consumed. This will
+  ///   usually be `ceil(N / 7)` where N is the minimum number of bits required to encode
+  ///   this integer. In rare cases an encoder may produce valid but unnecessary padding bytes,
+  ///   in which case the number of bytes consumed can be up to `ceil(bitWidth / 7)` where
+  ///   bitWidth is the full width of this type.
+  /// - Throws: A `ParsingError` if `input` overflows the max value of this integer type,
+  ///   if more than `ceil(bitWidth / 7)` bytes are encoded, or if reading a byte
+  ///   from `input` fails.
+  @inlinable
+  @_lifetime(&input)
+  public init(parsingLEB128 input: inout ParserSpan) throws(ParsingError) {
+    var result: Self = 0
+    var shift = 0
+    var byte: UInt8 = 0
+    while true {
+      byte = try UInt8(parsing: &input)
+      let lowBits = byte & 0x7F
+      let availableBits = Self.bitWidth - shift
+      let isFinalByte = (byte & 0x80) == 0
+      if availableBits <= 0 {
+        // `shift` is seven times the number of bytes already read, so every
+        // bit of `Self` is accounted for and this byte would be byte number
+        // `ceil(bitWidth / 7) + 1` or later: the encoding is too long.
+        throw ParsingError(
+          status: .invalidValue,
+          location: input.startPosition)
+      } else if availableBits < 7 {
+        // Only the low `availableBits` payload bits still fit. The surplus
+        // bits must be zero or, for signed types, all ones (sign padding).
+        let allowedMask: UInt8 = (1 &<< availableBits) &- 1
+        let extraBits: UInt8 = lowBits & ~allowedMask
+        if Self.isSigned {
+          let signPadding: UInt8 = (~allowedMask) & 0x7F
+          guard extraBits == signPadding || extraBits == 0 else {
+            throw ParsingError(
+              status: .invalidValue, location: input.startPosition)
+          }
+        } else {
+          guard extraBits == 0 else {
+            throw ParsingError(
+              status: .invalidValue,
+              location: input.startPosition)
+          }
+        }
+        let part = Self(lowBits & allowedMask) << shift
+        result |= part
+        if Self.isSigned && isFinalByte {
+          let finalByteNegative = (byte & 0x40) != 0
+          let resultNegative = result & (1 << (Self.bitWidth - 1)) != 0
+          if finalByteNegative != resultNegative {
+            // The value's sign has flipped - it has wrapped around.
+            throw ParsingError(
+              status: .invalidValue,
+              location: input.startPosition)
+          }
+        }
+      } else {
+        result |= Self(lowBits) &<< shift
+      }
+      shift += 7
+      if isFinalByte { break }
+    }
+    if Self.isSigned {
+      // Sign-extend if needed
+      if shift < Self.bitWidth && (byte & 0x40) != 0 {
+        result |= (~0) << shift
+      }
+    }
+    self = result
+  }
 }
 
 extension RawRepresentable where RawValue: MultiByteInteger {
diff --git a/Tests/BinaryParsingTests/IntegerParsingTests.swift b/Tests/BinaryParsingTests/IntegerParsingTests.swift
index 5ed8abf..96652a9 100644
--- a/Tests/BinaryParsingTests/IntegerParsingTests.swift
+++ b/Tests/BinaryParsingTests/IntegerParsingTests.swift
@@ -171,6 +171,12 @@ struct IntegerParsingTests {
           }
         }
       }
+
+      do {
+        let lebEncoded = [UInt8](encodingLEB128: number)
+        let parsed = try lebEncoded.withParserSpan { try T(parsingLEB128: &$0) }
+        #expect(parsed == number)
+      }
     }
 
     try runTest(for: .zero)
@@ -258,6 +264,12 @@ struct IntegerParsingTests {
           }
         }
       }
+
+      do {
+        let lebEncoded = [UInt8](encodingLEB128: number)
+        let parsed = try lebEncoded.withParserSpan { try T(parsingLEB128: &$0) }
+        #expect(parsed == number)
+      }
     }
 
     try runTest(for: .zero)
@@ -352,6 +364,12 @@ struct IntegerParsingTests {
           }
         }
       }
+
+      do {
+        let lebEncoded = [UInt8](encodingLEB128: number)
+        let parsed = try lebEncoded.withParserSpan { try T(parsingLEB128: &$0) }
+        #expect(parsed == number)
+      }
     }
 
     try runTest(for: .zero)
@@ -621,4 +639,19 @@ struct IntegerParsingTests {
     try fuzzIntegerCasting(
       UInt.self, loadingFrom: UInt64.self, using: &rng)
   }
+
+  // Some LEB128 encoders output padding bytes which are considered
+  // valid if the number of bytes does not exceed `ceil(bitWidth / 7)`.
+  @Test(arguments: [
+    ([0x80, 0x81, 0x80, 0x00], 0x80),
+    ([0xFF, 0x00], 0x7F),
+    ([0xFF, 0x80, 0x00], 0x7F),
+    ([0x80, 0x81, 0x00], 0x80),
+    ([0xFE, 0xFF, 0x7F], -0x02),
+  ])
+  func validPaddingLEB128(input: [Int], expected: Int) throws {
+    let lebEncoded = input.map(UInt8.init)
+    let result = try lebEncoded.withParserSpan { try Int(parsingLEB128: &$0) }
+    #expect(result == expected)
+  }
 }
diff --git a/Tests/BinaryParsingTests/TestingSupport.swift b/Tests/BinaryParsingTests/TestingSupport.swift
index 2aa33fb..7a3d551 100644
--- a/Tests/BinaryParsingTests/TestingSupport.swift
+++ b/Tests/BinaryParsingTests/TestingSupport.swift
@@ -108,6 +108,37 @@ extension Array where Element == UInt8 {
     Swift.withUnsafeBytes(of: value.littleEndian, Array.init)
       + Array(repeating: paddingByte, count: paddingCount)
   }
+
+  /// Creates the LEB128 encoding of `value`: signed LEB128 when `T` is a
+  /// signed type, unsigned LEB128 otherwise.
+  init<T: BinaryInteger>(encodingLEB128 value: T) {
+    var out: [UInt8] = []
+    if T.isSigned {
+      var v = value
+      while true {
+        var byte = UInt8(truncatingIfNeeded: v)
+        v >>= 6  // Keep the sign bit
+        let done = v == 0 || v == -1
+        if done {
+          byte &= 0x7F
+        } else {
+          v >>= 1
+          byte |= 0x80
+        }
+        out.append(byte)
+        if done { break }
+      }
+    } else {
+      var v = value
+      repeat {
+        var byte = UInt8(truncatingIfNeeded: v)
+        v >>= 7
+        if v != 0 { byte |= 0x80 }
+        out.append(byte)
+      } while v != 0
+    }
+    self = out
+  }
 }
 
 /// A seeded random number generator type.
diff --git a/Tests/BinaryParsingTests/ThrowingOperationsTests.swift b/Tests/BinaryParsingTests/ThrowingOperationsTests.swift
index b852868..647888d 100644
--- a/Tests/BinaryParsingTests/ThrowingOperationsTests.swift
+++ b/Tests/BinaryParsingTests/ThrowingOperationsTests.swift
@@ -172,4 +172,40 @@ struct ThrowingOperationsTests {
       }
     }
   }
+
+  // Four bytes exceed the three-byte (`ceil(16 / 7)`) limit for `Int16`.
+  @Test(arguments: [[0xFE, 0xFF, 0xFF, 0x7F]])
+  func tooManyPaddingBytesLEB128(_ input: [Int]) throws {
+    let lebEncoded = input.map(UInt8.init)
+    #expect(throws: ParsingError.self) {
+      try lebEncoded.withParserSpan { try Int16(parsingLEB128: &$0) }
+    }
+  }
+
+  // Values up to 100 past either end of a type's range, encoded from a wider
+  // type, must fail to parse as the narrower type.
+  @Test func overflowLEB128() throws {
+    func overflowTest<
+      T: FixedWidthInteger & BitwiseCopyable, U: MultiByteInteger
+    >(
+      _ type: T.Type,
+      value: U,
+    ) throws {
+      let lebEncoded: [UInt8] = .init(encodingLEB128: value)
+      #expect(throws: ParsingError.self) {
+        try lebEncoded.withParserSpan { try T(parsingLEB128: &$0) }
+      }
+    }
+    for i in 1...100 {
+      try overflowTest(Int8.self, value: Int16(Int8.min) - Int16(i))
+      try overflowTest(Int8.self, value: Int16(Int8.max) + Int16(i))
+      try overflowTest(UInt8.self, value: UInt16(UInt8.max) + UInt16(i))
+      try overflowTest(Int16.self, value: Int32(Int16.min) - Int32(i))
+      try overflowTest(Int16.self, value: Int32(Int16.max) + Int32(i))
+      try overflowTest(UInt16.self, value: UInt32(UInt16.max) + UInt32(i))
+      try overflowTest(Int32.self, value: Int64(Int32.min) - Int64(i))
+      try overflowTest(Int32.self, value: Int64(Int32.max) + Int64(i))
+      try overflowTest(UInt32.self, value: UInt64(UInt32.max) + UInt64(i))
+    }
+  }
 }