Skip to content

Commit

Permalink
Support multi PEM parsing and speed up PEM parsing in general (#39)
Browse files Browse the repository at this point in the history
* Multi PEM support

* Multi PEM support

* swift-format

* Apply review feedback
  • Loading branch information
dnadoba committed Sep 18, 2023
1 parent 49557d7 commit 12c24ff
Show file tree
Hide file tree
Showing 2 changed files with 331 additions and 46 deletions.
248 changes: 202 additions & 46 deletions Sources/SwiftASN1/Basic ASN1 Types/PEMDocument.swift
Original file line number Diff line number Diff line change
Expand Up @@ -129,39 +129,16 @@ public struct PEMDocument: Hashable, Sendable {
public var derBytes: [UInt8]

public init(pemString: String) throws {
// A PEM document looks like this:
//
// -----BEGIN <SOME DISCRIMINATOR>-----
// <base64 encoded bytes, 64 characters per line>
// -----END <SOME DISCRIMINATOR>-----
//
// This function attempts to parse this string as a PEM document, and returns the discriminator type
// and the base64 decoded bytes.
var lines = pemString.split { $0.isNewline }[...]
guard let first = lines.first, let last = lines.last else {
throw ASN1Error.invalidPEMDocument(reason: "Leading or trailing line missing.")
}

guard let discriminator = first.pemStartDiscriminator, discriminator == last.pemEndDiscriminator else {
throw ASN1Error.invalidPEMDocument(reason: "Leading or trailing line missing PEM discriminator")
}

// All but the last line must be 64 bytes. The force unwrap is safe because we require the lines to be
// greater than zero.
lines = lines.dropFirst().dropLast()
guard lines.count > 0,
lines.dropLast().allSatisfy({ $0.utf8.count == PEMDocument.lineLength }),
lines.last!.utf8.count <= PEMDocument.lineLength
else {
throw ASN1Error.invalidPEMDocument(reason: "PEMDocument has incorrect line lengths")
}
var pemString = pemString.utf8[...]

guard let derBytes = Data(base64Encoded: lines.joined()) else {
throw ASN1Error.invalidPEMDocument(reason: "PEMDocument not correctly base64 encoded")
guard let document = try pemString.readNextPEMDocument()?.decode() else {
throw ASN1Error.invalidPEMDocument(reason: "could not find PEM start marker")
}
guard try pemString.readNextPEMDocument() == nil else {
throw ASN1Error.invalidPEMDocument(reason: "Multiple PEMDocuments found")
}

self.discriminator = discriminator
self.derBytes = Array(derBytes)
self = document
}

public init(type: String, derBytes: [UInt8]) {
Expand Down Expand Up @@ -199,32 +176,211 @@ public struct PEMDocument: Hashable, Sendable {
}
}

extension Substring {
fileprivate var pemStartDiscriminator: String? {
return self.pemDiscriminator(expectedPrefix: "-----BEGIN ", expectedSuffix: "-----")
extension PEMDocument {
/// Attempts to parse and decode multiple PEM documents inside a single String.
/// - Parameter pemString: The PEM-encoded string containing zero or more ``PEMDocument``s
/// - Returns: parsed and decoded PEM documents
public static func parseMultiple(pemString: String) throws -> [PEMDocument] {
var pemString = pemString.utf8[...]
var pemDocuments = [PEMDocument]()
while let lazyPEMDocument = try pemString.readNextPEMDocument() {
pemDocuments.append(try lazyPEMDocument.decode())
}
return pemDocuments
}
}

/// A PEM document that has not yet decoded the base64 string found between the start and end marker.
struct LazyPEMDocument {
/// `discriminator` found after BEGIN and END markers
var discriminator: Substring.UTF8View
/// The `base64EncodedDERString` is as found in the original string and will still contain new lines if present in the original.
var base64EncodedDERString: Substring.UTF8View

func decode() throws -> PEMDocument {
guard let type = String(discriminator) else {
throw ASN1Error.invalidPEMDocument(reason: "discriminator is not valid UTF-8")
}
guard let base64EncodedDERString = String(base64EncodedDERString) else {
throw ASN1Error.invalidPEMDocument(reason: "base64EncodedDERString is not valid UTF-8")
}

guard let data = Data(base64Encoded: base64EncodedDERString, options: .ignoreUnknownCharacters) else {
throw ASN1Error.invalidPEMDocument(reason: "PEMDocument not correctly base64 encoded")
}
if data.isEmpty {
throw ASN1Error.invalidPEMDocument(reason: "PEMDocument has an empty body")
}

fileprivate var pemEndDiscriminator: String? {
return self.pemDiscriminator(expectedPrefix: "-----END ", expectedSuffix: "-----")
let derBytes = Array(data)
return PEMDocument(type: type, derBytes: derBytes)
}
}

private func pemDiscriminator(expectedPrefix: String, expectedSuffix: String) -> String? {
var utf8Bytes = self.utf8[...]
extension Substring.UTF8View {
/// A PEM document looks like this:
/// ```
/// -----BEGIN <SOME DISCRIMINATOR>-----
/// <base64 encoded bytes, 64 characters per line>
/// -----END <SOME DISCRIMINATOR>-----
/// ```
/// This function attempts find the BEGIN and END marker.
/// It then tries to extract the discriminator and the base64 encoded string between the START and END marker.
fileprivate mutating func readNextPEMDocument() throws -> LazyPEMDocument? {
/// First find the BEGIN marker: `-----BEGIN <SOME DISCRIMINATOR>-----`
guard
let (
beginDiscriminatorPrefix,
beginDiscriminatorInfix,
beginDiscriminatorSuffix
) = self.firstRangesOf(
prefix: "-----BEGIN ",
suffix: "-----\n"
)
else {
return nil
}
let beginDiscriminator = self[beginDiscriminatorInfix]

/// and then find the END marker: `-----END <SOME DISCRIMINATOR>-----`
guard
let (
endDiscriminatorPrefix,
endDiscriminatorInfix,
endDiscriminatorSuffix
) = self[beginDiscriminatorSuffix.upperBound...].firstRangesOf(
prefix: "-----END ",
suffix: "-----"
)
else {
let pemBegin = self[beginDiscriminatorPrefix.lowerBound..<beginDiscriminatorSuffix.upperBound]
let pemEnd = "-----END \(beginDiscriminator)-----"
throw ASN1Error.invalidPEMDocument(
reason: "PEMDocument has \(String(reflecting: String(pemBegin))) but not \(String(reflecting: pemEnd))"
)
}
let endDiscriminator = self[endDiscriminatorInfix]

/// discriminator found in the BEGIN and END markers need to match
guard beginDiscriminator.elementsEqual(endDiscriminator) else {
throw ASN1Error.invalidPEMDocument(
reason:
"PEMDocument begin and end discriminator don't match. BEGIN: \(String(reflecting: String(beginDiscriminator))). END: \(String(reflecting: String(endDiscriminator)))"
)
}

/// everything between the BEGIN and END markers is considered the base64 encoded string
let base64EncodedDERString = self[beginDiscriminatorSuffix.upperBound..<endDiscriminatorPrefix.lowerBound]

try base64EncodedDERString.checkLineLengthsOfBase64EncodedString()

// We want to split this sequence into three parts: the prefix, the middle, and the end
let prefixSize = expectedPrefix.utf8.count
let suffixSize = expectedSuffix.utf8.count
/// move `self` to the end of the END marker
self = self[endDiscriminatorSuffix.upperBound...]

let prefix = utf8Bytes.prefix(prefixSize)
utf8Bytes = utf8Bytes.dropFirst(prefixSize)
let suffix = utf8Bytes.suffix(suffixSize)
utf8Bytes = utf8Bytes.dropLast(suffixSize)
return LazyPEMDocument(discriminator: beginDiscriminator, base64EncodedDERString: base64EncodedDERString)
}

guard prefix.elementsEqual(expectedPrefix.utf8), suffix.elementsEqual(expectedSuffix.utf8) else {
/// verify line length limits according to RFC
///
/// [4.3.2.4 Step 4: Printable Encoding](https://www.rfc-editor.org/rfc/rfc1421#section-4.3)
///
/// [...]
/// To represent the encapsulated text of a PEM message, the encoding
/// function's output is delimited into text lines (using local
/// conventions), with each line except the last containing exactly 64
/// printable characters and the final line containing 64 or fewer
/// printable characters.
///
private func checkLineLengthsOfBase64EncodedString() throws {
var message = self
let lastIndex = message.index(before: message.endIndex)
while !message.isEmpty {
let expectedNewLineIndex =
message.index(message.startIndex, offsetBy: 64, limitedBy: lastIndex) ?? lastIndex

guard
let actualNewLineIndex = message.firstIndex(of: UInt8(ascii: "\n")),
actualNewLineIndex == expectedNewLineIndex
else {
throw ASN1Error.invalidPEMDocument(reason: "PEMDocument has incorrect line lengths")
}

let nextLineStart = message.index(after: expectedNewLineIndex)

message = message[nextLineStart...]
}
}
}

extension Substring.UTF8View {
func firstRangesOf(
`prefix`: Substring,
suffix: Substring
) -> (
prefix: Range<Index>,
infix: Range<Index>,
suffix: Range<Index>
)? {
self.firstRangesOf(prefix: `prefix`.utf8, suffix: suffix.utf8)
}

func firstRangesOf(
`prefix`: Self,
suffix: Self
) -> (
prefix: Range<Index>,
infix: Range<Index>,
suffix: Range<Index>
)? {
guard
let prefixRange = self.firstRange(of: `prefix`),
let suffixRange = self[prefixRange.upperBound...].firstRange(of: suffix)
else {
return nil
}
return (
prefix: prefixRange,
infix: prefixRange.upperBound..<suffixRange.lowerBound,
suffix: suffixRange
)
}

return String(utf8Bytes)
func firstRange(of other: Self) -> Range<Index>? {
guard other.count >= 1 else {
return self.startIndex..<self.startIndex
}
let otherWithoutFirst = other.dropFirst()

var currentSearchRange = self
while currentSearchRange.count >= other.count {
// find the first occurrence of first element in other
guard let firstIndexOfOther = currentSearchRange.firstIndex(of: other.first!) else {
return nil
}
// this is now the start of a potential match.
// we have already checked the first element so we can skip that and
// continue our search from the second element
let secondIndexOfOther = currentSearchRange.index(after: firstIndexOfOther)
guard
let searchEndIndex = currentSearchRange.index(
firstIndexOfOther,
offsetBy: other.count,
limitedBy: currentSearchRange.endIndex
)
else {
// not enough elements remaining
return nil
}
// check that all elements are equal
if currentSearchRange[secondIndexOfOther..<searchEndIndex].elementsEqual(otherWithoutFirst) {
// we found a match
return firstIndexOfOther..<searchEndIndex
} else {
// we continue our search one after the current match
currentSearchRange = self[secondIndexOfOther...]
}
}
return nil
}
}

Expand Down
Loading

0 comments on commit 12c24ff

Please sign in to comment.