Skip to content

Commit

Permalink
Restrict the maximum nesting level in the parser to avoid stack overf…
Browse files Browse the repository at this point in the history
…lows
  • Loading branch information
ahoppen committed Oct 26, 2022
1 parent 9e9943b commit 20a8fe4
Show file tree
Hide file tree
Showing 13 changed files with 205 additions and 53 deletions.
22 changes: 21 additions & 1 deletion Sources/SwiftParser/Declarations.swift
Expand Up @@ -358,8 +358,18 @@ extension Parser {

@_spi(RawSyntax)
public mutating func parseGenericParameters() -> RawGenericParameterClauseSyntax {
assert(self.currentToken.starts(with: "<"))
if let remainingTokens = remainingTokensIfMaximumNestingLevelReached() {
return RawGenericParameterClauseSyntax(
remainingTokens,
leftAngleBracket: missingToken(.leftAngle),
genericParameterList: RawGenericParameterListSyntax(elements: [], arena: self.arena),
genericWhereClause: nil,
rightAngleBracket: missingToken(.rightAngle),
arena: self.arena
)
}

assert(self.currentToken.starts(with: "<"))
let langle = self.consumeAnyToken(remapping: .leftAngle)
var elements = [RawGenericParameterSyntax]()
do {
Expand Down Expand Up @@ -611,6 +621,16 @@ extension Parser {
extension Parser {
@_spi(RawSyntax)
public mutating func parseMemberDeclListItem() -> RawMemberDeclListItemSyntax? {
if let remainingTokens = remainingTokensIfMaximumNestingLevelReached() {
let item = RawMemberDeclListItemSyntax(
remainingTokens,
decl: RawDeclSyntax(RawMissingDeclSyntax(attributes: nil, modifiers: nil, arena: self.arena)),
semicolon: nil,
arena: self.arena
)
return item
}

let decl: RawDeclSyntax
if self.at(.poundSourceLocationKeyword) {
decl = RawDeclSyntax(self.parsePoundSourceLocationDirective())
Expand Down
9 changes: 9 additions & 0 deletions Sources/SwiftParser/Directives.swift
Expand Up @@ -70,6 +70,15 @@ extension Parser {
addSemicolonIfNeeded: (_ lastElement: Element, _ newItemAtStartOfLine: Bool, _ parser: inout Parser) -> Element? = { _, _, _ in nil },
syntax: (inout Parser, [Element]) -> RawSyntax?
) -> RawIfConfigDeclSyntax {
if let remainingTokens = remainingTokensIfMaximumNestingLevelReached() {
return RawIfConfigDeclSyntax(
remainingTokens,
clauses: RawIfConfigClauseListSyntax(elements: [], arena: self.arena),
poundEndif: missingToken(.poundEndifKeyword),
arena: self.arena
)
}

var clauses = [RawIfConfigClauseSyntax]()
do {
var firstIteration = true
Expand Down
21 changes: 21 additions & 0 deletions Sources/SwiftParser/Expressions.swift
Expand Up @@ -1757,6 +1757,16 @@ extension Parser {
/// dictionary-literal-items → dictionary-literal-item ','? | dictionary-literal-item ',' dictionary-literal-items
@_spi(RawSyntax)
public mutating func parseCollectionLiteral() -> RawExprSyntax {
if let remainingTokens = remainingTokensIfMaximumNestingLevelReached() {
return RawExprSyntax(RawArrayExprSyntax(
remainingTokens,
leftSquare: missingToken(.leftSquareBracket),
elements: RawArrayElementListSyntax(elements: [], arena: self.arena),
rightSquare: missingToken(.rightSquareBracket),
arena: self.arena
))
}

let (unexpectedBeforeLSquare, lsquare) = self.expect(.leftSquareBracket)

if let rsquare = self.consume(if: .rightSquareBracket) {
Expand Down Expand Up @@ -2177,6 +2187,17 @@ extension Parser {
/// tuple-element → expression | identifier ':' expression
@_spi(RawSyntax)
public mutating func parseArgumentListElements(pattern: PatternContext) -> [RawTupleExprElementSyntax] {
if let remainingTokens = remainingTokensIfMaximumNestingLevelReached() {
return [RawTupleExprElementSyntax(
remainingTokens,
label: nil,
colon: nil,
expression: RawExprSyntax(RawMissingExprSyntax(arena: self.arena)),
trailingComma: nil,
arena: self.arena
)]
}

guard !self.at(.rightParen) else {
return []
}
Expand Down
45 changes: 42 additions & 3 deletions Sources/SwiftParser/Parser.swift
Expand Up @@ -26,11 +26,16 @@ extension Parser {
}

/// Parse the source code in the given string as Swift source file.
/// If `maximumNestingLevel` is set, the parser will stop if a nesting level
/// that is greater than this value is reached to avoid overflowing the stack.
/// The nesting level is increased whenever a bracketed expression like `(`
/// or `{` is started.
public static func parse(
source: UnsafeBufferPointer<UInt8>,
maximumNestingLevel: Int? = nil,
parseTransition: IncrementalParseTransition? = nil
) -> SourceFileSyntax {
var parser = Parser(source)
var parser = Parser(source, maximumNestingLevel: maximumNestingLevel)
// Extended lifetime is required because `SyntaxArena` in the parser must
// be alive until `Syntax(raw:)` retains the arena.
return withExtendedLifetime(parser) {
Expand Down Expand Up @@ -122,6 +127,18 @@ public struct Parser: TokenConsumer {
@_spi(RawSyntax)
public var currentToken: Lexer.Lexeme

/// The current nesting level, i.e. the number of tokens that start a nesting
/// level (such as `(`, `{` or `#if`) minus the number of tokens that end one
/// (such as `)`, `}` or `#endif`) which have been consumed so far.
public var nestingLevel: Int = 0

/// When this nesting level is exceeded, the parser should stop parsing.
public let maximumNestingLevel: Int

/// A default maximum nesting level that is used if the client didn't
/// explicitly specify one.
public static let defaultMaximumNestingLevel = 256

/// Initializes a Parser from the given input buffer.
///
/// The lexer will copy any string data it needs from the resulting buffer
Expand All @@ -133,7 +150,9 @@ public struct Parser: TokenConsumer {
/// arena is created automatically, and `input` copied into the
/// arena. If non-`nil`, `input` must be the registered source
/// buffer of `arena` or a slice of the source buffer.
public init(_ input: UnsafeBufferPointer<UInt8>, arena: SyntaxArena? = nil) {
public init(_ input: UnsafeBufferPointer<UInt8>, maximumNestingLevel: Int? = nil, arena: SyntaxArena? = nil) {
self.maximumNestingLevel = maximumNestingLevel ?? Self.defaultMaximumNestingLevel

var sourceBuffer: UnsafeBufferPointer<UInt8>
if let arena = arena {
self.arena = arena
Expand All @@ -150,6 +169,7 @@ public struct Parser: TokenConsumer {

/// Synthesizes a missing token of the given kind.
///
/// The parser's nesting level is adjusted for `kind` before the token is
/// created, the same way `consumeAnyToken()` adjusts it for a token taken
/// from the source.
///
/// - Parameters:
///   - kind: The kind of token to synthesize.
///   - text: Optional text forwarded to the missing token.
/// - Returns: A `RawTokenSyntax` created as missing with this parser's arena.
@_spi(RawSyntax)
public mutating func missingToken(_ kind: RawTokenKind, text: SyntaxText? = nil) -> RawTokenSyntax {
  self.adjustNestingLevel(for: kind)
  let synthesized = RawTokenSyntax(missing: kind, text: text, arena: self.arena)
  return synthesized
}

Expand All @@ -158,6 +178,12 @@ public struct Parser: TokenConsumer {
/// - Returns: The token that was consumed.
@_spi(RawSyntax)
public mutating func consumeAnyToken() -> RawTokenSyntax {
  // Read the kind of the current token first: consuming it below replaces
  // `currentToken` with the next lexeme.
  let consumedKind = self.currentToken.tokenKind
  self.adjustNestingLevel(for: consumedKind)
  let token = self.consumeAnyTokenWithoutAdjustingNestingLevel()
  return token
}

@_spi(RawSyntax)
public mutating func consumeAnyTokenWithoutAdjustingNestingLevel() -> RawTokenSyntax {
let tok = self.currentToken
self.currentToken = self.lexemes.advance()
return RawTokenSyntax(
Expand All @@ -168,6 +194,17 @@ public struct Parser: TokenConsumer {
arena: arena
)
}

/// Updates `nestingLevel` for a token of the given kind.
///
/// Opening tokens (`<`, `{`, `(`, `[`, `#if`) raise the level by one, their
/// closing counterparts (`>`, `}`, `)`, `]`, `#endif`) lower it by one, and
/// every other token kind leaves it untouched.
///
/// - Parameter tokenKind: The kind of the token being consumed or synthesized.
private mutating func adjustNestingLevel(for tokenKind: RawTokenKind) {
  let delta: Int
  switch tokenKind {
  case .leftAngle, .leftBrace, .leftParen, .leftSquareBracket, .poundIfKeyword:
    delta = 1
  case .rightAngle, .rightBrace, .rightParen, .rightSquareBracket, .poundEndifKeyword:
    delta = -1
  default:
    // Not a nesting token: nothing to track.
    return
  }
  nestingLevel += delta
}
}

// MARK: Inspecting Tokens
Expand Down Expand Up @@ -278,7 +315,7 @@ extension Parser {
if handle.unexpectedTokens > 0 {
var unexpectedTokens = [RawSyntax]()
for _ in 0..<handle.unexpectedTokens {
unexpectedTokens.append(RawSyntax(self.consumeAnyToken()))
unexpectedTokens.append(RawSyntax(self.consumeAnyTokenWithoutAdjustingNestingLevel()))
}
unexpectedNodes = RawUnexpectedNodesSyntax(elements: unexpectedTokens, arena: self.arena)
} else {
Expand Down Expand Up @@ -511,6 +548,8 @@ extension Parser {
arena: self.arena
)

self.adjustNestingLevel(for: tokenKind)

// ... or a multi-character token with the first N characters being the one
// that we want to consume as a separate token.
// Careful: We need to reset the lexer to a point just before it saw the
Expand Down
12 changes: 12 additions & 0 deletions Sources/SwiftParser/Patterns.swift
Expand Up @@ -179,6 +179,18 @@ extension Parser {
/// tuple-pattern-element-list → tuple-pattern-element | tuple-pattern-element ',' tuple-pattern-element-list
/// tuple-pattern-element → pattern | identifier ':' pattern
mutating func parsePatternTupleElements() -> RawTuplePatternElementListSyntax {
if let remainingTokens = remainingTokensIfMaximumNestingLevelReached() {
return RawTuplePatternElementListSyntax(elements: [
RawTuplePatternElementSyntax(
remainingTokens,
labelName: nil,
labelColon: nil,
pattern: RawPatternSyntax(RawMissingPatternSyntax(arena: self.arena)),
trailingComma: nil,
arena: self.arena
)
], arena: self.arena)
}
var elements = [RawTuplePatternElementSyntax]()
do {
var keepGoing = true
Expand Down
43 changes: 37 additions & 6 deletions Sources/SwiftParser/TopLevel.swift
Expand Up @@ -13,6 +13,27 @@
@_spi(RawSyntax) import SwiftSyntax

extension Parser {
/// Consumes every token up to, but not including, the end-of-file token.
///
/// - Returns: The consumed tokens, in source order, each wrapped as `RawSyntax`.
///   The array is empty if the parser is already at `.eof`.
mutating func consumeRemaingingTokens() -> [RawSyntax] {
  var remaining: [RawSyntax] = []
  while !self.at(.eof) {
    let token = self.consumeAnyToken()
    remaining.append(RawSyntax(token))
  }
  return remaining
}

/// Bails out of parsing once the maximum nesting level has been exceeded.
///
/// If `nestingLevel` is greater than `maximumNestingLevel` and the parser is
/// not yet at the end of the file, all remaining tokens are consumed and
/// returned as unexpected nodes with `isMaximumNestingLevelOverflow` set.
/// Call this in places that are likely to cause deep recursion and abort
/// parsing when it returns a non-`nil` value.
///
/// - Returns: The remaining tokens as unexpected nodes, or `nil` if the limit
///   has not been exceeded or the current token is `.eof`.
mutating func remainingTokensIfMaximumNestingLevelReached() -> RawUnexpectedNodesSyntax? {
  guard nestingLevel > self.maximumNestingLevel,
    self.currentToken.tokenKind != .eof
  else {
    return nil
  }
  let overflowTokens = self.consumeRemaingingTokens()
  return RawUnexpectedNodesSyntax(
    elements: overflowTokens,
    isMaximumNestingLevelOverflow: true,
    arena: self.arena
  )
}

/// Parse the top level items in a file into a source file.
///
/// This function is the true parsing entrypoint that the high-level
Expand All @@ -26,13 +47,14 @@ extension Parser {
@_spi(RawSyntax)
public mutating func parseSourceFile() -> RawSourceFileSyntax {
let items = self.parseTopLevelCodeBlockItems()
var extraneousTokens = [RawSyntax]()
while !self.at(.eof) {
extraneousTokens.append(RawSyntax(consumeAnyToken()))
}
let unexpectedBeforeEof = extraneousTokens.isEmpty ? nil : RawUnexpectedNodesSyntax(elements: extraneousTokens, arena: self.arena)
let unexpectedBeforeEof = consumeRemaingingTokens()
let eof = self.consume(if: .eof)!
return .init(statements: items, unexpectedBeforeEof, eofToken: eof, arena: self.arena)
return .init(
statements: items,
RawUnexpectedNodesSyntax(unexpectedBeforeEof, arena: self.arena),
eofToken: eof,
arena: self.arena
)
}
}

Expand Down Expand Up @@ -119,6 +141,15 @@ extension Parser {
/// statements → statement statements?
@_spi(RawSyntax)
public mutating func parseCodeBlockItem(isAtTopLevel: Bool = false, allowInitDecl: Bool = true) -> RawCodeBlockItemSyntax? {
if let remainingTokens = remainingTokensIfMaximumNestingLevelReached() {
return RawCodeBlockItemSyntax(
remainingTokens,
item: RawSyntax(RawMissingExprSyntax(arena: self.arena)),
semicolon: nil,
errorTokens: nil,
arena: self.arena
)
}
if self.at(any: [.caseKeyword, .defaultKeyword]) {
// 'case' and 'default' are invalid in code block items.
// Parse them and put them in their own CodeBlockItem but as an unexpected node.
Expand Down
20 changes: 20 additions & 0 deletions Sources/SwiftParser/Types.swift
Expand Up @@ -392,6 +392,16 @@ extension Parser {
/// element-name → identifier
@_spi(RawSyntax)
public mutating func parseTupleTypeBody() -> RawTupleTypeSyntax {
if let remainingTokens = remainingTokensIfMaximumNestingLevelReached() {
return RawTupleTypeSyntax(
remainingTokens,
leftParen: missingToken(.leftParen),
elements: RawTupleTypeElementListSyntax(elements: [], arena: self.arena),
rightParen: missingToken(.rightParen),
arena: self.arena
)
}

let (unexpectedBeforeLParen, lparen) = self.expect(.leftParen)
var elements = [RawTupleTypeElementSyntax]()
do {
Expand Down Expand Up @@ -501,6 +511,16 @@ extension Parser {
/// dictionary-type → '[' type ':' type ']'
@_spi(RawSyntax)
public mutating func parseCollectionType() -> RawTypeSyntax {
if let remaingingTokens = remainingTokensIfMaximumNestingLevelReached() {
return RawTypeSyntax(RawArrayTypeSyntax(
remaingingTokens,
leftSquareBracket: missingToken(.leftSquareBracket),
elementType: RawTypeSyntax(RawMissingTypeSyntax(arena: self.arena)),
rightSquareBracket: missingToken(.rightSquareBracket),
arena: self.arena
))
}

let (unexpectedBeforeLSquare, lsquare) = self.expect(.leftSquareBracket)
let firstType = self.parseType()
if let colon = self.consume(if: .colon) {
Expand Down
12 changes: 12 additions & 0 deletions Sources/SwiftParserDiagnostics/ParseDiagnosticsGenerator.swift
Expand Up @@ -21,6 +21,10 @@ public class ParseDiagnosticsGenerator: SyntaxAnyVisitor {
/// method and that should thus not be visited.
private var handledNodes: [SyntaxIdentifier] = []

/// When set to `true`, no more diagnostics will be emitted.
/// Useful to stop any diagnostics after a maximum nesting level overflow was detected.
private var suppressRemainingDiagnostics: Bool = false

private init() {
super.init(viewMode: .all)
}
Expand Down Expand Up @@ -61,6 +65,9 @@ public class ParseDiagnosticsGenerator: SyntaxAnyVisitor {
fixIts: [FixIt] = [],
handledNodes: [SyntaxIdentifier] = []
) {
if suppressRemainingDiagnostics {
return
}
diagnostics.removeAll(where: { handledNodes.contains($0.node.id) })
diagnostics.append(Diagnostic(node: Syntax(node), position: position, message: message, highlights: highlights, notes: notes, fixIts: fixIts))
self.handledNodes.append(contentsOf: handledNodes)
Expand Down Expand Up @@ -162,6 +169,11 @@ public class ParseDiagnosticsGenerator: SyntaxAnyVisitor {
if node.allSatisfy({ handledNodes.contains($0.id) }) {
return .skipChildren
}
if node.hasMaximumNestingLevelOverflow {
addDiagnostic(node, .maximumNestingLevelOverflow)
suppressRemainingDiagnostics = true
return .skipChildren
}
if let tryKeyword = node.onlyToken(where: { $0.tokenKind == .tryKeyword }),
let nextToken = tryKeyword.nextToken(viewMode: .sourceAccurate),
nextToken.tokenKind.isKeyword {
Expand Down
3 changes: 3 additions & 0 deletions Sources/SwiftParserDiagnostics/ParserDiagnosticMessages.swift
Expand Up @@ -143,6 +143,9 @@ extension DiagnosticMessage where Self == StaticParserError {
public static var standaloneSemicolonStatement: Self {
.init("standalone ';' statements are not allowed")
}
/// Error message reported when the parser gave up because the source exceeded
/// the maximum nesting level.
public static var maximumNestingLevelOverflow: Self {
.init("parsing has exceeded the maximum nesting level")
}
public static var subscriptsCannotHaveNames: Self {
.init("subscripts cannot have a name")
}
Expand Down
5 changes: 5 additions & 0 deletions Sources/SwiftSyntax/Raw/RawSyntax.swift
Expand Up @@ -26,6 +26,7 @@ struct RecursiveRawSyntaxFlags: OptionSet {
/// Whether the tree contained by this layout has any missing or unexpected nodes.
static let hasError = RecursiveRawSyntaxFlags(rawValue: 1 << 0)
static let hasSequenceExpr = RecursiveRawSyntaxFlags(rawValue: 1 << 1)
static let hasMaximumNestingLevelOverflow = RecursiveRawSyntaxFlags(rawValue: 1 << 2)
}

/// Node data for RawSyntax tree. Tagged union plus common data.
Expand Down Expand Up @@ -633,6 +634,7 @@ extension RawSyntax {
public static func makeLayout(
kind: SyntaxKind,
uninitializedCount count: Int,
isMaximumNestingLevelOverflow: Bool = false,
arena: SyntaxArena,
initializingWith initializer: (UnsafeMutableBufferPointer<RawSyntax?>) -> Void
) -> RawSyntax {
Expand All @@ -656,6 +658,9 @@ extension RawSyntax {
if kind == .sequenceExpr {
recursiveFlags.insert(.hasSequenceExpr)
}
if isMaximumNestingLevelOverflow {
recursiveFlags.insert(.hasMaximumNestingLevelOverflow)
}
return .layout(
kind: kind,
layout: RawSyntaxBuffer(layoutBuffer),
Expand Down
4 changes: 4 additions & 0 deletions Sources/SwiftSyntax/Syntax.swift
Expand Up @@ -297,6 +297,10 @@ public extension SyntaxProtocol {
return raw.recursiveFlags.contains(.hasSequenceExpr)
}

/// Whether this node's recursive flags include `hasMaximumNestingLevelOverflow`,
/// i.e. whether the node is marked as resulting from the parser exceeding its
/// maximum nesting level.
var hasMaximumNestingLevelOverflow: Bool {
return raw.recursiveFlags.contains(.hasMaximumNestingLevelOverflow)
}

/// The parent of this syntax node, or `nil` if this node is the root.
var parent: Syntax? {
return data.parent.map(Syntax.init(_:))
Expand Down

0 comments on commit 20a8fe4

Please sign in to comment.