From 892fdd9ab06df4d0a567086109a65bbfa616f323 Mon Sep 17 00:00:00 2001 From: Sam Rawlins Date: Fri, 21 Jul 2023 15:19:38 +0000 Subject: [PATCH] Move all comment-parsing code into pkg/analyzer I did not change the content of the functions, except the following: * modernized some variable declarations with `var`. * added periods to the end of comments. * privatized most methods. Work towards https://github.com/dart-lang/sdk/issues/50702 Change-Id: I4d63d3ee847316b58fa76c12558767c0825027a9 Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/315243 Reviewed-by: Paul Berry Reviewed-by: Brian Wilkerson Commit-Queue: Samuel Rawlins --- .../lib/src/parser/parser_impl.dart | 286 ----------------- pkg/analyzer/lib/src/fasta/ast_builder.dart | 301 +++++++++++++++++- pkg/front_end/test/parser_test_parser.dart | 97 ------ 3 files changed, 292 insertions(+), 392 deletions(-) diff --git a/pkg/_fe_analyzer_shared/lib/src/parser/parser_impl.dart b/pkg/_fe_analyzer_shared/lib/src/parser/parser_impl.dart index c38d523e41c2..3f04e7194426 100644 --- a/pkg/_fe_analyzer_shared/lib/src/parser/parser_impl.dart +++ b/pkg/_fe_analyzer_shared/lib/src/parser/parser_impl.dart @@ -126,11 +126,8 @@ import 'util.dart' show findNonZeroLengthToken, findPreviousNonZeroLengthToken, - isLetter, - isLetterOrDigit, isOneOf, isOneOfOrEof, - isWhitespace, optional; /// An event generating parser of Dart programs. This parser expects all tokens @@ -9422,289 +9419,6 @@ class Parser { return dartdoc; } - /// Parse the comment references in a sequence of comment tokens - /// where [dartdoc] (not null) is the first token in the sequence. - /// Return the number of comment references parsed. - int parseCommentReferences(Token dartdoc) { - return dartdoc.lexeme.startsWith('///') - ? parseReferencesInSingleLineComments(dartdoc) - : parseReferencesInMultiLineComment(dartdoc); - } - - /// Parse the comment references in a multi-line comment token. - /// Return the number of comment references parsed. 
- int parseReferencesInMultiLineComment(Token multiLineDoc) { - String comment = multiLineDoc.lexeme; - assert(comment.startsWith('/**')); - int count = 0; - int length = comment.length; - int start = 3; - bool inCodeBlock = false; - int codeBlock = comment.indexOf('```', /* start = */ 3); - if (codeBlock == -1) { - codeBlock = length; - } - while (start < length) { - if (isWhitespace(comment.codeUnitAt(start))) { - ++start; - continue; - } - int end = comment.indexOf('\n', start); - if (end == -1) { - end = length; - } - if (codeBlock < end) { - inCodeBlock = !inCodeBlock; - codeBlock = comment.indexOf('```', end); - if (codeBlock == -1) { - codeBlock = length; - } - } - if (!inCodeBlock && !comment.startsWith('* ', start)) { - count += parseCommentReferencesInText(multiLineDoc, start, end); - } - start = end + 1; - } - return count; - } - - /// Parse the comment references in a sequence of single line comment tokens - /// where [token] is the first comment token in the sequence. - /// Return the number of comment references parsed. - int parseReferencesInSingleLineComments(Token? token) { - int count = 0; - bool inCodeBlock = false; - while (token != null && !token.isEof) { - String comment = token.lexeme; - if (comment.startsWith('///')) { - if (comment.indexOf('```', /* start = */ 3) != -1) { - inCodeBlock = !inCodeBlock; - } - if (!inCodeBlock) { - bool parseReferences; - if (comment.startsWith('/// ')) { - String? previousComment = token.previous?.lexeme; - parseReferences = previousComment != null && - previousComment.startsWith('///') && - previousComment.trim().length > 3; - } else { - parseReferences = true; - } - if (parseReferences) { - count += parseCommentReferencesInText( - token, /* start = */ 3, comment.length); - } - } - } - token = token.next; - } - return count; - } - - /// Parse the comment references in the text between [start] inclusive - /// and [end] exclusive. Return a count indicating how many were parsed. 
- int parseCommentReferencesInText(Token commentToken, int start, int end) { - String comment = commentToken.lexeme; - int count = 0; - int index = start; - while (index < end) { - int ch = comment.codeUnitAt(index); - if (ch == 0x5B /* `[` */) { - ++index; - if (index < end && comment.codeUnitAt(index) == 0x3A /* `:` */) { - // Skip old-style code block. - index = comment.indexOf(':]', index + 1) + 1; - if (index == 0 || index > end) { - break; - } - } else { - int referenceStart = index; - index = comment.indexOf(']', index); - if (index == -1 || index >= end) { - // Recovery: terminating ']' is not typed yet. - index = findReferenceEnd(comment, referenceStart, end); - } - if (ch != 0x27 /* `'` */ && ch != 0x22 /* `"` */) { - if (isLinkText(comment, index)) { - // TODO(brianwilkerson) Handle the case where there's a library - // URI in the link text. - } else { - listener.handleCommentReferenceText( - comment.substring(referenceStart, index), - commentToken.charOffset + referenceStart); - ++count; - } - } - } - } else if (ch == 0x60 /* '`' */) { - // Skip inline code block if there is both starting '`' and ending '`' - int endCodeBlock = comment.indexOf('`', index + 1); - if (endCodeBlock != -1 && endCodeBlock < end) { - index = endCodeBlock; - } - } - ++index; - } - return count; - } - - /// Given a comment reference without a closing `]`, - /// search for a possible place where `]` should be. 
- int findReferenceEnd(String comment, int index, int end) { - // Find the end of the identifier if there is one - if (index >= end || !isLetter(comment.codeUnitAt(index))) { - return index; - } - while (index < end && isLetterOrDigit(comment.codeUnitAt(index))) { - ++index; - } - - // Check for a trailing `.` - if (index >= end || comment.codeUnitAt(index) != 0x2E /* `.` */) { - return index; - } - ++index; - - // Find end of the identifier after the `.` - if (index >= end || !isLetter(comment.codeUnitAt(index))) { - return index; - } - ++index; - while (index < end && isLetterOrDigit(comment.codeUnitAt(index))) { - ++index; - } - return index; - } - - /// Parse the tokens in a single comment reference and generate either a - /// `handleCommentReference` or `handleNoCommentReference` event. - /// Return `true` if a comment reference was successfully parsed. - bool parseOneCommentReference(Token token, int referenceOffset) { - Token begin = token; - Token? newKeyword = null; - if (optional('new', token)) { - newKeyword = token; - token = token.next!; - } - Token? firstToken, firstPeriod, secondToken, secondPeriod; - if (token.isIdentifier && optional('.', token.next!)) { - secondToken = token; - secondPeriod = token.next!; - if (secondPeriod.next!.isIdentifier && - optional('.', secondPeriod.next!.next!)) { - firstToken = secondToken; - firstPeriod = secondPeriod; - secondToken = secondPeriod.next!; - secondPeriod = secondToken.next!; - } - Token identifier = secondPeriod.next!; - if (identifier.kind == KEYWORD_TOKEN && optional('new', identifier)) { - // Treat `new` after `.` is as an identifier so that it can represent an - // unnamed constructor. This support is separate from the - // constructor-tearoffs feature. 
- rewriter.replaceTokenFollowing( - secondPeriod, - new StringToken(TokenType.IDENTIFIER, identifier.lexeme, - identifier.charOffset)); - } - token = secondPeriod.next!; - } - if (token.isEof) { - // Recovery: Insert a synthetic identifier for code completion - token = rewriter.insertSyntheticIdentifier( - secondPeriod ?? newKeyword ?? syntheticPreviousToken(token)); - if (begin == token.next!) { - begin = token; - } - } - Token? operatorKeyword = null; - if (optional('operator', token)) { - operatorKeyword = token; - token = token.next!; - } - if (token.isUserDefinableOperator) { - if (token.next!.isEof) { - parseOneCommentReferenceRest(begin, referenceOffset, newKeyword, - firstToken, firstPeriod, secondToken, secondPeriod, token); - return true; - } - } else { - token = operatorKeyword ?? token; - if (token.next!.isEof) { - if (token.isIdentifier) { - parseOneCommentReferenceRest(begin, referenceOffset, newKeyword, - firstToken, firstPeriod, secondToken, secondPeriod, token); - return true; - } - Keyword? keyword = token.keyword; - if (newKeyword == null && - secondToken == null && - (keyword == Keyword.THIS || - keyword == Keyword.NULL || - keyword == Keyword.TRUE || - keyword == Keyword.FALSE)) { - // TODO(brianwilkerson) If we want to support this we will need to - // extend the definition of CommentReference to take an expression - // rather than an identifier. For now we just ignore it to reduce the - // number of errors produced, but that's probably not a valid long - // term approach. - } - } - } - listener.handleNoCommentReference(); - return false; - } - - void parseOneCommentReferenceRest( - Token begin, - int referenceOffset, - Token? newKeyword, - Token? firstToken, - Token? firstPeriod, - Token? secondToken, - Token? secondPeriod, - Token identifierOrOperator) { - // Adjust the token offsets to match the enclosing comment token. 
- Token token = begin; - do { - token.offset += referenceOffset; - token = token.next!; - } while (!token.isEof); - - listener.handleCommentReference(newKeyword, firstToken, firstPeriod, - secondToken, secondPeriod, identifierOrOperator); - } - - /// Given that we have just found bracketed text within the given [comment], - /// look to see whether that text is (a) followed by a parenthesized link - /// address, (b) followed by a colon, or (c) followed by optional whitespace - /// and another square bracket. The [rightIndex] is the index of the right - /// bracket. Return `true` if the bracketed text is followed by a link - /// address. - /// - /// This method uses the syntax described by the - /// markdown - /// project. - bool isLinkText(String comment, int rightIndex) { - int length = comment.length; - int index = rightIndex + 1; - if (index >= length) { - return false; - } - int ch = comment.codeUnitAt(index); - if (ch == 0x28 || ch == 0x3A) { - return true; - } - while (isWhitespace(ch)) { - index = index + 1; - if (index >= length) { - return false; - } - ch = comment.codeUnitAt(index); - } - return ch == 0x5B; - } - /// pattern ::= logicalOrPattern /// logicalOrPattern ::= logicalOrPattern ( '|' logicalAndPattern )? /// logicalAndPattern ::= logicalAndPattern ( '&' relationalPattern )? 
diff --git a/pkg/analyzer/lib/src/fasta/ast_builder.dart b/pkg/analyzer/lib/src/fasta/ast_builder.dart index b71c057cbf5b..f144b77fbc7f 100644 --- a/pkg/analyzer/lib/src/fasta/ast_builder.dart +++ b/pkg/analyzer/lib/src/fasta/ast_builder.dart @@ -47,6 +47,8 @@ import 'package:_fe_analyzer_shared/src/parser/parser.dart' import 'package:_fe_analyzer_shared/src/parser/quote.dart'; import 'package:_fe_analyzer_shared/src/parser/stack_listener.dart' show NullValues, StackListener; +import 'package:_fe_analyzer_shared/src/parser/util.dart' + show isLetter, isLetterOrDigit, isWhitespace, optional; import 'package:_fe_analyzer_shared/src/scanner/errors.dart' show translateErrorToken; import 'package:_fe_analyzer_shared/src/scanner/scanner.dart'; @@ -5443,28 +5445,63 @@ class AstBuilder extends StackListener { throw UnsupportedError(message.problemMessage); } + /// Given that we have just found bracketed text within the given [comment], + /// look to see whether that text is (a) followed by a parenthesized link + /// address, (b) followed by a colon, or (c) followed by optional whitespace + /// and another square bracket. + /// + /// [rightIndex] is the index of the right bracket. Return `true` if the + /// bracketed text is followed by a link address. + /// + /// This method uses the syntax described by the + /// markdown + /// project. + bool isLinkText(String comment, int rightIndex) { + var length = comment.length; + var index = rightIndex + 1; + if (index >= length) { + return false; + } + var ch = comment.codeUnitAt(index); + if (ch == 0x28 || ch == 0x3A) { + return true; + } + while (isWhitespace(ch)) { + index = index + 1; + if (index >= length) { + return false; + } + ch = comment.codeUnitAt(index); + } + return ch == 0x5B; + } + /// Return `true` if [token] is either `null` or is the symbol or keyword /// [value]. bool optionalOrNull(String value, Token? 
token) { return token == null || identical(value, token.stringValue); } + /// Parse the comment references in a sequence of comment tokens where + /// [dartdoc] is the first token in the sequence. List parseCommentReferences(Token dartdoc) { - // Parse dartdoc into potential comment reference source/offset pairs - int count = parser.parseCommentReferences(dartdoc); - List sourcesAndOffsets = List.filled(count * 2, null); + // Parse dartdoc into potential comment reference source/offset pairs. + var count = dartdoc.lexeme.startsWith('///') + ? _parseReferencesInSingleLineComments(dartdoc) + : _parseReferencesInMultiLineComment(dartdoc); + var sourcesAndOffsets = List.filled(count * 2, null); popList(count * 2, sourcesAndOffsets); - // Parse each of the source/offset pairs into actual comment references + // Parse each of the source/offset pairs into actual comment references. count = 0; - int index = 0; + var index = 0; while (index < sourcesAndOffsets.length) { var referenceSource = sourcesAndOffsets[index++] as String; var referenceOffset = sourcesAndOffsets[index++] as int; - ScannerResult result = scanString(referenceSource); + var result = scanString(referenceSource); if (!result.hasErrors) { - Token token = result.tokens; - if (parser.parseOneCommentReference(token, referenceOffset)) { + var token = result.tokens; + if (_parseOneCommentReference(token, referenceOffset)) { ++count; } } @@ -5698,7 +5735,7 @@ class AstBuilder extends StackListener { } } - // Build and return the comment + // Build and return the comment. var references = parseCommentReferences(dartdoc); List tokens = [dartdoc]; if (dartdoc.lexeme.startsWith('///')) { @@ -5717,6 +5754,34 @@ class AstBuilder extends StackListener { ); } + /// Given a comment reference without a closing `]`, search for a possible + /// place where `]` should be. + int _findCommentReferenceEnd(String comment, int index, int end) { + // Find the end of the identifier if there is one. 
+ if (index >= end || !isLetter(comment.codeUnitAt(index))) { + return index; + } + while (index < end && isLetterOrDigit(comment.codeUnitAt(index))) { + ++index; + } + + // Check for a trailing `.`. + if (index >= end || comment.codeUnitAt(index) != 0x2E /* `.` */) { + return index; + } + ++index; + + // Find end of the identifier after the `.`. + if (index >= end || !isLetter(comment.codeUnitAt(index))) { + return index; + } + ++index; + while (index < end && isLetterOrDigit(comment.codeUnitAt(index))) { + ++index; + } + return index; + } + void _handleInstanceCreation(Token? token) { var arguments = pop() as MethodInvocationImpl; ConstructorNameImpl constructorName; @@ -5738,6 +5803,224 @@ class AstBuilder extends StackListener { ); } + /// Parse the comment references in the text between [start] inclusive + /// and [end] exclusive. + /// + /// Return the number of comment references that were parsed. + int _parseCommentReferencesInText(Token commentToken, int start, int end) { + var comment = commentToken.lexeme; + var count = 0; + var index = start; + while (index < end) { + var ch = comment.codeUnitAt(index); + if (ch == 0x5B /* `[` */) { + ++index; + if (index < end && comment.codeUnitAt(index) == 0x3A /* `:` */) { + // Skip old-style code block. + index = comment.indexOf(':]', index + 1) + 1; + if (index == 0 || index > end) { + break; + } + } else { + var referenceStart = index; + index = comment.indexOf(']', index); + if (index == -1 || index >= end) { + // Recovery: terminating ']' is not typed yet. + index = _findCommentReferenceEnd(comment, referenceStart, end); + } + if (ch != 0x27 /* `'` */ && ch != 0x22 /* `"` */) { + if (isLinkText(comment, index)) { + // TODO(brianwilkerson) Handle the case where there's a library + // URI in the link text. 
+ } else { + /*listener.*/ handleCommentReferenceText( + comment.substring(referenceStart, index), + commentToken.charOffset + referenceStart); + ++count; + } + } + } + } else if (ch == 0x60 /* '`' */) { + // Skip inline code block if there is both starting '`' and ending '`'. + var endCodeBlock = comment.indexOf('`', index + 1); + if (endCodeBlock != -1 && endCodeBlock < end) { + index = endCodeBlock; + } + } + ++index; + } + return count; + } + + /// Parse the tokens in a single comment reference and generate either a + /// [handleCommentReference] or [handleNoCommentReference] event. + /// Return `true` if a comment reference was successfully parsed. + bool _parseOneCommentReference(Token token, int referenceOffset) { + var begin = token; + Token? newKeyword; + if (optional('new', token)) { + newKeyword = token; + token = token.next!; + } + Token? firstToken, firstPeriod, secondToken, secondPeriod; + if (token.isIdentifier && optional('.', token.next!)) { + secondToken = token; + secondPeriod = token.next!; + if (secondPeriod.next!.isIdentifier && + optional('.', secondPeriod.next!.next!)) { + firstToken = secondToken; + firstPeriod = secondPeriod; + secondToken = secondPeriod.next!; + secondPeriod = secondToken.next!; + } + var identifier = secondPeriod.next!; + if (identifier.kind == KEYWORD_TOKEN && optional('new', identifier)) { + // Treat `new` after `.` as an identifier so that it can represent an + // unnamed constructor. This support is separate from the + // constructor-tearoffs feature. + parser.rewriter.replaceTokenFollowing( + secondPeriod, + StringToken(TokenType.IDENTIFIER, identifier.lexeme, + identifier.charOffset)); + } + token = secondPeriod.next!; + } + if (token.isEof) { + // Recovery: Insert a synthetic identifier for code completion. + token = parser.rewriter.insertSyntheticIdentifier( + secondPeriod ?? newKeyword ?? parser.syntheticPreviousToken(token)); + if (begin == token.next!) { + begin = token; + } + } + Token? 
operatorKeyword; + if (optional('operator', token)) { + operatorKeyword = token; + token = token.next!; + } + if (token.isUserDefinableOperator) { + if (token.next!.isEof) { + _parseOneCommentReferenceRest(begin, referenceOffset, newKeyword, + firstToken, firstPeriod, secondToken, secondPeriod, token); + return true; + } + } else { + token = operatorKeyword ?? token; + if (token.next!.isEof) { + if (token.isIdentifier) { + _parseOneCommentReferenceRest(begin, referenceOffset, newKeyword, + firstToken, firstPeriod, secondToken, secondPeriod, token); + return true; + } + var keyword = token.keyword; + if (newKeyword == null && + secondToken == null && + (keyword == Keyword.THIS || + keyword == Keyword.NULL || + keyword == Keyword.TRUE || + keyword == Keyword.FALSE)) { + // TODO(brianwilkerson) If we want to support this we will need to + // extend the definition of CommentReference to take an expression + // rather than an identifier. For now we just ignore it to reduce the + // number of errors produced, but that's probably not a valid long + // term approach. + } + } + } + handleNoCommentReference(); + return false; + } + + void _parseOneCommentReferenceRest( + Token begin, + int referenceOffset, + Token? newKeyword, + Token? firstToken, + Token? firstPeriod, + Token? secondToken, + Token? secondPeriod, + Token identifierOrOperator) { + // Adjust the token offsets to match the enclosing comment token. + var token = begin; + do { + token.offset += referenceOffset; + token = token.next!; + } while (!token.isEof); + + handleCommentReference(newKeyword, firstToken, firstPeriod, secondToken, + secondPeriod, identifierOrOperator); + } + + /// Parse the comment references in a multi-line comment token. + /// Return the number of comment references parsed. 
+ int _parseReferencesInMultiLineComment(Token multiLineDoc) { + var comment = multiLineDoc.lexeme; + assert(comment.startsWith('/**')); + var count = 0; + var length = comment.length; + var start = 3; + var inCodeBlock = false; + var codeBlock = comment.indexOf('```', /* start = */ 3); + if (codeBlock == -1) { + codeBlock = length; + } + while (start < length) { + if (isWhitespace(comment.codeUnitAt(start))) { + ++start; + continue; + } + var end = comment.indexOf('\n', start); + if (end == -1) { + end = length; + } + if (codeBlock < end) { + inCodeBlock = !inCodeBlock; + codeBlock = comment.indexOf('```', end); + if (codeBlock == -1) { + codeBlock = length; + } + } + if (!inCodeBlock && !comment.startsWith('* ', start)) { + count += _parseCommentReferencesInText(multiLineDoc, start, end); + } + start = end + 1; + } + return count; + } + + /// Parse the comment references in a sequence of single line comment tokens + /// where [token] is the first comment token in the sequence. + /// Return the number of comment references parsed. + int _parseReferencesInSingleLineComments(Token? 
token) { + var count = 0; + var inCodeBlock = false; + while (token != null && !token.isEof) { + var comment = token.lexeme; + if (comment.startsWith('///')) { + if (comment.indexOf('```', /* start = */ 3) != -1) { + inCodeBlock = !inCodeBlock; + } + if (!inCodeBlock) { + bool parseReferences; + if (comment.startsWith('/// ')) { + var previousComment = token.previous?.lexeme; + parseReferences = previousComment != null && + previousComment.startsWith('///') && + previousComment.trim().length > 3; + } else { + parseReferences = true; + } + if (parseReferences) { + count += _parseCommentReferencesInText( + token, /* start = */ 3, comment.length); + } + } + } + token = token.next; + } + return count; + } + List _popNamedTypeList({ required ErrorCode errorCode, }) { diff --git a/pkg/front_end/test/parser_test_parser.dart b/pkg/front_end/test/parser_test_parser.dart index 5d1778f71586..ad6ca83d7bc4 100644 --- a/pkg/front_end/test/parser_test_parser.dart +++ b/pkg/front_end/test/parser_test_parser.dart @@ -2639,103 +2639,6 @@ class TestParser extends Parser { return result; } - @override - int parseCommentReferences(Token dartdoc) { - doPrint('parseCommentReferences(' '$dartdoc)'); - indent++; - var result = super.parseCommentReferences(dartdoc); - indent--; - return result; - } - - @override - int parseReferencesInMultiLineComment(Token multiLineDoc) { - doPrint('parseReferencesInMultiLineComment(' '$multiLineDoc)'); - indent++; - var result = super.parseReferencesInMultiLineComment(multiLineDoc); - indent--; - return result; - } - - @override - int parseReferencesInSingleLineComments(Token? 
token) { - doPrint('parseReferencesInSingleLineComments(' '$token)'); - indent++; - var result = super.parseReferencesInSingleLineComments(token); - indent--; - return result; - } - - @override - int parseCommentReferencesInText(Token commentToken, int start, int end) { - doPrint( - 'parseCommentReferencesInText(' '$commentToken, ' '$start, ' '$end)'); - indent++; - var result = super.parseCommentReferencesInText(commentToken, start, end); - indent--; - return result; - } - - @override - int findReferenceEnd(String comment, int index, int end) { - doPrint('findReferenceEnd(' '$comment, ' '$index, ' '$end)'); - indent++; - var result = super.findReferenceEnd(comment, index, end); - indent--; - return result; - } - - @override - bool parseOneCommentReference(Token token, int referenceOffset) { - doPrint('parseOneCommentReference(' '$token, ' '$referenceOffset)'); - indent++; - var result = super.parseOneCommentReference(token, referenceOffset); - indent--; - return result; - } - - @override - void parseOneCommentReferenceRest( - Token begin, - int referenceOffset, - Token? newKeyword, - Token? firstToken, - Token? firstPeriod, - Token? secondToken, - Token? secondPeriod, - Token identifierOrOperator) { - doPrint('parseOneCommentReferenceRest(' - '$begin, ' - '$referenceOffset, ' - '$newKeyword, ' - '$firstToken, ' - '$firstPeriod, ' - '$secondToken, ' - '$secondPeriod, ' - '$identifierOrOperator)'); - indent++; - var result = super.parseOneCommentReferenceRest( - begin, - referenceOffset, - newKeyword, - firstToken, - firstPeriod, - secondToken, - secondPeriod, - identifierOrOperator); - indent--; - return result; - } - - @override - bool isLinkText(String comment, int rightIndex) { - doPrint('isLinkText(' '$comment, ' '$rightIndex)'); - indent++; - var result = super.isLinkText(comment, rightIndex); - indent--; - return result; - } - @override Token parsePattern(Token token, PatternContext patternContext, {int precedence = 1}) {