Skip to content

Commit

Permalink
Bump soft keyword as name token (#11459)
Browse files Browse the repository at this point in the history
## Summary

This PR updates the various `bump` methods to allow the parser to bump a soft
keyword as a name token.

This is required because the token vector should store the resolved
token. Otherwise, the token vector and the AST are out of sync.

The process is as follows:
* The parser has one common method to bump the given kind, which is
`do_bump`
* This calls into the `bump` method on the token source
* The token source adds the given token kind to the token vector and bumps
itself to the next non-trivia token
* While doing this bump, it still adds the trivia tokens to the token
vector

The end result is that the parser informs the token source to add the
given kind to the token vector and move on to the next token.

Here, we can then introduce a `bump_soft_keyword_as_name` method which
asserts that the current token is a soft keyword and bumps it as a name
token instead. The `parse_identifier` method then calls the new method
instead.
  • Loading branch information
dhruvmanila committed May 24, 2024
1 parent 58e444a commit 5e08865
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 41 deletions.
4 changes: 2 additions & 2 deletions crates/ruff_python_parser/src/parser/expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,7 @@ impl<'src> Parser<'src> {

if self.current_token_kind().is_soft_keyword() {
let id = self.src_text(range).to_string();
self.bump_any();
self.bump_soft_keyword_as_name();
return ast::Identifier { id, range };
}

Expand Down Expand Up @@ -1343,7 +1343,7 @@ impl<'src> Parser<'src> {
// `Unknown` tokens are created when there's a lexical error, so
// we ignore them here to avoid creating unexpected token errors
TokenKind::Unknown => {
parser.next_token();
parser.bump_any();
return;
}
tok => {
Expand Down
77 changes: 48 additions & 29 deletions crates/ruff_python_parser/src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,11 @@ impl Program {
&self.ast
}

/// Returns all the tokens for the program.
pub fn tokens(&self) -> &[Token] {
&self.tokens
}

/// Returns a list of syntax errors found during parsing.
pub fn errors(&self) -> &[ParseError] {
&self.parse_errors
Expand Down Expand Up @@ -154,7 +159,7 @@ impl<'src> Parser<'src> {
if self.at(TokenKind::EndOfFile) {
break;
}
self.next_token();
self.bump_any();
}
}

Expand Down Expand Up @@ -315,8 +320,8 @@ impl<'src> Parser<'src> {
}

/// Moves the parser to the next token.
fn next_token(&mut self) {
self.tokens.next_token();
fn do_bump(&mut self, kind: TokenKind) {
self.tokens.bump(kind);

self.current_token_id.increment();

Expand Down Expand Up @@ -357,15 +362,15 @@ impl<'src> Parser<'src> {
self.current_token_id
}

/// Eat the current token if it is of the given kind, returning `true` in
/// that case. Otherwise, return `false`.
fn eat(&mut self, kind: TokenKind) -> bool {
if self.at(kind) {
self.next_token();
true
} else {
false
}
/// Bumps the current token assuming it is of the given kind.
///
/// # Panics
///
/// If the current token is not of the given kind.
fn bump(&mut self, kind: TokenKind) {
assert_eq!(self.current_token_kind(), kind);

self.do_bump(kind);
}

/// Take the token value from the underlying token source and bump the current token.
Expand All @@ -379,26 +384,16 @@ impl<'src> Parser<'src> {
value
}

/// Bumps the current token assuming it is of the given kind.
///
/// # Panics
///
/// If the current token is not of the given kind.
fn bump(&mut self, kind: TokenKind) {
assert_eq!(self.current_token_kind(), kind);

self.next_token();
}

/// Bumps the current token assuming it is found in the given token set.
///
/// # Panics
///
/// If the current token is not found in the given token set.
fn bump_ts(&mut self, ts: TokenSet) {
assert!(ts.contains(self.current_token_kind()));
let kind = self.current_token_kind();
assert!(ts.contains(kind));

self.next_token();
self.do_bump(kind);
}

/// Bumps the current token regardless of its kind and advances to the next token.
Expand All @@ -407,11 +402,35 @@ impl<'src> Parser<'src> {
///
/// If the parser is at end of file.
fn bump_any(&mut self) {
assert_ne!(self.current_token_kind(), TokenKind::EndOfFile);
let kind = self.current_token_kind();
assert_ne!(kind, TokenKind::EndOfFile);

self.do_bump(kind);
}

/// Bumps the soft keyword token as a `Name` token.
///
/// # Panics
///
/// If the current token is not a soft keyword.
pub(crate) fn bump_soft_keyword_as_name(&mut self) {
assert!(self.current_token_kind().is_soft_keyword());

self.next_token();
self.do_bump(TokenKind::Name);
}

/// Consume the current token if it is of the given kind. Returns `true` if it matches, `false`
/// otherwise.
fn eat(&mut self, kind: TokenKind) -> bool {
if self.at(kind) {
self.do_bump(kind);
true
} else {
false
}
}

/// Eats the current token if it is of the expected kind; otherwise, adds an appropriate error.
fn expect(&mut self, expected: TokenKind) -> bool {
if self.eat(expected) {
return true;
Expand Down Expand Up @@ -522,7 +541,7 @@ impl<'src> Parser<'src> {
break;
}

self.next_token();
self.bump_any();
}
}

Expand Down Expand Up @@ -606,7 +625,7 @@ impl<'src> Parser<'src> {
trailing_comma_range = None;
}

self.next_token();
self.bump_any();
}
}

Expand Down
34 changes: 24 additions & 10 deletions crates/ruff_python_parser/src/token_source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ impl<'src> TokenSource<'src> {
let mut source = TokenSource::new(lexer);

// Initialize the token source so that the current token is set correctly.
source.next_token();
source.do_bump();
source
}

Expand All @@ -53,26 +53,40 @@ impl<'src> TokenSource<'src> {
/// Returns the next non-trivia token without consuming it.
pub(crate) fn peek(&mut self) -> TokenKind {
let checkpoint = self.lexer.checkpoint();
let next = loop {
let next = self.next_non_trivia_token();
self.lexer.rewind(checkpoint);
next
}

/// Bumps the token source to the next non-trivia token.
///
/// It pushes the given kind to the token vector with the current token range.
pub(crate) fn bump(&mut self, kind: TokenKind) {
self.tokens.push(Token::new(kind, self.current_range()));
self.do_bump();
}

/// Bumps the token source to the next non-trivia token without adding the current token to the
/// token vector. It does add the trivia tokens to the token vector.
fn do_bump(&mut self) {
loop {
let next = self.lexer.next_token();
if next.is_trivia() {
self.tokens.push(next);
continue;
}
break next.kind();
};
self.lexer.rewind(checkpoint);
next
break;
}
}

/// Moves the lexer to the next non-trivia token.
pub(crate) fn next_token(&mut self) {
/// Returns the next non-trivia token without adding it to the token vector.
fn next_non_trivia_token(&mut self) -> TokenKind {
loop {
let next = self.lexer.next_token();
self.tokens.push(next);
if next.is_trivia() {
continue;
}
break;
break next.kind();
}
}

Expand Down

0 comments on commit 5e08865

Please sign in to comment.