Skip to content

Commit

Permalink
Use TokenSource to find new location for re-lexing
Browse files Browse the repository at this point in the history
  • Loading branch information
dhruvmanila committed Jun 27, 2024
1 parent 55f4812 commit f7fd083
Show file tree
Hide file tree
Showing 4 changed files with 149 additions and 268 deletions.
110 changes: 33 additions & 77 deletions crates/ruff_python_parser/src/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1314,7 +1314,8 @@ impl<'src> Lexer<'src> {
}
}

/// Re-lex the current token in the context of a logical line.
/// Re-lex the [`NonLogicalNewline`] token at the given position in the context of a logical
/// line.
///
/// Returns a boolean indicating whether the lexer's position has changed. This could result
/// in the new current token being different from the previous current token but is not
Expand Down Expand Up @@ -1368,7 +1369,10 @@ impl<'src> Lexer<'src> {
///
/// [`Newline`]: TokenKind::Newline
/// [`NonLogicalNewline`]: TokenKind::NonLogicalNewline
pub(crate) fn re_lex_logical_token(&mut self) -> bool {
pub(crate) fn re_lex_logical_token(
&mut self,
non_logical_newline_start: Option<TextSize>,
) -> bool {
if self.nesting == 0 {
return false;
}
Expand All @@ -1383,84 +1387,36 @@ impl<'src> Lexer<'src> {
return false;
}

let mut current_position = self.current_range().start();
let mut reverse_chars = self.source[..current_position.to_usize()]
.chars()
.rev()
.peekable();
let mut newline_position = None;

while let Some(ch) = reverse_chars.next() {
if is_python_whitespace(ch) {
current_position -= ch.text_len();
continue;
}

match ch {
'\n' => {
current_position -= ch.text_len();
if let Some(carriage_return) = reverse_chars.next_if_eq(&'\r') {
current_position -= carriage_return.text_len();
}
}
'\r' => {
current_position -= ch.text_len();
}
_ => break,
}

debug_assert!(matches!(ch, '\n' | '\r'));

// Count the number of backslashes before the newline character.
let mut backslash_count = 0;
while reverse_chars.next_if_eq(&'\\').is_some() {
backslash_count += 1;
}
let Some(new_position) = non_logical_newline_start else {
return false;
};

if backslash_count == 0 {
// No escapes: `\n`
newline_position = Some(current_position);
} else {
if backslash_count % 2 == 0 {
// Even number of backslashes i.e., all backslashes cancel each other out
// which means the newline character is not being escaped.
newline_position = Some(current_position);
}
current_position -= TextSize::new('\\'.text_len().to_u32() * backslash_count);
}
// Earlier we reduced the nesting level unconditionally. Now that we know the lexer's
// position is going to be moved back, the lexer needs to be put back into a
// parenthesized context if the current token is a closing parenthesis.
//
// ```py
// (a, [b,
// c
// )
// ```
//
// Here, the parser would request to re-lex the token when it's at `)` and can recover
// from an unclosed `[`. This method will move the lexer back to the newline character
// after `c` which means it goes back into parenthesized context.
if matches!(
self.current_kind,
TokenKind::Rpar | TokenKind::Rsqb | TokenKind::Rbrace
) {
self.nesting += 1;
}

// The lexer should only be moved if there's a newline character which needs to be
// re-lexed.
if let Some(newline_position) = newline_position {
// Earlier we reduced the nesting level unconditionally. Now that we know the lexer's
// position is going to be moved back, the lexer needs to be put back into a
// parenthesized context if the current token is a closing parenthesis.
//
// ```py
// (a, [b,
// c
// )
// ```
//
// Here, the parser would request to re-lex the token when it's at `)` and can recover
// from an unclosed `[`. This method will move the lexer back to the newline character
// after `c` which means it goes back into parenthesized context.
if matches!(
self.current_kind,
TokenKind::Rpar | TokenKind::Rsqb | TokenKind::Rbrace
) {
self.nesting += 1;
}

self.cursor = Cursor::new(self.source);
self.cursor.skip_bytes(newline_position.to_usize());
self.state = State::Other;
self.next_token();
true
} else {
false
}
self.cursor = Cursor::new(self.source);
self.cursor.skip_bytes(new_position.to_usize());
debug_assert!(matches!(self.cursor.first(), '\n' | '\r'));
self.state = State::Other;
self.next_token();
true
}

#[inline]
Expand Down
17 changes: 14 additions & 3 deletions crates/ruff_python_parser/src/token_source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,23 @@ impl<'src> TokenSource<'src> {
self.lexer.take_value()
}

/// Calls the underlying [`re_lex_logical_token`] method on the lexer and updates the token
/// vector accordingly.
/// Calls the underlying [`re_lex_logical_token`] method on the lexer with the new lexer
/// position and updates the token vector accordingly.
///
/// [`re_lex_logical_token`]: Lexer::re_lex_logical_token
pub(crate) fn re_lex_logical_token(&mut self) {
if self.lexer.re_lex_logical_token() {
let mut non_logical_newline_start = None;
for token in self.tokens.iter().rev() {
match token.kind() {
TokenKind::NonLogicalNewline => {
non_logical_newline_start = Some(token.start());
}
TokenKind::Comment => continue,
_ => break,
}
}

if self.lexer.re_lex_logical_token(non_logical_newline_start) {
let current_start = self.current_range().start();
while self
.tokens
Expand Down
Loading

0 comments on commit f7fd083

Please sign in to comment.