From 26791e17cf528757ca5e85fd846439751bd15544 Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Thu, 27 Jun 2024 09:40:02 +0530 Subject: [PATCH] Use `TokenSource` to find new location for re-lexing --- crates/ruff_python_parser/src/lexer.rs | 109 ++----- crates/ruff_python_parser/src/token_source.rs | 17 +- ...id_syntax@f_string_unclosed_lbrace.py.snap | 285 ++++++------------ ...ing__line_continuation_windows_eol.py.snap | 5 +- 4 files changed, 148 insertions(+), 268 deletions(-) diff --git a/crates/ruff_python_parser/src/lexer.rs b/crates/ruff_python_parser/src/lexer.rs index 0640bd8349f666..b813a4596df33b 100644 --- a/crates/ruff_python_parser/src/lexer.rs +++ b/crates/ruff_python_parser/src/lexer.rs @@ -1314,7 +1314,8 @@ impl<'src> Lexer<'src> { } } - /// Re-lex the current token in the context of a logical line. + /// Re-lex the [`NonLogicalNewline`] token at the given position in the context of a logical + /// line. /// /// Returns a boolean indicating whether the lexer's position has changed. This could result /// into the new current token being different than the previous current token but is not @@ -1368,7 +1369,10 @@ impl<'src> Lexer<'src> { /// /// [`Newline`]: TokenKind::Newline /// [`NonLogicalNewline`]: TokenKind::NonLogicalNewline - pub(crate) fn re_lex_logical_token(&mut self) -> bool { + pub(crate) fn re_lex_logical_token( + &mut self, + non_logical_newline_start: Option, + ) -> bool { if self.nesting == 0 { return false; } @@ -1383,84 +1387,35 @@ impl<'src> Lexer<'src> { return false; } - let mut current_position = self.current_range().start(); - let mut reverse_chars = self.source[..current_position.to_usize()] - .chars() - .rev() - .peekable(); - let mut newline_position = None; - - while let Some(ch) = reverse_chars.next() { - if is_python_whitespace(ch) { - current_position -= ch.text_len(); - continue; - } - - match ch { - '\n' => { - current_position -= ch.text_len(); - if let Some(carriage_return) = reverse_chars.next_if_eq(&'\r') { - current_position -= carriage_return.text_len(); - } - } - '\r' => { - current_position -= ch.text_len(); - } - _ => break, - } - - debug_assert!(matches!(ch, '\n' | '\r')); - - // Count the number of backslashes before the newline character. - let mut backslash_count = 0; - while reverse_chars.next_if_eq(&'\\').is_some() { - backslash_count += 1; - } + let Some(new_position) = non_logical_newline_start else { + return false; + }; - if backslash_count == 0 { - // No escapes: `\n` - newline_position = Some(current_position); - } else { - if backslash_count % 2 == 0 { - // Even number of backslashes i.e., all backslashes cancel each other out - // which means the newline character is not being escaped. - newline_position = Some(current_position); - } - current_position -= TextSize::new('\\'.text_len().to_u32() * backslash_count); - } + // Earlier we reduced the nesting level unconditionally. Now that we know the lexer's + // position is going to be moved back, the lexer needs to be put back into a + // parenthesized context if the current token is a closing parenthesis. + // + // ```py + // (a, [b, + // c + // ) + // ``` + // + // Here, the parser would request to re-lex the token when it's at `)` and can recover + // from an unclosed `[`. This method will move the lexer back to the newline character + // after `c` which means it goes back into parenthesized context. + if matches!( + self.current_kind, + TokenKind::Rpar | TokenKind::Rsqb | TokenKind::Rbrace + ) { + self.nesting += 1; } - // The lexer should only be moved if there's a newline character which needs to be - // re-lexed. - if let Some(newline_position) = newline_position { - // Earlier we reduced the nesting level unconditionally. Now that we know the lexer's - // position is going to be moved back, the lexer needs to be put back into a - // parenthesized context if the current token is a closing parenthesis. - // - // ```py - // (a, [b, - // c - // ) - // ``` - // - // Here, the parser would request to re-lex the token when it's at `)` and can recover - // from an unclosed `[`. This method will move the lexer back to the newline character - // after `c` which means it goes back into parenthesized context. - if matches!( - self.current_kind, - TokenKind::Rpar | TokenKind::Rsqb | TokenKind::Rbrace - ) { - self.nesting += 1; - } - - self.cursor = Cursor::new(self.source); - self.cursor.skip_bytes(newline_position.to_usize()); - self.state = State::Other; - self.next_token(); - true - } else { - false - } + self.cursor = Cursor::new(self.source); + self.cursor.skip_bytes(new_position.to_usize()); + self.state = State::Other; + self.next_token(); + true } #[inline] diff --git a/crates/ruff_python_parser/src/token_source.rs b/crates/ruff_python_parser/src/token_source.rs index c9c9fa3ce69ad5..4851879c89731e 100644 --- a/crates/ruff_python_parser/src/token_source.rs +++ b/crates/ruff_python_parser/src/token_source.rs @@ -60,12 +60,23 @@ impl<'src> TokenSource<'src> { self.lexer.take_value() } - /// Calls the underlying [`re_lex_logical_token`] method on the lexer and updates the token - /// vector accordingly. + /// Calls the underlying [`re_lex_logical_token`] method on the lexer with the new lexer + /// position and updates the token vector accordingly. /// /// [`re_lex_logical_token`]: Lexer::re_lex_logical_token pub(crate) fn re_lex_logical_token(&mut self) { - if self.lexer.re_lex_logical_token() { + let mut non_logical_newline_start = None; + for token in self.tokens.iter().rev() { + match token.kind() { + TokenKind::NonLogicalNewline => { + non_logical_newline_start = Some(token.start()); + } + TokenKind::Comment => continue, + _ => break, + } + } + + if self.lexer.re_lex_logical_token(non_logical_newline_start) { let current_start = self.current_range().start(); while self .tokens diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@f_string_unclosed_lbrace.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@f_string_unclosed_lbrace.py.snap index bdd816e505aa19..dec8c516120b89 100644 --- a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@f_string_unclosed_lbrace.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@f_string_unclosed_lbrace.py.snap @@ -11,141 +11,105 @@ Module( body: [ Expr( StmtExpr { - range: 0..4, + range: 0..38, value: FString( ExprFString { - range: 0..4, + range: 0..38, value: FStringValue { - inner: Single( - FString( - FString { - range: 0..4, - elements: [ - Expression( - FStringExpressionElement { - range: 2..3, - expression: Name( - ExprName { - range: 3..3, - id: "", - ctx: Invalid, - }, - ), - debug_text: None, - conversion: None, - format_spec: None, - }, - ), - ], - flags: FStringFlags { - quote_style: Double, - prefix: Regular, - triple_quoted: false, + inner: Concatenated( + [ + FString( + FString { + range: 0..4, + elements: [ + Expression( + FStringExpressionElement { + range: 2..3, + expression: Name( + ExprName { + range: 3..3, + id: "", + ctx: Invalid, + }, + ), + debug_text: None, + conversion: None, + format_spec: None, + }, + ), + ], + flags: FStringFlags { + quote_style: Double, + prefix: Regular, + triple_quoted: false, + }, }, - }, - ), - ), - }, - }, - ), - }, - ), - Expr( - StmtExpr { - range: 5..14, - value: FString( - ExprFString { - range: 5..14, - value: FStringValue { - inner: Single( - FString( - FString { - range: 5..14, - elements: [ - Expression( - FStringExpressionElement { - range: 7..14, - expression: Name( - ExprName { - range: 8..11, - id: "foo", - ctx: Load, - }, - ), - debug_text: None, - conversion: None, - format_spec: None, - }, - ), - ], - flags: FStringFlags { - quote_style: Double, - prefix: Regular, - triple_quoted: false, + ), + FString( + FString { + range: 5..14, + elements: [ + Expression( + FStringExpressionElement { + range: 7..8, + expression: Name( + ExprName { + range: 8..8, + id: "", + ctx: Invalid, + }, + ), + debug_text: None, + conversion: None, + format_spec: None, + }, + ), + Literal( + FStringLiteralElement { + range: 8..13, + value: "foo!r", + }, + ), + ], + flags: FStringFlags { + quote_style: Double, + prefix: Regular, + triple_quoted: false, + }, }, - }, - ), - ), - }, - }, - ), - }, - ), - Expr( - StmtExpr { - range: 15..23, - value: FString( - ExprFString { - range: 15..23, - value: FStringValue { - inner: Single( - FString( - FString { - range: 15..23, - elements: [ - Expression( - FStringExpressionElement { - range: 17..22, - expression: Name( - ExprName { - range: 18..21, - id: "foo", - ctx: Load, - }, - ), - debug_text: Some( - DebugText { - leading: "", - trailing: "=", - }, - ), - conversion: None, - format_spec: None, - }, - ), - ], - flags: FStringFlags { - quote_style: Double, - prefix: Regular, - triple_quoted: false, + ), + FString( + FString { + range: 15..23, + elements: [ + Expression( + FStringExpressionElement { + range: 17..22, + expression: Name( + ExprName { + range: 18..21, + id: "foo", + ctx: Load, + }, + ), + debug_text: Some( + DebugText { + leading: "", + trailing: "=", + }, + ), + conversion: None, + format_spec: None, + }, + ), + ], + flags: FStringFlags { + quote_style: Double, + prefix: Regular, + triple_quoted: false, + }, }, - }, - ), - ), - }, - }, - ), - }, - ), - Expr( - StmtExpr { - range: 24..37, - value: FString( - ExprFString { - range: 24..37, - value: FStringValue { - inner: Concatenated( - [ + ), FString( FString { range: 24..28, @@ -175,7 +139,7 @@ Module( ), FString( FString { - range: 29..37, + range: 29..38, elements: [ Expression( FStringExpressionElement { @@ -223,42 +187,8 @@ Module( | 1 | f"{" - | Syntax Error: f-string: unterminated string -2 | f"{foo!r" -3 | f"{foo=" - | - - - | -1 | f"{" - | Syntax Error: f-string: unterminated string 2 | f"{foo!r" -3 | f"{foo=" - | - - - | -1 | f"{" -2 | f"{foo!r" - | ^^ Syntax Error: missing closing quote in string literal -3 | f"{foo=" -4 | f"{" - | - - - | -1 | f"{" -2 | f"{foo!r" - | Syntax Error: f-string: unterminated string -3 | f"{foo=" -4 | f"{" - | - - - | -1 | f"{" -2 | f"{foo!r" - | Syntax Error: f-string: unterminated string + | ^^ Syntax Error: Expected FStringEnd, found FStringStart 3 | f"{foo=" 4 | f"{" | @@ -267,17 +197,7 @@ Module( | 1 | f"{" 2 | f"{foo!r" -3 | f"{foo=" - | ^^ Syntax Error: f-string: expecting '}' -4 | f"{" -5 | f"""{""" - | - - - | -1 | f"{" -2 | f"{foo!r" - | Syntax Error: Expected FStringEnd, found Unknown + | ^^^^^ Syntax Error: Expected an expression 3 | f"{foo=" 4 | f"{" | @@ -294,47 +214,40 @@ Module( | -1 | f"{" 2 | f"{foo!r" 3 | f"{foo=" - | Syntax Error: f-string: unterminated string 4 | f"{" + | ^^ Syntax Error: Expected FStringEnd, found FStringStart 5 | f"""{""" | | -1 | f"{" 2 | f"{foo!r" 3 | f"{foo=" - | Syntax Error: f-string: unterminated string 4 | f"{" + | ^ Syntax Error: Expected an expression 5 | f"""{""" | | -2 | f"{foo!r" 3 | f"{foo=" 4 | f"{" - | ^ Syntax Error: missing closing quote in string literal 5 | f"""{""" + |______^ | | -3 | f"{foo=" 4 | f"{" 5 | f"""{""" - | ^^^^ Syntax Error: Expected FStringEnd, found FStringStart | | -3 | f"{foo=" 4 | f"{" 5 | f"""{""" - | ^^^ Syntax Error: Expected an expression | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lexing__line_continuation_windows_eol.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lexing__line_continuation_windows_eol.py.snap index 9e22a93f78973c..3b106ee4081491 100644 --- a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lexing__line_continuation_windows_eol.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lexing__line_continuation_windows_eol.py.snap @@ -82,8 +82,9 @@ Module( | 1 | call(a, b, # comment \ -2 | / -3 | | def bar(): + | _______________________^ +2 | | | |_^ Syntax Error: Expected ')', found newline +3 | def bar(): 4 | pass |