diff --git a/crates/ruff_python_parser/src/lexer.rs b/crates/ruff_python_parser/src/lexer.rs index 3ba20e78e88e8..448a3e7b34681 100644 --- a/crates/ruff_python_parser/src/lexer.rs +++ b/crates/ruff_python_parser/src/lexer.rs @@ -566,6 +566,9 @@ impl<'source> Lexer<'source> { // Tracks the last offset of token value that has been written to `normalized`. let mut last_offset = self.offset(); + // This isn't going to change for the duration of the loop. + let in_format_spec = fstring.is_in_format_spec(self.nesting); + let mut in_named_unicode = false; loop { @@ -585,6 +588,13 @@ impl<'source> Lexer<'source> { }); } '\n' | '\r' if !fstring.is_triple_quoted() => { + // If we encounter a newline while we're in a format spec, then + // we stop here and let the lexer emit the newline token. + // + // Relevant discussion: https://github.com/python/cpython/issues/110259 + if in_format_spec { + break; + } return Err(LexicalError { error: LexicalErrorType::FStringError(FStringErrorType::UnterminatedString), location: self.offset(), @@ -620,7 +630,7 @@ impl<'source> Lexer<'source> { } } '{' => { - if self.cursor.second() == '{' && !fstring.is_in_format_spec(self.nesting) { + if self.cursor.second() == '{' && !in_format_spec { self.cursor.bump(); normalized .push_str(&self.source[TextRange::new(last_offset, self.offset())]); @@ -634,9 +644,7 @@ impl<'source> Lexer<'source> { if in_named_unicode { in_named_unicode = false; self.cursor.bump(); - } else if self.cursor.second() == '}' - && !fstring.is_in_format_spec(self.nesting) - { + } else if self.cursor.second() == '}' && !in_format_spec { self.cursor.bump(); normalized .push_str(&self.source[TextRange::new(last_offset, self.offset())]); @@ -1194,6 +1202,9 @@ impl<'source> Lexer<'source> { self.state = State::AfterNewline; Tok::Newline } else { + if let Some(fstring) = self.fstrings.current_mut() { + fstring.try_end_format_spec(self.nesting); + } Tok::NonLogicalNewline }, self.token_range(), @@ -1207,6 +1218,9 @@ impl<'source> Lexer<'source> { self.state = State::AfterNewline; Tok::Newline } else { + if let Some(fstring) = self.fstrings.current_mut() { + fstring.try_end_format_spec(self.nesting); + } Tok::NonLogicalNewline }, self.token_range(), @@ -2051,6 +2065,29 @@ def f(arg=%timeit a = b): assert_debug_snapshot!(lex_source(source)); } + #[test] + fn test_fstring_with_multiline_format_spec() { + // The last f-string is invalid syntactically but we should still lex it. + // Note that the `b` is a `Name` token and not a `FStringMiddle` token. + let source = r"f'''__{ + x:d +}__''' +f'''__{ + x:a + b + c +}__''' +f'__{ + x:d +}__' +f'__{ + x:a + b +}__' +"; + assert_debug_snapshot!(lex_source(source)); + } + #[test] fn test_fstring_conversion() { let source = r#"f"{x!s} {x=!r} {x:.3f!r} {{x!r}}""#; diff --git a/crates/ruff_python_parser/src/parser.rs b/crates/ruff_python_parser/src/parser.rs index 5b7943d56889c..ae6d9a53a2f94 100644 --- a/crates/ruff_python_parser/src/parser.rs +++ b/crates/ruff_python_parser/src/parser.rs @@ -1290,6 +1290,11 @@ match foo: f"\{foo}\{bar:\}" f"\\{{foo\\}}" +f"""{ + foo:x + y + z +}""" "# .trim(), "", diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_multiline_format_spec.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_multiline_format_spec.snap new file mode 100644 index 0000000000000..6cab3fb5a5bda --- /dev/null +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_multiline_format_spec.snap @@ -0,0 +1,244 @@ +--- +source: crates/ruff_python_parser/src/lexer.rs +expression: lex_source(source) +--- +[ + ( + FStringStart, + 0..4, + ), + ( + FStringMiddle { + value: "__", + is_raw: false, + }, + 4..6, + ), + ( + Lbrace, + 6..7, + ), + ( + NonLogicalNewline, + 7..8, + ), + ( + Name { + name: "x", + }, + 12..13, + ), + ( + Colon, + 13..14, + ), + ( + FStringMiddle { + value: "d\n", + is_raw: false, + }, + 14..16, + ), + ( + Rbrace, + 16..17, + ), + ( + FStringMiddle { + value: "__", + is_raw: false, + }, + 17..19, + ), + ( + FStringEnd, + 19..22, + ), + ( + Newline, + 22..23, + ), + ( + FStringStart, + 23..27, + ), + ( + FStringMiddle { + value: "__", + is_raw: false, + }, + 27..29, + ), + ( + Lbrace, + 29..30, + ), + ( + NonLogicalNewline, + 30..31, + ), + ( + Name { + name: "x", + }, + 35..36, + ), + ( + Colon, + 36..37, + ), + ( + FStringMiddle { + value: "a\n b\n c\n", + is_raw: false, + }, + 37..61, + ), + ( + Rbrace, + 61..62, + ), + ( + FStringMiddle { + value: "__", + is_raw: false, + }, + 62..64, + ), + ( + FStringEnd, + 64..67, + ), + ( + Newline, + 67..68, + ), + ( + FStringStart, + 68..70, + ), + ( + FStringMiddle { + value: "__", + is_raw: false, + }, + 70..72, + ), + ( + Lbrace, + 72..73, + ), + ( + NonLogicalNewline, + 73..74, + ), + ( + Name { + name: "x", + }, + 78..79, + ), + ( + Colon, + 79..80, + ), + ( + FStringMiddle { + value: "d", + is_raw: false, + }, + 80..81, + ), + ( + NonLogicalNewline, + 81..82, + ), + ( + Rbrace, + 82..83, + ), + ( + FStringMiddle { + value: "__", + is_raw: false, + }, + 83..85, + ), + ( + FStringEnd, + 85..86, + ), + ( + Newline, + 86..87, + ), + ( + FStringStart, + 87..89, + ), + ( + FStringMiddle { + value: "__", + is_raw: false, + }, + 89..91, + ), + ( + Lbrace, + 91..92, + ), + ( + NonLogicalNewline, + 92..93, + ), + ( + Name { + name: "x", + }, + 97..98, + ), + ( + Colon, + 98..99, + ), + ( + FStringMiddle { + value: "a", + is_raw: false, + }, + 99..100, + ), + ( + NonLogicalNewline, + 100..101, + ), + ( + Name { + name: "b", + }, + 109..110, + ), + ( + NonLogicalNewline, + 110..111, + ), + ( + Rbrace, + 111..112, + ), + ( + FStringMiddle { + value: "__", + is_raw: false, + }, + 112..114, + ), + ( + FStringEnd, + 114..115, + ), + ( + Newline, + 115..116, + ), +] diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__parser__tests__fstrings.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__parser__tests__fstrings.snap index c897a798b5d76..0188a187e7aed 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__parser__tests__fstrings.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__parser__tests__fstrings.snap @@ -845,4 +845,53 @@ expression: parse_ast ), }, ), + Expr( + StmtExpr { + range: 304..344, + value: FString( + ExprFString { + range: 304..344, + values: [ + FormattedValue( + ExprFormattedValue { + range: 308..341, + value: Name( + ExprName { + range: 314..317, + id: "foo", + ctx: Load, + }, + ), + debug_text: None, + conversion: None, + format_spec: Some( + FString( + ExprFString { + range: 318..340, + values: [ + Constant( + ExprConstant { + range: 318..340, + value: Str( + StringConstant { + value: "x\n y\n z\n", + unicode: false, + implicit_concatenated: false, + }, + ), + }, + ), + ], + implicit_concatenated: false, + }, + ), + ), + }, + ), + ], + implicit_concatenated: false, + }, + ), + }, + ), ]