From 745a8788acaabb9d296a2a433018fd30212414a9 Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Tue, 3 Oct 2023 22:40:11 +0530 Subject: [PATCH] Allow multi-line f-string with format spec --- crates/ruff_python_parser/src/lexer.rs | 43 ++++- ...s__fstring_with_multiline_format_spec.snap | 175 ++++++++++++++++++ 2 files changed, 214 insertions(+), 4 deletions(-) create mode 100644 crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_multiline_format_spec.snap diff --git a/crates/ruff_python_parser/src/lexer.rs b/crates/ruff_python_parser/src/lexer.rs index 3ba20e78e88e87..737c1016453519 100644 --- a/crates/ruff_python_parser/src/lexer.rs +++ b/crates/ruff_python_parser/src/lexer.rs @@ -566,6 +566,9 @@ impl<'source> Lexer<'source> { // Tracks the last offset of token value that has been written to `normalized`. let mut last_offset = self.offset(); + // This isn't going to change for the duration of the loop. + let in_format_spec = fstring.is_in_format_spec(self.nesting); + let mut in_named_unicode = false; loop { @@ -585,6 +588,24 @@ impl<'source> Lexer<'source> { }); } '\n' | '\r' if !fstring.is_triple_quoted() => { + // If we encounter a newline while we're in a format spec, then + // we stop here and let the lexer emit the newline token. + // + // This is not done for triple-quoted f-strings because newlines + // are significant in those. For example, + // + // ```python + // f"""__{ + // datetime.datetime.now():%Y + // %m + // %d + // }__""" + // + // # Output: '__2023\n 10\n 04\n__' + // ``` + if in_format_spec { + break; + } return Err(LexicalError { error: LexicalErrorType::FStringError(FStringErrorType::UnterminatedString), location: self.offset(), @@ -620,7 +641,7 @@ impl<'source> Lexer<'source> { } } '{' => { - if self.cursor.second() == '{' && !fstring.is_in_format_spec(self.nesting) { + if self.cursor.second() == '{' && !in_format_spec { self.cursor.bump(); normalized .push_str(&self.source[TextRange::new(last_offset, self.offset())]); @@ -634,9 +655,7 @@ impl<'source> Lexer<'source> { if in_named_unicode { in_named_unicode = false; self.cursor.bump(); - } else if self.cursor.second() == '}' - && !fstring.is_in_format_spec(self.nesting) - { + } else if self.cursor.second() == '}' && !in_format_spec { self.cursor.bump(); normalized .push_str(&self.source[TextRange::new(last_offset, self.offset())]); @@ -2051,6 +2070,22 @@ def f(arg=%timeit a = b): assert_debug_snapshot!(lex_source(source)); } + #[test] + fn test_fstring_with_multiline_format_spec() { + let source = r"f'''__{ + x:d +}__''' +f'''__{ + x:a + b + c +}__''' +f'__{ + x:d +}__'"; + assert_debug_snapshot!(lex_source(source)); + } + #[test] fn test_fstring_conversion() { let source = r#"f"{x!s} {x=!r} {x:.3f!r} {{x!r}}""#; diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_multiline_format_spec.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_multiline_format_spec.snap new file mode 100644 index 00000000000000..c88d685d40a685 --- /dev/null +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_multiline_format_spec.snap @@ -0,0 +1,175 @@ +--- +source: crates/ruff_python_parser/src/lexer.rs +expression: lex_source(source) +--- +[ + ( + FStringStart, + 0..4, + ), + ( + FStringMiddle { + value: "__", + is_raw: false, + }, + 4..6, + ), + ( + Lbrace, + 6..7, + ), + ( + NonLogicalNewline, + 7..8, + ), + ( + Name { + name: "x", + }, + 12..13, + ), + ( + Colon, + 13..14, + ), + ( + FStringMiddle { + value: "d\n", + is_raw: false, + }, + 14..16, + ), + ( + Rbrace, + 16..17, + ), + ( + FStringMiddle { + value: "__", + is_raw: false, + }, + 17..19, + ), + ( + FStringEnd, + 19..22, + ), + ( + Newline, + 22..23, + ), + ( + FStringStart, + 23..27, + ), + ( + FStringMiddle { + value: "__", + is_raw: false, + }, + 27..29, + ), + ( + Lbrace, + 29..30, + ), + ( + NonLogicalNewline, + 30..31, + ), + ( + Name { + name: "x", + }, + 35..36, + ), + ( + Colon, + 36..37, + ), + ( + FStringMiddle { + value: "a\n b\n c\n", + is_raw: false, + }, + 37..61, + ), + ( + Rbrace, + 61..62, + ), + ( + FStringMiddle { + value: "__", + is_raw: false, + }, + 62..64, + ), + ( + FStringEnd, + 64..67, + ), + ( + Newline, + 67..68, + ), + ( + FStringStart, + 68..70, + ), + ( + FStringMiddle { + value: "__", + is_raw: false, + }, + 70..72, + ), + ( + Lbrace, + 72..73, + ), + ( + NonLogicalNewline, + 73..74, + ), + ( + Name { + name: "x", + }, + 78..79, + ), + ( + Colon, + 79..80, + ), + ( + FStringMiddle { + value: "d", + is_raw: false, + }, + 80..81, + ), + ( + NonLogicalNewline, + 81..82, + ), + ( + Rbrace, + 82..83, + ), + ( + FStringMiddle { + value: "__", + is_raw: false, + }, + 83..85, + ), + ( + FStringEnd, + 85..86, + ), + ( + Newline, + 86..86, + ), +]