From bcabccaff513de23865ea5e188e022aebafe60a9 Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Tue, 21 Nov 2023 13:54:51 -0600 Subject: [PATCH] Update the formatter code for the new AST design --- crates/ruff_python_ast/src/expression.rs | 25 ++ .../src/comments/placement.rs | 4 +- .../src/expression/expr_bin_op.rs | 8 +- .../src/expression/expr_bytes_literal.rs | 2 +- .../src/expression/expr_compare.rs | 8 +- .../src/expression/expr_f_string.rs | 2 +- .../src/expression/expr_string_literal.rs | 2 +- .../src/expression/mod.rs | 23 +- .../src/expression/string.rs | 255 +++++------------- .../src/statement/suite.rs | 12 +- .../ruff_python_formatter/tests/normalizer.rs | 26 +- 11 files changed, 134 insertions(+), 233 deletions(-) diff --git a/crates/ruff_python_ast/src/expression.rs b/crates/ruff_python_ast/src/expression.rs index 4ed79eeb36695..4bdc46f7dbf8d 100644 --- a/crates/ruff_python_ast/src/expression.rs +++ b/crates/ruff_python_ast/src/expression.rs @@ -362,6 +362,31 @@ impl Ranged for LiteralExpressionRef<'_> { } } +impl<'a> From> for AnyNodeRef<'a> { + fn from(value: LiteralExpressionRef<'a>) -> Self { + match value { + LiteralExpressionRef::StringLiteral(expression) => { + AnyNodeRef::ExprStringLiteral(expression) + } + LiteralExpressionRef::BytesLiteral(expression) => { + AnyNodeRef::ExprBytesLiteral(expression) + } + LiteralExpressionRef::NumberLiteral(expression) => { + AnyNodeRef::ExprNumberLiteral(expression) + } + LiteralExpressionRef::BooleanLiteral(expression) => { + AnyNodeRef::ExprBooleanLiteral(expression) + } + LiteralExpressionRef::NoneLiteral(expression) => { + AnyNodeRef::ExprNoneLiteral(expression) + } + LiteralExpressionRef::EllipsisLiteral(expression) => { + AnyNodeRef::ExprEllipsisLiteral(expression) + } + } + } +} + impl LiteralExpressionRef<'_> { /// Returns `true` if the literal is either a string or bytes literal that /// is implicitly concatenated. diff --git a/crates/ruff_python_formatter/src/comments/placement.rs b/crates/ruff_python_formatter/src/comments/placement.rs index 5fe12bd190322..4b667fd23629f 100644 --- a/crates/ruff_python_formatter/src/comments/placement.rs +++ b/crates/ruff_python_formatter/src/comments/placement.rs @@ -283,13 +283,13 @@ fn handle_enclosed_comment<'a>( AnyNodeRef::StmtWith(with_) => handle_with_comment(comment, with_), AnyNodeRef::ExprCall(_) => handle_call_comment(comment), AnyNodeRef::ExprStringLiteral(_) => { - if let Some(AnyNodeRef::ExprFString(fstring)) = comment.enclosing_parent() { + if let Some(AnyNodeRef::FString(fstring)) = comment.enclosing_parent() { CommentPlacement::dangling(fstring, comment) } else { CommentPlacement::Default(comment) } } - AnyNodeRef::ExprFString(fstring) => CommentPlacement::dangling(fstring, comment), + AnyNodeRef::FString(fstring) => CommentPlacement::dangling(fstring, comment), AnyNodeRef::ExprList(_) | AnyNodeRef::ExprSet(_) | AnyNodeRef::ExprListComp(_) diff --git a/crates/ruff_python_formatter/src/expression/expr_bin_op.rs b/crates/ruff_python_formatter/src/expression/expr_bin_op.rs index 4a9361881d7e5..337dd825f2f4f 100644 --- a/crates/ruff_python_formatter/src/expression/expr_bin_op.rs +++ b/crates/ruff_python_formatter/src/expression/expr_bin_op.rs @@ -35,13 +35,13 @@ impl NeedsParentheses for ExprBinOp { ) -> OptionalParentheses { if parent.is_expr_await() { OptionalParentheses::Always - } else if self.left.is_literal_expr() { + } else if let Some(literal_expr) = self.left.as_literal_expr() { // Multiline strings are guaranteed to never fit, avoid adding unnecessary parentheses - if !self.left.is_implicit_concatenated_string() - && is_multiline_string(self.left.as_ref().into(), context.source()) + if !literal_expr.is_implicit_concatenated() + && is_multiline_string(literal_expr.into(), context.source()) && has_parentheses(&self.right, context).is_some() && !context.comments().has_dangling(self) - && !context.comments().has(self.left.as_ref()) + && !context.comments().has(literal_expr) && !context.comments().has(self.right.as_ref()) { OptionalParentheses::Never diff --git a/crates/ruff_python_formatter/src/expression/expr_bytes_literal.rs b/crates/ruff_python_formatter/src/expression/expr_bytes_literal.rs index 968487a07cb4a..2fc0cd474ce77 100644 --- a/crates/ruff_python_formatter/src/expression/expr_bytes_literal.rs +++ b/crates/ruff_python_formatter/src/expression/expr_bytes_literal.rs @@ -31,7 +31,7 @@ impl NeedsParentheses for ExprBytesLiteral { _parent: AnyNodeRef, context: &PyFormatContext, ) -> OptionalParentheses { - if self.implicit_concatenated { + if self.value.is_implicit_concatenated() { OptionalParentheses::Multiline } else if is_multiline_string(self.into(), context.source()) { OptionalParentheses::Never diff --git a/crates/ruff_python_formatter/src/expression/expr_compare.rs b/crates/ruff_python_formatter/src/expression/expr_compare.rs index f48bd66eccd9c..e9a338075e318 100644 --- a/crates/ruff_python_formatter/src/expression/expr_compare.rs +++ b/crates/ruff_python_formatter/src/expression/expr_compare.rs @@ -37,11 +37,11 @@ impl NeedsParentheses for ExprCompare { ) -> OptionalParentheses { if parent.is_expr_await() { OptionalParentheses::Always - } else if self.left.is_literal_expr() { + } else if let Some(literal_expr) = self.left.as_literal_expr() { // Multiline strings are guaranteed to never fit, avoid adding unnecessary parentheses - if !self.left.is_implicit_concatenated_string() - && is_multiline_string(self.left.as_ref().into(), context.source()) - && !context.comments().has(self.left.as_ref()) + if !literal_expr.is_implicit_concatenated() + && is_multiline_string(literal_expr.into(), context.source()) + && !context.comments().has(literal_expr) && self.comparators.first().is_some_and(|right| { has_parentheses(right, context).is_some() && !context.comments().has(right) }) diff --git a/crates/ruff_python_formatter/src/expression/expr_f_string.rs b/crates/ruff_python_formatter/src/expression/expr_f_string.rs index 51beb4dbb8f4a..12e112ecc1638 100644 --- a/crates/ruff_python_formatter/src/expression/expr_f_string.rs +++ b/crates/ruff_python_formatter/src/expression/expr_f_string.rs @@ -34,7 +34,7 @@ impl NeedsParentheses for ExprFString { _parent: AnyNodeRef, context: &PyFormatContext, ) -> OptionalParentheses { - if self.implicit_concatenated { + if self.value.is_implicit_concatenated() { OptionalParentheses::Multiline } else if memchr2(b'\n', b'\r', context.source()[self.range].as_bytes()).is_none() { OptionalParentheses::BestFit diff --git a/crates/ruff_python_formatter/src/expression/expr_string_literal.rs b/crates/ruff_python_formatter/src/expression/expr_string_literal.rs index e6ce374c292fb..199fb740ef712 100644 --- a/crates/ruff_python_formatter/src/expression/expr_string_literal.rs +++ b/crates/ruff_python_formatter/src/expression/expr_string_literal.rs @@ -46,7 +46,7 @@ impl NeedsParentheses for ExprStringLiteral { _parent: AnyNodeRef, context: &PyFormatContext, ) -> OptionalParentheses { - if self.implicit_concatenated { + if self.value.is_implicit_concatenated() { OptionalParentheses::Multiline } else if is_multiline_string(self.into(), context.source()) { OptionalParentheses::Never diff --git a/crates/ruff_python_formatter/src/expression/mod.rs b/crates/ruff_python_formatter/src/expression/mod.rs index 019546e596edd..86fae5137a448 100644 --- a/crates/ruff_python_formatter/src/expression/mod.rs +++ b/crates/ruff_python_formatter/src/expression/mod.rs @@ -804,18 +804,17 @@ impl<'input> CanOmitOptionalParenthesesVisitor<'input> { return; } - Expr::StringLiteral(ast::ExprStringLiteral { - implicit_concatenated: true, - .. - }) - | Expr::BytesLiteral(ast::ExprBytesLiteral { - implicit_concatenated: true, - .. - }) - | Expr::FString(ast::ExprFString { - implicit_concatenated: true, - .. - }) => { + Expr::StringLiteral(ast::ExprStringLiteral { value, .. }) + if value.is_implicit_concatenated() => + { + self.update_max_precedence(OperatorPrecedence::String); + } + Expr::BytesLiteral(ast::ExprBytesLiteral { value, .. }) + if value.is_implicit_concatenated() => + { + self.update_max_precedence(OperatorPrecedence::String); + } + Expr::FString(ast::ExprFString { value, .. }) if value.is_implicit_concatenated() => { self.update_max_precedence(OperatorPrecedence::String); } diff --git a/crates/ruff_python_formatter/src/expression/string.rs b/crates/ruff_python_formatter/src/expression/string.rs index 0fef24af91cf4..cb534d49a4517 100644 --- a/crates/ruff_python_formatter/src/expression/string.rs +++ b/crates/ruff_python_formatter/src/expression/string.rs @@ -2,13 +2,11 @@ use std::borrow::Cow; use bitflags::bitflags; -use ruff_formatter::{format_args, write, FormatError}; +use ruff_formatter::{format_args, write}; use ruff_python_ast::AnyNodeRef; use ruff_python_ast::{ self as ast, ExprBytesLiteral, ExprFString, ExprStringLiteral, ExpressionRef, }; -use ruff_python_parser::lexer::{lex_starts_at, LexicalError, LexicalErrorType}; -use ruff_python_parser::{Mode, Tok}; use ruff_source_file::Locator; use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; @@ -52,7 +50,7 @@ impl<'a> AnyString<'a> { .trim_start_matches(|c| c != '"' && c != '\''); let triple_quoted = unprefixed.starts_with(r#"""""#) || unprefixed.starts_with(r"'''"); - if f_string.values.iter().any(|value| match value { + if f_string.value.elements().any(|value| match value { Expr::FormattedValue(ast::ExprFormattedValue { range, .. }) => { let string_content = locator.slice(*range); if triple_quoted { @@ -74,18 +72,29 @@ impl<'a> AnyString<'a> { /// Returns `true` if the string is implicitly concatenated. pub(super) fn is_implicit_concatenated(&self) -> bool { match self { - Self::String(ExprStringLiteral { - implicit_concatenated, - .. - }) => *implicit_concatenated, - Self::Bytes(ExprBytesLiteral { - implicit_concatenated, - .. - }) => *implicit_concatenated, - Self::FString(ExprFString { - implicit_concatenated, - .. - }) => *implicit_concatenated, + Self::String(ExprStringLiteral { value, .. }) => value.is_implicit_concatenated(), + Self::Bytes(ExprBytesLiteral { value, .. }) => value.is_implicit_concatenated(), + Self::FString(ExprFString { value, .. }) => value.is_implicit_concatenated(), + } + } + + fn parts(&self) -> Vec> { + match self { + Self::String(ExprStringLiteral { value, .. }) => { + value.parts().map(AnyStringPart::String).collect() + } + Self::Bytes(ExprBytesLiteral { value, .. }) => { + value.parts().map(AnyStringPart::Bytes).collect() + } + Self::FString(ExprFString { value, .. }) => value + .parts() + .map(|f_string_part| match f_string_part { + ast::FStringPart::Literal(string_literal) => { + AnyStringPart::String(string_literal) + } + ast::FStringPart::FString(f_string) => AnyStringPart::FString(f_string), + }) + .collect(), } } } @@ -120,6 +129,33 @@ impl<'a> From<&AnyString<'a>> for ExpressionRef<'a> { } } +#[derive(Clone, Debug)] +enum AnyStringPart<'a> { + String(&'a ast::StringLiteral), + Bytes(&'a ast::BytesLiteral), + FString(&'a ast::FString), +} + +impl<'a> From<&AnyStringPart<'a>> for AnyNodeRef<'a> { + fn from(value: &AnyStringPart<'a>) -> Self { + match value { + AnyStringPart::String(part) => AnyNodeRef::StringLiteral(part), + AnyStringPart::Bytes(part) => AnyNodeRef::BytesLiteral(part), + AnyStringPart::FString(part) => AnyNodeRef::FString(part), + } + } +} + +impl Ranged for AnyStringPart<'_> { + fn range(&self) -> TextRange { + match self { + Self::String(part) => part.range(), + Self::Bytes(part) => part.range(), + Self::FString(part) => part.range(), + } + } +} + pub(super) struct FormatString<'a> { string: &'a AnyString<'a>, layout: StringLayout, @@ -185,7 +221,7 @@ impl<'a> Format> for FormatString<'a> { // comments. if let AnyString::FString(fstring) = self.string { let comments = f.context().comments(); - fstring.values.iter().for_each(|value| { + fstring.value.elements().for_each(|value| { comments.mark_verbatim_node_comments_formatted(value.into()); }); } @@ -193,60 +229,6 @@ impl<'a> Format> for FormatString<'a> { } } -/// A builder for the f-string range. -/// -/// For now, this is limited to the outermost f-string and doesn't support -/// nested f-strings. -#[derive(Debug, Default)] -struct FStringRangeBuilder { - start_location: TextSize, - end_location: TextSize, - nesting: u32, -} - -impl FStringRangeBuilder { - fn visit_token(&mut self, token: &Tok, range: TextRange) { - match token { - Tok::FStringStart => { - if self.nesting == 0 { - self.start_location = range.start(); - } - self.nesting += 1; - } - Tok::FStringEnd => { - // We can assume that this will never overflow because we know - // that the program once parsed to a valid AST which means that - // the start and end tokens for f-strings are balanced. - self.nesting -= 1; - if self.nesting == 0 { - self.end_location = range.end(); - } - } - _ => {} - } - } - - /// Returns `true` if the lexer is currently inside of a f-string. - /// - /// It'll return `false` once the `FStringEnd` token for the outermost - /// f-string is visited. - const fn in_fstring(&self) -> bool { - self.nesting > 0 - } - - /// Returns the complete range of the previously visited f-string. - /// - /// This method should only be called once the lexer is outside of any - /// f-string otherwise it might return an invalid range. - /// - /// It doesn't consume the builder because there can be multiple f-strings - /// throughout the source code. - fn finish(&self) -> TextRange { - debug_assert!(!self.in_fstring()); - TextRange::new(self.start_location, self.end_location) - } -} - struct FormatStringContinuation<'a> { string: &'a AnyString<'a>, } @@ -262,129 +244,24 @@ impl Format> for FormatStringContinuation<'_> { let comments = f.context().comments().clone(); let locator = f.context().locator(); let quote_style = f.options().quote_style(); - let mut dangling_comments = comments.dangling(self.string); - - let string_range = self.string.range(); - let string_content = locator.slice(string_range); - - // The AST parses implicit concatenation as a single string. - // Call into the lexer to extract the individual chunks and format each string on its own. - // This code does not yet implement the automatic joining of strings that fit on the same line - // because this is a black preview style. - let lexer = lex_starts_at(string_content, Mode::Expression, string_range.start()); - - // The lexer emits multiple tokens for a single f-string literal. Each token - // will have it's own range but we require the complete range of the f-string. - let mut fstring_range_builder = FStringRangeBuilder::default(); let mut joiner = f.join_with(in_parentheses_only_soft_line_break_or_space()); - for token in lexer { - let (token, token_range) = match token { - Ok(spanned) => spanned, - Err(LexicalError { - error: LexicalErrorType::IndentationError, - .. - }) => { - // This can happen if the string continuation appears anywhere inside of a parenthesized expression - // because the lexer doesn't know about the parentheses. For example, the following snipped triggers an Indentation error - // ```python - // { - // "key": ( - // [], - // 'a' - // 'b' - // 'c' - // ) - // } - // ``` - // Ignoring the error here is *safe* because we know that the program once parsed to a valid AST. - continue; - } - Err(_) => { - return Err(FormatError::syntax_error( - "Unexpected lexer error in string formatting", - )); - } - }; - - fstring_range_builder.visit_token(&token, token_range); - - // We need to ignore all the tokens within the f-string as there can - // be `String` tokens inside it as well. For example, - // - // ```python - // f"foo {'bar'} foo" - // # ^^^^^ - // # Ignore any logic for this `String` token - // ``` - // - // Here, we're interested in the complete f-string, not the individual - // tokens inside it. - if fstring_range_builder.in_fstring() { - continue; - } - - match token { - Tok::String { .. } | Tok::FStringEnd => { - let token_range = if token.is_f_string_end() { - fstring_range_builder.finish() - } else { - token_range - }; - - // ```python - // ( - // "a" - // # leading - // "the comment above" - // ) - // ``` - let leading_comments_end = dangling_comments - .partition_point(|comment| comment.start() <= token_range.start()); - - let (leading_part_comments, rest) = - dangling_comments.split_at(leading_comments_end); - - // ```python - // ( - // "a" # trailing comment - // "the comment above" - // ) - // ``` - let trailing_comments_end = rest.partition_point(|comment| { - comment.line_position().is_end_of_line() - && !locator.contains_line_break(TextRange::new( - token_range.end(), - comment.start(), - )) - }); - - let (trailing_part_comments, rest) = rest.split_at(trailing_comments_end); - let part = StringPart::from_source(token_range, &locator); - let normalized = - part.normalize(self.string.quoting(&locator), &locator, quote_style); - - joiner.entry(&format_args![ - line_suffix_boundary(), - leading_comments(leading_part_comments), - normalized, - trailing_comments(trailing_part_comments) - ]); - - dangling_comments = rest; - } - Tok::Comment(_) - | Tok::NonLogicalNewline - | Tok::Newline - | Tok::Indent - | Tok::Dedent => continue, - token => unreachable!("Unexpected token {token:?}"), - } + for part in self.string.parts() { + let normalized = StringPart::from_source(part.range(), &locator).normalize( + self.string.quoting(&locator), + &locator, + quote_style, + ); + + joiner.entry(&format_args![ + line_suffix_boundary(), + leading_comments(comments.leading(&part)), + normalized, + trailing_comments(comments.trailing(&part)) + ]); } - debug_assert!(dangling_comments.is_empty()); - joiner.finish() } } diff --git a/crates/ruff_python_formatter/src/statement/suite.rs b/crates/ruff_python_formatter/src/statement/suite.rs index 65ed7faec11d5..d8001cebe3c43 100644 --- a/crates/ruff_python_formatter/src/statement/suite.rs +++ b/crates/ruff_python_formatter/src/statement/suite.rs @@ -569,10 +569,14 @@ impl<'a> DocstringStmt<'a> { }; match value.as_ref() { - Expr::StringLiteral(value) if !value.implicit_concatenated => Some(DocstringStmt { - docstring: stmt, - suite_kind, - }), + Expr::StringLiteral(ast::ExprStringLiteral { value, .. }) + if !value.is_implicit_concatenated() => + { + Some(DocstringStmt { + docstring: stmt, + suite_kind, + }) + } _ => None, } } diff --git a/crates/ruff_python_formatter/tests/normalizer.rs b/crates/ruff_python_formatter/tests/normalizer.rs index 68c15349dd173..991c4206fb9c2 100644 --- a/crates/ruff_python_formatter/tests/normalizer.rs +++ b/crates/ruff_python_formatter/tests/normalizer.rs @@ -51,20 +51,16 @@ impl Transformer for Normalizer { transformer::walk_stmt(self, stmt); } - fn visit_expr(&self, expr: &mut Expr) { - if let Expr::StringLiteral(string_literal) = expr { - // Normalize a string by (1) stripping any leading and trailing space from each - // line, and (2) removing any blank lines from the start and end of the string. - string_literal.value = string_literal - .value - .lines() - .map(str::trim) - .collect::>() - .join("\n") - .trim() - .to_owned(); - } - - transformer::walk_expr(self, expr); + fn visit_string_literal(&self, string_literal: &mut ast::StringLiteral) { + // Normalize a string by (1) stripping any leading and trailing space from each + // line, and (2) removing any blank lines from the start and end of the string. + string_literal.value = string_literal + .value + .lines() + .map(str::trim) + .collect::>() + .join("\n") + .trim() + .to_owned(); } }