From bcabccaff513de23865ea5e188e022aebafe60a9 Mon Sep 17 00:00:00 2001
From: Dhruv Manilawala <dhruvmanila@gmail.com>
Date: Tue, 21 Nov 2023 13:54:51 -0600
Subject: [PATCH] Update the formatter code for the new AST design

---
 crates/ruff_python_ast/src/expression.rs      |  25 ++
 .../src/comments/placement.rs                 |   4 +-
 .../src/expression/expr_bin_op.rs             |   8 +-
 .../src/expression/expr_bytes_literal.rs      |   2 +-
 .../src/expression/expr_compare.rs            |   8 +-
 .../src/expression/expr_f_string.rs           |   2 +-
 .../src/expression/expr_string_literal.rs     |   2 +-
 .../src/expression/mod.rs                     |  23 +-
 .../src/expression/string.rs                  | 255 +++++-------------
 .../src/statement/suite.rs                    |  12 +-
 .../ruff_python_formatter/tests/normalizer.rs |  26 +-
 11 files changed, 134 insertions(+), 233 deletions(-)

diff --git a/crates/ruff_python_ast/src/expression.rs b/crates/ruff_python_ast/src/expression.rs
index 4ed79eeb36695..4bdc46f7dbf8d 100644
--- a/crates/ruff_python_ast/src/expression.rs
+++ b/crates/ruff_python_ast/src/expression.rs
@@ -362,6 +362,31 @@ impl Ranged for LiteralExpressionRef<'_> {
     }
 }
 
+impl<'a> From<LiteralExpressionRef<'a>> for AnyNodeRef<'a> {
+    fn from(value: LiteralExpressionRef<'a>) -> Self {
+        match value {
+            LiteralExpressionRef::StringLiteral(expression) => {
+                AnyNodeRef::ExprStringLiteral(expression)
+            }
+            LiteralExpressionRef::BytesLiteral(expression) => {
+                AnyNodeRef::ExprBytesLiteral(expression)
+            }
+            LiteralExpressionRef::NumberLiteral(expression) => {
+                AnyNodeRef::ExprNumberLiteral(expression)
+            }
+            LiteralExpressionRef::BooleanLiteral(expression) => {
+                AnyNodeRef::ExprBooleanLiteral(expression)
+            }
+            LiteralExpressionRef::NoneLiteral(expression) => {
+                AnyNodeRef::ExprNoneLiteral(expression)
+            }
+            LiteralExpressionRef::EllipsisLiteral(expression) => {
+                AnyNodeRef::ExprEllipsisLiteral(expression)
+            }
+        }
+    }
+}
+
 impl LiteralExpressionRef<'_> {
     /// Returns `true` if the literal is either a string or bytes literal that
     /// is implicitly concatenated.
diff --git a/crates/ruff_python_formatter/src/comments/placement.rs b/crates/ruff_python_formatter/src/comments/placement.rs
index 5fe12bd190322..4b667fd23629f 100644
--- a/crates/ruff_python_formatter/src/comments/placement.rs
+++ b/crates/ruff_python_formatter/src/comments/placement.rs
@@ -283,13 +283,13 @@ fn handle_enclosed_comment<'a>(
         AnyNodeRef::StmtWith(with_) => handle_with_comment(comment, with_),
         AnyNodeRef::ExprCall(_) => handle_call_comment(comment),
         AnyNodeRef::ExprStringLiteral(_) => {
-            if let Some(AnyNodeRef::ExprFString(fstring)) = comment.enclosing_parent() {
+            if let Some(AnyNodeRef::FString(fstring)) = comment.enclosing_parent() {
                 CommentPlacement::dangling(fstring, comment)
             } else {
                 CommentPlacement::Default(comment)
             }
         }
-        AnyNodeRef::ExprFString(fstring) => CommentPlacement::dangling(fstring, comment),
+        AnyNodeRef::FString(fstring) => CommentPlacement::dangling(fstring, comment),
         AnyNodeRef::ExprList(_)
         | AnyNodeRef::ExprSet(_)
         | AnyNodeRef::ExprListComp(_)
diff --git a/crates/ruff_python_formatter/src/expression/expr_bin_op.rs b/crates/ruff_python_formatter/src/expression/expr_bin_op.rs
index 4a9361881d7e5..337dd825f2f4f 100644
--- a/crates/ruff_python_formatter/src/expression/expr_bin_op.rs
+++ b/crates/ruff_python_formatter/src/expression/expr_bin_op.rs
@@ -35,13 +35,13 @@ impl NeedsParentheses for ExprBinOp {
     ) -> OptionalParentheses {
         if parent.is_expr_await() {
             OptionalParentheses::Always
-        } else if self.left.is_literal_expr() {
+        } else if let Some(literal_expr) = self.left.as_literal_expr() {
             // Multiline strings are guaranteed to never fit, avoid adding unnecessary parentheses
-            if !self.left.is_implicit_concatenated_string()
-                && is_multiline_string(self.left.as_ref().into(), context.source())
+            if !literal_expr.is_implicit_concatenated()
+                && is_multiline_string(literal_expr.into(), context.source())
                 && has_parentheses(&self.right, context).is_some()
                 && !context.comments().has_dangling(self)
-                && !context.comments().has(self.left.as_ref())
+                && !context.comments().has(literal_expr)
                 && !context.comments().has(self.right.as_ref())
             {
                 OptionalParentheses::Never
diff --git a/crates/ruff_python_formatter/src/expression/expr_bytes_literal.rs b/crates/ruff_python_formatter/src/expression/expr_bytes_literal.rs
index 968487a07cb4a..2fc0cd474ce77 100644
--- a/crates/ruff_python_formatter/src/expression/expr_bytes_literal.rs
+++ b/crates/ruff_python_formatter/src/expression/expr_bytes_literal.rs
@@ -31,7 +31,7 @@ impl NeedsParentheses for ExprBytesLiteral {
         _parent: AnyNodeRef,
         context: &PyFormatContext,
     ) -> OptionalParentheses {
-        if self.implicit_concatenated {
+        if self.value.is_implicit_concatenated() {
             OptionalParentheses::Multiline
         } else if is_multiline_string(self.into(), context.source()) {
             OptionalParentheses::Never
diff --git a/crates/ruff_python_formatter/src/expression/expr_compare.rs b/crates/ruff_python_formatter/src/expression/expr_compare.rs
index f48bd66eccd9c..e9a338075e318 100644
--- a/crates/ruff_python_formatter/src/expression/expr_compare.rs
+++ b/crates/ruff_python_formatter/src/expression/expr_compare.rs
@@ -37,11 +37,11 @@ impl NeedsParentheses for ExprCompare {
     ) -> OptionalParentheses {
         if parent.is_expr_await() {
             OptionalParentheses::Always
-        } else if self.left.is_literal_expr() {
+        } else if let Some(literal_expr) = self.left.as_literal_expr() {
             // Multiline strings are guaranteed to never fit, avoid adding unnecessary parentheses
-            if !self.left.is_implicit_concatenated_string()
-                && is_multiline_string(self.left.as_ref().into(), context.source())
-                && !context.comments().has(self.left.as_ref())
+            if !literal_expr.is_implicit_concatenated()
+                && is_multiline_string(literal_expr.into(), context.source())
+                && !context.comments().has(literal_expr)
                 && self.comparators.first().is_some_and(|right| {
                     has_parentheses(right, context).is_some() && !context.comments().has(right)
                 })
diff --git a/crates/ruff_python_formatter/src/expression/expr_f_string.rs b/crates/ruff_python_formatter/src/expression/expr_f_string.rs
index 51beb4dbb8f4a..12e112ecc1638 100644
--- a/crates/ruff_python_formatter/src/expression/expr_f_string.rs
+++ b/crates/ruff_python_formatter/src/expression/expr_f_string.rs
@@ -34,7 +34,7 @@ impl NeedsParentheses for ExprFString {
         _parent: AnyNodeRef,
         context: &PyFormatContext,
     ) -> OptionalParentheses {
-        if self.implicit_concatenated {
+        if self.value.is_implicit_concatenated() {
             OptionalParentheses::Multiline
         } else if memchr2(b'\n', b'\r', context.source()[self.range].as_bytes()).is_none() {
             OptionalParentheses::BestFit
diff --git a/crates/ruff_python_formatter/src/expression/expr_string_literal.rs b/crates/ruff_python_formatter/src/expression/expr_string_literal.rs
index e6ce374c292fb..199fb740ef712 100644
--- a/crates/ruff_python_formatter/src/expression/expr_string_literal.rs
+++ b/crates/ruff_python_formatter/src/expression/expr_string_literal.rs
@@ -46,7 +46,7 @@ impl NeedsParentheses for ExprStringLiteral {
         _parent: AnyNodeRef,
         context: &PyFormatContext,
     ) -> OptionalParentheses {
-        if self.implicit_concatenated {
+        if self.value.is_implicit_concatenated() {
             OptionalParentheses::Multiline
         } else if is_multiline_string(self.into(), context.source()) {
             OptionalParentheses::Never
diff --git a/crates/ruff_python_formatter/src/expression/mod.rs b/crates/ruff_python_formatter/src/expression/mod.rs
index 019546e596edd..86fae5137a448 100644
--- a/crates/ruff_python_formatter/src/expression/mod.rs
+++ b/crates/ruff_python_formatter/src/expression/mod.rs
@@ -804,18 +804,17 @@ impl<'input> CanOmitOptionalParenthesesVisitor<'input> {
                 return;
             }
 
-            Expr::StringLiteral(ast::ExprStringLiteral {
-                implicit_concatenated: true,
-                ..
-            })
-            | Expr::BytesLiteral(ast::ExprBytesLiteral {
-                implicit_concatenated: true,
-                ..
-            })
-            | Expr::FString(ast::ExprFString {
-                implicit_concatenated: true,
-                ..
-            }) => {
+            Expr::StringLiteral(ast::ExprStringLiteral { value, .. })
+                if value.is_implicit_concatenated() =>
+            {
+                self.update_max_precedence(OperatorPrecedence::String);
+            }
+            Expr::BytesLiteral(ast::ExprBytesLiteral { value, .. })
+                if value.is_implicit_concatenated() =>
+            {
+                self.update_max_precedence(OperatorPrecedence::String);
+            }
+            Expr::FString(ast::ExprFString { value, .. }) if value.is_implicit_concatenated() => {
                 self.update_max_precedence(OperatorPrecedence::String);
             }
 
diff --git a/crates/ruff_python_formatter/src/expression/string.rs b/crates/ruff_python_formatter/src/expression/string.rs
index 0fef24af91cf4..cb534d49a4517 100644
--- a/crates/ruff_python_formatter/src/expression/string.rs
+++ b/crates/ruff_python_formatter/src/expression/string.rs
@@ -2,13 +2,11 @@ use std::borrow::Cow;
 
 use bitflags::bitflags;
 
-use ruff_formatter::{format_args, write, FormatError};
+use ruff_formatter::{format_args, write};
 use ruff_python_ast::AnyNodeRef;
 use ruff_python_ast::{
     self as ast, ExprBytesLiteral, ExprFString, ExprStringLiteral, ExpressionRef,
 };
-use ruff_python_parser::lexer::{lex_starts_at, LexicalError, LexicalErrorType};
-use ruff_python_parser::{Mode, Tok};
 use ruff_source_file::Locator;
 use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
 
@@ -52,7 +50,7 @@ impl<'a> AnyString<'a> {
                     .trim_start_matches(|c| c != '"' && c != '\'');
                 let triple_quoted =
                     unprefixed.starts_with(r#"""""#) || unprefixed.starts_with(r"'''");
-                if f_string.values.iter().any(|value| match value {
+                if f_string.value.elements().any(|value| match value {
                     Expr::FormattedValue(ast::ExprFormattedValue { range, .. }) => {
                         let string_content = locator.slice(*range);
                         if triple_quoted {
@@ -74,18 +72,29 @@ impl<'a> AnyString<'a> {
     /// Returns `true` if the string is implicitly concatenated.
     pub(super) fn is_implicit_concatenated(&self) -> bool {
         match self {
-            Self::String(ExprStringLiteral {
-                implicit_concatenated,
-                ..
-            }) => *implicit_concatenated,
-            Self::Bytes(ExprBytesLiteral {
-                implicit_concatenated,
-                ..
-            }) => *implicit_concatenated,
-            Self::FString(ExprFString {
-                implicit_concatenated,
-                ..
-            }) => *implicit_concatenated,
+            Self::String(ExprStringLiteral { value, .. }) => value.is_implicit_concatenated(),
+            Self::Bytes(ExprBytesLiteral { value, .. }) => value.is_implicit_concatenated(),
+            Self::FString(ExprFString { value, .. }) => value.is_implicit_concatenated(),
+        }
+    }
+
+    fn parts(&self) -> Vec<AnyStringPart<'a>> {
+        match self {
+            Self::String(ExprStringLiteral { value, .. }) => {
+                value.parts().map(AnyStringPart::String).collect()
+            }
+            Self::Bytes(ExprBytesLiteral { value, .. }) => {
+                value.parts().map(AnyStringPart::Bytes).collect()
+            }
+            Self::FString(ExprFString { value, .. }) => value
+                .parts()
+                .map(|f_string_part| match f_string_part {
+                    ast::FStringPart::Literal(string_literal) => {
+                        AnyStringPart::String(string_literal)
+                    }
+                    ast::FStringPart::FString(f_string) => AnyStringPart::FString(f_string),
+                })
+                .collect(),
         }
     }
 }
@@ -120,6 +129,33 @@ impl<'a> From<&AnyString<'a>> for ExpressionRef<'a> {
     }
 }
 
+#[derive(Clone, Debug)]
+enum AnyStringPart<'a> {
+    String(&'a ast::StringLiteral),
+    Bytes(&'a ast::BytesLiteral),
+    FString(&'a ast::FString),
+}
+
+impl<'a> From<&AnyStringPart<'a>> for AnyNodeRef<'a> {
+    fn from(value: &AnyStringPart<'a>) -> Self {
+        match value {
+            AnyStringPart::String(part) => AnyNodeRef::StringLiteral(part),
+            AnyStringPart::Bytes(part) => AnyNodeRef::BytesLiteral(part),
+            AnyStringPart::FString(part) => AnyNodeRef::FString(part),
+        }
+    }
+}
+
+impl Ranged for AnyStringPart<'_> {
+    fn range(&self) -> TextRange {
+        match self {
+            Self::String(part) => part.range(),
+            Self::Bytes(part) => part.range(),
+            Self::FString(part) => part.range(),
+        }
+    }
+}
+
 pub(super) struct FormatString<'a> {
     string: &'a AnyString<'a>,
     layout: StringLayout,
@@ -185,7 +221,7 @@ impl<'a> Format<PyFormatContext<'_>> for FormatString<'a> {
         // comments.
         if let AnyString::FString(fstring) = self.string {
             let comments = f.context().comments();
-            fstring.values.iter().for_each(|value| {
+            fstring.value.elements().for_each(|value| {
                 comments.mark_verbatim_node_comments_formatted(value.into());
             });
         }
@@ -193,60 +229,6 @@ impl<'a> Format<PyFormatContext<'_>> for FormatString<'a> {
     }
 }
 
-/// A builder for the f-string range.
-///
-/// For now, this is limited to the outermost f-string and doesn't support
-/// nested f-strings.
-#[derive(Debug, Default)]
-struct FStringRangeBuilder {
-    start_location: TextSize,
-    end_location: TextSize,
-    nesting: u32,
-}
-
-impl FStringRangeBuilder {
-    fn visit_token(&mut self, token: &Tok, range: TextRange) {
-        match token {
-            Tok::FStringStart => {
-                if self.nesting == 0 {
-                    self.start_location = range.start();
-                }
-                self.nesting += 1;
-            }
-            Tok::FStringEnd => {
-                // We can assume that this will never overflow because we know
-                // that the program once parsed to a valid AST which means that
-                // the start and end tokens for f-strings are balanced.
-                self.nesting -= 1;
-                if self.nesting == 0 {
-                    self.end_location = range.end();
-                }
-            }
-            _ => {}
-        }
-    }
-
-    /// Returns `true` if the lexer is currently inside of a f-string.
-    ///
-    /// It'll return `false` once the `FStringEnd` token for the outermost
-    /// f-string is visited.
-    const fn in_fstring(&self) -> bool {
-        self.nesting > 0
-    }
-
-    /// Returns the complete range of the previously visited f-string.
-    ///
-    /// This method should only be called once the lexer is outside of any
-    /// f-string otherwise it might return an invalid range.
-    ///
-    /// It doesn't consume the builder because there can be multiple f-strings
-    /// throughout the source code.
-    fn finish(&self) -> TextRange {
-        debug_assert!(!self.in_fstring());
-        TextRange::new(self.start_location, self.end_location)
-    }
-}
-
 struct FormatStringContinuation<'a> {
     string: &'a AnyString<'a>,
 }
@@ -262,129 +244,24 @@ impl Format<PyFormatContext<'_>> for FormatStringContinuation<'_> {
         let comments = f.context().comments().clone();
         let locator = f.context().locator();
         let quote_style = f.options().quote_style();
-        let mut dangling_comments = comments.dangling(self.string);
-
-        let string_range = self.string.range();
-        let string_content = locator.slice(string_range);
-
-        // The AST parses implicit concatenation as a single string.
-        // Call into the lexer to extract the individual chunks and format each string on its own.
-        // This code does not yet implement the automatic joining of strings that fit on the same line
-        // because this is a black preview style.
-        let lexer = lex_starts_at(string_content, Mode::Expression, string_range.start());
-
-        // The lexer emits multiple tokens for a single f-string literal. Each token
-        // will have it's own range but we require the complete range of the f-string.
-        let mut fstring_range_builder = FStringRangeBuilder::default();
 
         let mut joiner = f.join_with(in_parentheses_only_soft_line_break_or_space());
 
-        for token in lexer {
-            let (token, token_range) = match token {
-                Ok(spanned) => spanned,
-                Err(LexicalError {
-                    error: LexicalErrorType::IndentationError,
-                    ..
-                }) => {
-                    // This can happen if the string continuation appears anywhere inside of a parenthesized expression
-                    // because the lexer doesn't know about the parentheses. For example, the following snipped triggers an Indentation error
-                    // ```python
-                    // {
-                    //     "key": (
-                    //         [],
-                    //         'a'
-                    //             'b'
-                    //          'c'
-                    //     )
-                    // }
-                    // ```
-                    // Ignoring the error here is *safe* because we know that the program once parsed to a valid AST.
-                    continue;
-                }
-                Err(_) => {
-                    return Err(FormatError::syntax_error(
-                        "Unexpected lexer error in string formatting",
-                    ));
-                }
-            };
-
-            fstring_range_builder.visit_token(&token, token_range);
-
-            // We need to ignore all the tokens within the f-string as there can
-            // be `String` tokens inside it as well. For example,
-            //
-            // ```python
-            // f"foo {'bar'} foo"
-            // #      ^^^^^
-            // #      Ignore any logic for this `String` token
-            // ```
-            //
-            // Here, we're interested in the complete f-string, not the individual
-            // tokens inside it.
-            if fstring_range_builder.in_fstring() {
-                continue;
-            }
-
-            match token {
-                Tok::String { .. } | Tok::FStringEnd => {
-                    let token_range = if token.is_f_string_end() {
-                        fstring_range_builder.finish()
-                    } else {
-                        token_range
-                    };
-
-                    // ```python
-                    // (
-                    //      "a"
-                    //      # leading
-                    //      "the comment above"
-                    // )
-                    // ```
-                    let leading_comments_end = dangling_comments
-                        .partition_point(|comment| comment.start() <= token_range.start());
-
-                    let (leading_part_comments, rest) =
-                        dangling_comments.split_at(leading_comments_end);
-
-                    // ```python
-                    // (
-                    //      "a" # trailing comment
-                    //      "the comment above"
-                    // )
-                    // ```
-                    let trailing_comments_end = rest.partition_point(|comment| {
-                        comment.line_position().is_end_of_line()
-                            && !locator.contains_line_break(TextRange::new(
-                                token_range.end(),
-                                comment.start(),
-                            ))
-                    });
-
-                    let (trailing_part_comments, rest) = rest.split_at(trailing_comments_end);
-                    let part = StringPart::from_source(token_range, &locator);
-                    let normalized =
-                        part.normalize(self.string.quoting(&locator), &locator, quote_style);
-
-                    joiner.entry(&format_args![
-                        line_suffix_boundary(),
-                        leading_comments(leading_part_comments),
-                        normalized,
-                        trailing_comments(trailing_part_comments)
-                    ]);
-
-                    dangling_comments = rest;
-                }
-                Tok::Comment(_)
-                | Tok::NonLogicalNewline
-                | Tok::Newline
-                | Tok::Indent
-                | Tok::Dedent => continue,
-                token => unreachable!("Unexpected token {token:?}"),
-            }
+        for part in self.string.parts() {
+            let normalized = StringPart::from_source(part.range(), &locator).normalize(
+                self.string.quoting(&locator),
+                &locator,
+                quote_style,
+            );
+
+            joiner.entry(&format_args![
+                line_suffix_boundary(),
+                leading_comments(comments.leading(&part)),
+                normalized,
+                trailing_comments(comments.trailing(&part))
+            ]);
         }
 
-        debug_assert!(dangling_comments.is_empty());
-
         joiner.finish()
     }
 }
diff --git a/crates/ruff_python_formatter/src/statement/suite.rs b/crates/ruff_python_formatter/src/statement/suite.rs
index 65ed7faec11d5..d8001cebe3c43 100644
--- a/crates/ruff_python_formatter/src/statement/suite.rs
+++ b/crates/ruff_python_formatter/src/statement/suite.rs
@@ -569,10 +569,14 @@ impl<'a> DocstringStmt<'a> {
         };
 
         match value.as_ref() {
-            Expr::StringLiteral(value) if !value.implicit_concatenated => Some(DocstringStmt {
-                docstring: stmt,
-                suite_kind,
-            }),
+            Expr::StringLiteral(ast::ExprStringLiteral { value, .. })
+                if !value.is_implicit_concatenated() =>
+            {
+                Some(DocstringStmt {
+                    docstring: stmt,
+                    suite_kind,
+                })
+            }
             _ => None,
         }
     }
diff --git a/crates/ruff_python_formatter/tests/normalizer.rs b/crates/ruff_python_formatter/tests/normalizer.rs
index 68c15349dd173..991c4206fb9c2 100644
--- a/crates/ruff_python_formatter/tests/normalizer.rs
+++ b/crates/ruff_python_formatter/tests/normalizer.rs
@@ -51,20 +51,16 @@ impl Transformer for Normalizer {
         transformer::walk_stmt(self, stmt);
     }
 
-    fn visit_expr(&self, expr: &mut Expr) {
-        if let Expr::StringLiteral(string_literal) = expr {
-            // Normalize a string by (1) stripping any leading and trailing space from each
-            // line, and (2) removing any blank lines from the start and end of the string.
-            string_literal.value = string_literal
-                .value
-                .lines()
-                .map(str::trim)
-                .collect::<Vec<_>>()
-                .join("\n")
-                .trim()
-                .to_owned();
-        }
-
-        transformer::walk_expr(self, expr);
+    fn visit_string_literal(&self, string_literal: &mut ast::StringLiteral) {
+        // Normalize a string by (1) stripping any leading and trailing space from each
+        // line, and (2) removing any blank lines from the start and end of the string.
+        string_literal.value = string_literal
+            .value
+            .lines()
+            .map(str::trim)
+            .collect::<Vec<_>>()
+            .join("\n")
+            .trim()
+            .to_owned();
     }
 }