astral-sh · charliermarsh · Jan 11, 2023 · Jan 2, 2023 · Jan 2, 2023 · Jan 3, 2023
diff --git a/README.md b/README.md
@@ -702,6 +702,7 @@ For more, see [pyupgrade](https://pypi.org/project/pyupgrade/3.2.0/) on PyPI.
 | UP027 | RewriteListComprehension | Replace unpacked list comprehension with a generator expression | 🛠 |
 | UP028 | RewriteYieldFrom | Replace `yield` over `for` loop with `yield from` | 🛠 |
 | UP029 | UnnecessaryBuiltinImport | Unnecessary builtin import: `...` | 🛠 |
+| UP030 | FormatLiterals | Use implicit references for positional format fields | 🛠 |
 
 ### pep8-naming (N)
 

diff --git a/resources/test/fixtures/pyupgrade/UP030_0.py b/resources/test/fixtures/pyupgrade/UP030_0.py
@@ -0,0 +1,36 @@
+# Invalid calls; errors expected.
+
+"{0}" "{1}" "{2}".format(1, 2, 3)
+
+"a {3} complicated {1} string with {0} {2}".format(
+    "first", "second", "third", "fourth"
+)
+
+'{0}'.format(1)
+
+'{0:x}'.format(30)
+
+x = '{0}'.format(1)
+
+'''{0}\n{1}\n'''.format(1, 2)
+
+x = "foo {0}" \
+    "bar {1}".format(1, 2)
+
+("{0}").format(1)
+
+"\N{snowman} {0}".format(1)
+
+'{' '0}'.format(1)
+
+# These will not change because we are waiting for libcst to fix this issue:
+# https://github.com/Instagram/LibCST/issues/846
+print(
+    'foo{0}'
+    'bar{1}'.format(1, 2)
+)
+
+print(
+    'foo{0}'  # ohai\n"
+    'bar{1}'.format(1, 2)
+)
diff --git a/resources/test/fixtures/pyupgrade/UP030_1.py b/resources/test/fixtures/pyupgrade/UP030_1.py
@@ -0,0 +1,23 @@
+# Valid calls; no errors expected.
+
+'{}'.format(1)
+
+
+x = ('{0} {1}',)
+
+'{0} {0}'.format(1)
+
+'{0:<{1}}'.format(1, 4)
+
+f"{0}".format(a)
+
+f"{0}".format(1)
+
+print(f"{0}".format(1))
+
+# I did not include the following tests because ruff does not seem to work with
+# invalid python syntax (which is a good thing)
+
+# "{0}"format(1)
+# '{'.format(1)", "'}'.format(1)
+# ("{0}" # {1}\n"{2}").format(1, 2, 3)
diff --git a/ruff.schema.json b/ruff.schema.json
@@ -1606,6 +1606,8 @@
         "UP027",
         "UP028",
         "UP029",
+        "UP03",
+        "UP030",
         "W",
         "W2",
         "W29",

diff --git a/src/checkers/ast.rs b/src/checkers/ast.rs
@@ -1830,6 +1830,8 @@ where
                     || self.settings.enabled.contains(&RuleCode::F523)
                     || self.settings.enabled.contains(&RuleCode::F524)
                     || self.settings.enabled.contains(&RuleCode::F525)
+                    // pyupgrade
+                    || self.settings.enabled.contains(&RuleCode::UP030)
                 {
                     if let ExprKind::Attribute { value, attr, .. } = &func.node {
                         if let ExprKind::Constant {
@@ -1876,6 +1878,10 @@ where
                                                 self, &summary, location,
                                             );
                                         }
+
+                                        if self.settings.enabled.contains(&RuleCode::UP030) {
+                                            pyupgrade::rules::format_literals(self, &summary, expr);
+                                        }
                                     }
                                 }
                             }

diff --git a/src/cst/matchers.rs b/src/cst/matchers.rs
@@ -1,5 +1,7 @@
 use anyhow::{bail, Result};
-use libcst_native::{Expr, Import, ImportFrom, Module, SmallStatement, Statement};
+use libcst_native::{
+    Call, Expr, Expression, Import, ImportFrom, Module, SmallStatement, Statement,
+};
 
 pub fn match_module(module_text: &str) -> Result<Module> {
     match libcst_native::parse_module(module_text, None) {
@@ -8,6 +10,13 @@ pub fn match_module(module_text: &str) -> Result<Module> {
     }
 }
 
+pub fn match_expression(expression_text: &str) -> Result<Expression> {
+    match libcst_native::parse_expression(expression_text) {
+        Ok(expression) => Ok(expression),
+        Err(_) => bail!("Failed to extract CST from source"),
+    }
+}
+
 pub fn match_expr<'a, 'b>(module: &'a mut Module<'b>) -> Result<&'a mut Expr<'b>> {
     if let Some(Statement::Simple(expr)) = module.body.first_mut() {
         if let Some(SmallStatement::Expr(expr)) = expr.body.first_mut() {
@@ -43,3 +52,11 @@ pub fn match_import_from<'a, 'b>(module: &'a mut Module<'b>) -> Result<&'a mut I
         bail!("Expected Statement::Simple")
     }
 }
+
+pub fn match_call<'a, 'b>(expression: &'a mut Expression<'b>) -> Result<&'a mut Call<'b>> {
+    if let Expression::Call(call) = expression {
+        Ok(call)
+    } else {
+        bail!("Expected SmallStatement::Expr")
+    }
+}
diff --git a/src/pyflakes/format.rs b/src/pyflakes/format.rs
@@ -1,7 +1,6 @@
 //! Implements helper functions for using vendored/format.rs
 use std::convert::TryFrom;
 
-use rustc_hash::FxHashSet;
 use rustpython_common::format::{
     FieldName, FieldType, FormatParseError, FormatPart, FormatString, FromTemplate,
 };
@@ -21,10 +20,12 @@ pub(crate) fn error_to_string(err: &FormatParseError) -> String {
     .to_string()
 }
 
+#[derive(Debug)]
 pub(crate) struct FormatSummary {
-    pub autos: FxHashSet<usize>,
-    pub indexes: FxHashSet<usize>,
-    pub keywords: FxHashSet<String>,
+    pub autos: Vec<usize>,
+    pub indexes: Vec<usize>,
+    pub keywords: Vec<String>,
+    pub has_nested_parts: bool,
 }
 
 impl TryFrom<&str> for FormatSummary {
@@ -33,9 +34,10 @@ impl TryFrom<&str> for FormatSummary {
     fn try_from(literal: &str) -> Result<Self, Self::Error> {
         let format_string = FormatString::from_str(literal)?;
 
-        let mut autos = FxHashSet::default();
-        let mut indexes = FxHashSet::default();
-        let mut keywords = FxHashSet::default();
+        let mut autos = Vec::new();
+        let mut indexes = Vec::new();
+        let mut keywords = Vec::new();
+        let mut has_nested_parts = false;
 
         for format_part in format_string.format_parts {
             let FormatPart::Field {
@@ -47,9 +49,9 @@ impl TryFrom<&str> for FormatSummary {
             };
             let parsed = FieldName::parse(&field_name)?;
             match parsed.field_type {
-                FieldType::Auto => autos.insert(autos.len()),
-                FieldType::Index(i) => indexes.insert(i),
-                FieldType::Keyword(k) => keywords.insert(k),
+                FieldType::Auto => autos.push(autos.len()),
+                FieldType::Index(i) => indexes.push(i),
+                FieldType::Keyword(k) => keywords.push(k),
             };
 
             let nested = FormatString::from_str(&format_spec)?;
@@ -59,17 +61,19 @@ impl TryFrom<&str> for FormatSummary {
                 };
                 let parsed = FieldName::parse(&field_name)?;
                 match parsed.field_type {
-                    FieldType::Auto => autos.insert(autos.len()),
-                    FieldType::Index(i) => indexes.insert(i),
-                    FieldType::Keyword(k) => keywords.insert(k),
+                    FieldType::Auto => autos.push(autos.len()),
+                    FieldType::Index(i) => indexes.push(i),
+                    FieldType::Keyword(k) => keywords.push(k),
                 };
+                has_nested_parts = true;
             }
         }
 
         Ok(FormatSummary {
             autos,
             indexes,
             keywords,
+            has_nested_parts,
         })
     }
 }
@@ -82,9 +86,9 @@ mod tests {
     fn test_format_summary() {
         let literal = "foo{foo}a{}b{2}c{2}d{1}{}{}e{bar}{foo}f{spam}";
 
-        let expected_autos = [0usize, 1usize, 2usize].into_iter().collect();
-        let expected_indexes = [1usize, 2usize].into_iter().collect();
-        let expected_keywords = ["foo", "bar", "spam"]
+        let expected_autos = [0usize, 1usize, 2usize].to_vec();
+        let expected_indexes = [2usize, 2usize, 1usize].to_vec();
+        let expected_keywords: Vec<_> = ["foo", "bar", "foo", "spam"]
             .into_iter()
             .map(String::from)
             .collect();
@@ -94,15 +98,16 @@ mod tests {
         assert_eq!(format_summary.autos, expected_autos);
         assert_eq!(format_summary.indexes, expected_indexes);
         assert_eq!(format_summary.keywords, expected_keywords);
+        assert!(!format_summary.has_nested_parts);
     }
 
     #[test]
     fn test_format_summary_nested() {
         let literal = "foo{foo}a{:{}{}}b{2:{3}{4}}c{2}d{1}{}e{bar:{spam}{eggs}}";
 
-        let expected_autos = [0usize, 1usize, 2usize, 3usize].into_iter().collect();
-        let expected_indexes = [1usize, 2usize, 3usize, 4usize].into_iter().collect();
-        let expected_keywords = ["foo", "bar", "spam", "eggs"]
+        let expected_autos = [0usize, 1usize, 2usize, 3usize].to_vec();
+        let expected_indexes = [2usize, 3usize, 4usize, 2usize, 1usize].to_vec();
+        let expected_keywords: Vec<_> = ["foo", "bar", "spam", "eggs"]
             .into_iter()
             .map(String::from)
             .collect();
@@ -112,6 +117,7 @@ mod tests {
         assert_eq!(format_summary.autos, expected_autos);
         assert_eq!(format_summary.indexes, expected_indexes);
         assert_eq!(format_summary.keywords, expected_keywords);
+        assert!(format_summary.has_nested_parts);
     }
 
     #[test]

diff --git a/src/pyupgrade/mod.rs b/src/pyupgrade/mod.rs
@@ -50,6 +50,8 @@ mod tests {
     #[test_case(RuleCode::UP028, Path::new("UP028_0.py"); "UP028_0")]
     #[test_case(RuleCode::UP028, Path::new("UP028_1.py"); "UP028_1")]
     #[test_case(RuleCode::UP029, Path::new("UP029.py"); "UP029")]
+    #[test_case(RuleCode::UP030, Path::new("UP030_0.py"); "UP030_0")]
+    #[test_case(RuleCode::UP030, Path::new("UP030_1.py"); "UP030_1")]
     fn rules(rule_code: RuleCode, path: &Path) -> Result<()> {
         let snapshot = format!("{}_{}", rule_code.as_ref(), path.to_string_lossy());
         let diagnostics = test_path(

diff --git a/src/pyupgrade/rules/format_literals.rs b/src/pyupgrade/rules/format_literals.rs
@@ -0,0 +1,118 @@
+use anyhow::{anyhow, bail, Result};
+use libcst_native::{Arg, Codegen, CodegenState, Expression};
+use once_cell::sync::Lazy;
+use regex::Regex;
+use rustpython_ast::Expr;
+
+use crate::ast::types::Range;
+use crate::autofix::Fix;
+use crate::checkers::ast::Checker;
+use crate::cst::matchers::{match_call, match_expression};
+use crate::pyflakes::format::FormatSummary;
+use crate::registry::Diagnostic;
+use crate::violations;
+
+// An opening curly brace, followed by any integer, followed by any text,
+// followed by a closing brace.
+static FORMAT_SPECIFIER: Lazy<Regex> =
+    Lazy::new(|| Regex::new(r"\{(?P<int>\d+)(?P<fmt>.*?)}").unwrap());
+
+/// Returns a string without the format specifiers.
+/// Ex. "Hello {0} {1}" -> "Hello {} {}"
+fn remove_specifiers(raw_specifiers: &str) -> String {
+    FORMAT_SPECIFIER
+        .replace_all(raw_specifiers, "{$fmt}")
+        .to_string()
+}
+
+/// Return the corrected argument vector.
+fn generate_arguments<'a>(
+    old_args: &[Arg<'a>],
+    correct_order: &'a [usize],
+) -> Result<Vec<Arg<'a>>> {
+    let mut new_args: Vec<Arg> = Vec::with_capacity(old_args.len());
+    for (idx, given) in correct_order.iter().enumerate() {
+        // We need to keep the formatting in the same order but move the values.
+        let values = old_args
+            .get(*given)
+            .ok_or_else(|| anyhow!("Failed to extract argument at: {given}"))?;
+        let formatting = old_args
+            .get(idx)
+            .ok_or_else(|| anyhow!("Failed to extract argument at: {idx}"))?;
+        let new_arg = Arg {
+            value: values.value.clone(),
+            comma: formatting.comma.clone(),
+            equal: None,
+            keyword: None,
+            star: values.star,
+            whitespace_after_star: formatting.whitespace_after_star.clone(),
+            whitespace_after_arg: formatting.whitespace_after_arg.clone(),
+        };
+        new_args.push(new_arg);
+    }
+    Ok(new_args)
+}
+
+/// Returns the corrected function call.
+fn generate_call(module_text: &str, correct_order: &[usize]) -> Result<String> {
+    let mut expression = match_expression(module_text)?;
+    let mut call = match_call(&mut expression)?;
+
+    // Fix the call arguments.
+    call.args = generate_arguments(&call.args, correct_order)?;
+
+    // Fix the string itself.
+    let Expression::Attribute(item) = &*call.func else {
+        panic!("Expected: Expression::Attribute")
+    };
+
+    let mut state = CodegenState::default();
+    item.codegen(&mut state);
+    let cleaned = remove_specifiers(&state.to_string());
+
+    call.func = Box::new(match_expression(&cleaned)?);
+
+    let mut state = CodegenState::default();
+    expression.codegen(&mut state);
+    if module_text == state.to_string() {
+        // Ex) `'{' '0}'.format(1)`
+        bail!("Failed to generate call expression for: {module_text}")
+    }
+    Ok(state.to_string())
+}
+
+/// UP030
+pub(crate) fn format_literals(checker: &mut Checker, summary: &FormatSummary, expr: &Expr) {
+    // The format we expect is, e.g.: `"{0} {1}".format(...)`
+    if summary.has_nested_parts {
+        return;
+    }
+    if !summary.keywords.is_empty() {
+        return;
+    }
+    if !summary.autos.is_empty() {
+        return;
+    }
+    if !(0..summary.indexes.len()).all(|index| summary.indexes.contains(&index)) {
+        return;
+    }
+
+    let mut diagnostic = Diagnostic::new(violations::FormatLiterals, Range::from_located(expr));
+    if checker.patch(diagnostic.kind.code()) {
+        // Currently, the only issue we know of is in LibCST:
+        // https://github.com/Instagram/LibCST/issues/846
+        if let Ok(contents) = generate_call(
+            &checker
+                .locator
+                .slice_source_code_range(&Range::from_located(expr)),
+            &summary.indexes,
+        ) {
+            diagnostic.amend(Fix::replacement(
+                contents,
+                expr.location,
+                expr.end_location.unwrap(),
+            ));
+        };
+    }
+    checker.diagnostics.push(diagnostic);
+}